mx1.adamsgaard.dk/1/src/sphere/file/src/helper_math.h.gph

  URI:

       thelper_math.h - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       thelper_math.h (38472B)
       ---
            1 /* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
            2  *
            3  * Redistribution and use in source and binary forms, with or without
            4  * modification, are permitted provided that the following conditions
            5  * are met:
            6  *  * Redistributions of source code must retain the above copyright
            7  *    notice, this list of conditions and the following disclaimer.
            8  *  * Redistributions in binary form must reproduce the above copyright
            9  *    notice, this list of conditions and the following disclaimer in the
           10  *    documentation and/or other materials provided with the distribution.
           11  *  * Neither the name of NVIDIA CORPORATION nor the names of its
           12  *    contributors may be used to endorse or promote products derived
           13  *    from this software without specific prior written permission.
           14  *
           15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
           16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
           17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
           18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
           19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
           20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
           21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
           22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
           23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
           24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
           25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
           26  */
           27 
           28 /*
           29  *  This file implements common mathematical operations on vector types
           30  *  (float3, float4 etc.) since these are not provided as standard by CUDA.
           31  *
           32  *  The syntax is modeled on the Cg standard library.
           33  *
           34  *  This is part of the Helper library includes
           35  *
           36  *    Thanks to Linh Hah for additions and fixes.
           37  */
           38 
           39 #ifndef HELPER_MATH_H
           40 #define HELPER_MATH_H
           41 
           42 #include "cuda_runtime.h"
           43 
           44 typedef unsigned int uint;
           45 typedef unsigned short ushort;
           46 
           47 #ifndef EXIT_WAIVED
           48 #define EXIT_WAIVED 2
           49 #endif
           50 
           51 #ifndef __CUDACC__
           52 #include <math.h>
           53 
           54 ////////////////////////////////////////////////////////////////////////////////
           55 // host implementations of CUDA functions
           56 ////////////////////////////////////////////////////////////////////////////////
           57 
           58 inline float fminf(float a, float b)
           59 {
           60     return a < b ? a : b;
           61 }
           62 
           63 inline float fmaxf(float a, float b)
           64 {
           65     return a > b ? a : b;
           66 }
           67 
           68 inline int max(int a, int b)
           69 {
           70     return a > b ? a : b;
           71 }
           72 
           73 inline int min(int a, int b)
           74 {
           75     return a < b ? a : b;
           76 }
           77 
           78 inline float rsqrtf(float x)
           79 {
           80     return 1.0f / sqrtf(x);
           81 }
           82 #endif
           83 
           84 ////////////////////////////////////////////////////////////////////////////////
           85 // constructors
           86 ////////////////////////////////////////////////////////////////////////////////
           87 
           88 inline __host__ __device__ float2 make_float2(float s)
           89 {
           90     return make_float2(s, s);
           91 }
           92 inline __host__ __device__ float2 make_float2(float3 a)
           93 {
           94     return make_float2(a.x, a.y);
           95 }
           96 inline __host__ __device__ float2 make_float2(int2 a)
           97 {
           98     return make_float2(float(a.x), float(a.y));
           99 }
          100 inline __host__ __device__ float2 make_float2(uint2 a)
          101 {
          102     return make_float2(float(a.x), float(a.y));
          103 }
          104 
          105 inline __host__ __device__ int2 make_int2(int s)
          106 {
          107     return make_int2(s, s);
          108 }
          109 inline __host__ __device__ int2 make_int2(int3 a)
          110 {
          111     return make_int2(a.x, a.y);
          112 }
          113 inline __host__ __device__ int2 make_int2(uint2 a)
          114 {
          115     return make_int2(int(a.x), int(a.y));
          116 }
          117 inline __host__ __device__ int2 make_int2(float2 a)
          118 {
          119     return make_int2(int(a.x), int(a.y));
          120 }
          121 
          122 inline __host__ __device__ uint2 make_uint2(uint s)
          123 {
          124     return make_uint2(s, s);
          125 }
          126 inline __host__ __device__ uint2 make_uint2(uint3 a)
          127 {
          128     return make_uint2(a.x, a.y);
          129 }
          130 inline __host__ __device__ uint2 make_uint2(int2 a)
          131 {
          132     return make_uint2(uint(a.x), uint(a.y));
          133 }
          134 
          135 inline __host__ __device__ float3 make_float3(float s)
          136 {
          137     return make_float3(s, s, s);
          138 }
          139 inline __host__ __device__ float3 make_float3(float2 a)
          140 {
          141     return make_float3(a.x, a.y, 0.0f);
          142 }
          143 inline __host__ __device__ float3 make_float3(float2 a, float s)
          144 {
          145     return make_float3(a.x, a.y, s);
          146 }
          147 inline __host__ __device__ float3 make_float3(float4 a)
          148 {
          149     return make_float3(a.x, a.y, a.z);
          150 }
          151 inline __host__ __device__ float3 make_float3(int3 a)
          152 {
          153     return make_float3(float(a.x), float(a.y), float(a.z));
          154 }
          155 inline __host__ __device__ float3 make_float3(uint3 a)
          156 {
          157     return make_float3(float(a.x), float(a.y), float(a.z));
          158 }
          159 
          160 inline __host__ __device__ int3 make_int3(int s)
          161 {
          162     return make_int3(s, s, s);
          163 }
          164 inline __host__ __device__ int3 make_int3(int2 a)
          165 {
          166     return make_int3(a.x, a.y, 0);
          167 }
          168 inline __host__ __device__ int3 make_int3(int2 a, int s)
          169 {
          170     return make_int3(a.x, a.y, s);
          171 }
          172 inline __host__ __device__ int3 make_int3(uint3 a)
          173 {
          174     return make_int3(int(a.x), int(a.y), int(a.z));
          175 }
          176 inline __host__ __device__ int3 make_int3(float3 a)
          177 {
          178     return make_int3(int(a.x), int(a.y), int(a.z));
          179 }
          180 
          181 inline __host__ __device__ uint3 make_uint3(uint s)
          182 {
          183     return make_uint3(s, s, s);
          184 }
          185 inline __host__ __device__ uint3 make_uint3(uint2 a)
          186 {
          187     return make_uint3(a.x, a.y, 0);
          188 }
          189 inline __host__ __device__ uint3 make_uint3(uint2 a, uint s)
          190 {
          191     return make_uint3(a.x, a.y, s);
          192 }
          193 inline __host__ __device__ uint3 make_uint3(uint4 a)
          194 {
          195     return make_uint3(a.x, a.y, a.z);
          196 }
          197 inline __host__ __device__ uint3 make_uint3(int3 a)
          198 {
          199     return make_uint3(uint(a.x), uint(a.y), uint(a.z));
          200 }
          201 
          202 inline __host__ __device__ float4 make_float4(float s)
          203 {
          204     return make_float4(s, s, s, s);
          205 }
          206 inline __host__ __device__ float4 make_float4(float3 a)
          207 {
          208     return make_float4(a.x, a.y, a.z, 0.0f);
          209 }
          210 inline __host__ __device__ float4 make_float4(float3 a, float w)
          211 {
          212     return make_float4(a.x, a.y, a.z, w);
          213 }
          214 inline __host__ __device__ float4 make_float4(int4 a)
          215 {
          216     return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
          217 }
          218 inline __host__ __device__ float4 make_float4(uint4 a)
          219 {
          220     return make_float4(float(a.x), float(a.y), float(a.z), float(a.w));
          221 }
          222 
          223 inline __host__ __device__ int4 make_int4(int s)
          224 {
          225     return make_int4(s, s, s, s);
          226 }
          227 inline __host__ __device__ int4 make_int4(int3 a)
          228 {
          229     return make_int4(a.x, a.y, a.z, 0);
          230 }
          231 inline __host__ __device__ int4 make_int4(int3 a, int w)
          232 {
          233     return make_int4(a.x, a.y, a.z, w);
          234 }
          235 inline __host__ __device__ int4 make_int4(uint4 a)
          236 {
          237     return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
          238 }
          239 inline __host__ __device__ int4 make_int4(float4 a)
          240 {
          241     return make_int4(int(a.x), int(a.y), int(a.z), int(a.w));
          242 }
          243 
          244 
          245 inline __host__ __device__ uint4 make_uint4(uint s)
          246 {
          247     return make_uint4(s, s, s, s);
          248 }
          249 inline __host__ __device__ uint4 make_uint4(uint3 a)
          250 {
          251     return make_uint4(a.x, a.y, a.z, 0);
          252 }
          253 inline __host__ __device__ uint4 make_uint4(uint3 a, uint w)
          254 {
          255     return make_uint4(a.x, a.y, a.z, w);
          256 }
          257 inline __host__ __device__ uint4 make_uint4(int4 a)
          258 {
          259     return make_uint4(uint(a.x), uint(a.y), uint(a.z), uint(a.w));
          260 }
          261 
          262 ////////////////////////////////////////////////////////////////////////////////
          263 // negate
          264 ////////////////////////////////////////////////////////////////////////////////
          265 
          266 inline __host__ __device__ float2 operator-(float2 &a)
          267 {
          268     return make_float2(-a.x, -a.y);
          269 }
          270 inline __host__ __device__ int2 operator-(int2 &a)
          271 {
          272     return make_int2(-a.x, -a.y);
          273 }
          274 inline __host__ __device__ float3 operator-(float3 &a)
          275 {
          276     return make_float3(-a.x, -a.y, -a.z);
          277 }
          278 inline __host__ __device__ int3 operator-(int3 &a)
          279 {
          280     return make_int3(-a.x, -a.y, -a.z);
          281 }
          282 inline __host__ __device__ float4 operator-(float4 &a)
          283 {
          284     return make_float4(-a.x, -a.y, -a.z, -a.w);
          285 }
          286 inline __host__ __device__ int4 operator-(int4 &a)
          287 {
          288     return make_int4(-a.x, -a.y, -a.z, -a.w);
          289 }
          290 
          291 ////////////////////////////////////////////////////////////////////////////////
          292 // addition
          293 ////////////////////////////////////////////////////////////////////////////////
          294 
          295 inline __host__ __device__ float2 operator+(float2 a, float2 b)
          296 {
          297     return make_float2(a.x + b.x, a.y + b.y);
          298 }
          299 inline __host__ __device__ void operator+=(float2 &a, float2 b)
          300 {
          301     a.x += b.x;
          302     a.y += b.y;
          303 }
          304 inline __host__ __device__ float2 operator+(float2 a, float b)
          305 {
          306     return make_float2(a.x + b, a.y + b);
          307 }
          308 inline __host__ __device__ float2 operator+(float b, float2 a)
          309 {
          310     return make_float2(a.x + b, a.y + b);
          311 }
          312 inline __host__ __device__ void operator+=(float2 &a, float b)
          313 {
          314     a.x += b;
          315     a.y += b;
          316 }
          317 
          318 inline __host__ __device__ int2 operator+(int2 a, int2 b)
          319 {
          320     return make_int2(a.x + b.x, a.y + b.y);
          321 }
          322 inline __host__ __device__ void operator+=(int2 &a, int2 b)
          323 {
          324     a.x += b.x;
          325     a.y += b.y;
          326 }
          327 inline __host__ __device__ int2 operator+(int2 a, int b)
          328 {
          329     return make_int2(a.x + b, a.y + b);
          330 }
          331 inline __host__ __device__ int2 operator+(int b, int2 a)
          332 {
          333     return make_int2(a.x + b, a.y + b);
          334 }
          335 inline __host__ __device__ void operator+=(int2 &a, int b)
          336 {
          337     a.x += b;
          338     a.y += b;
          339 }
          340 
          341 inline __host__ __device__ uint2 operator+(uint2 a, uint2 b)
          342 {
          343     return make_uint2(a.x + b.x, a.y + b.y);
          344 }
          345 inline __host__ __device__ void operator+=(uint2 &a, uint2 b)
          346 {
          347     a.x += b.x;
          348     a.y += b.y;
          349 }
          350 inline __host__ __device__ uint2 operator+(uint2 a, uint b)
          351 {
          352     return make_uint2(a.x + b, a.y + b);
          353 }
          354 inline __host__ __device__ uint2 operator+(uint b, uint2 a)
          355 {
          356     return make_uint2(a.x + b, a.y + b);
          357 }
          358 inline __host__ __device__ void operator+=(uint2 &a, uint b)
          359 {
          360     a.x += b;
          361     a.y += b;
          362 }
          363 
          364 
          365 inline __host__ __device__ float3 operator+(float3 a, float3 b)
          366 {
          367     return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
          368 }
          369 inline __host__ __device__ void operator+=(float3 &a, float3 b)
          370 {
          371     a.x += b.x;
          372     a.y += b.y;
          373     a.z += b.z;
          374 }
          375 inline __host__ __device__ float3 operator+(float3 a, float b)
          376 {
          377     return make_float3(a.x + b, a.y + b, a.z + b);
          378 }
          379 inline __host__ __device__ void operator+=(float3 &a, float b)
          380 {
          381     a.x += b;
          382     a.y += b;
          383     a.z += b;
          384 }
          385 
          386 inline __host__ __device__ int3 operator+(int3 a, int3 b)
          387 {
          388     return make_int3(a.x + b.x, a.y + b.y, a.z + b.z);
          389 }
          390 inline __host__ __device__ void operator+=(int3 &a, int3 b)
          391 {
          392     a.x += b.x;
          393     a.y += b.y;
          394     a.z += b.z;
          395 }
          396 inline __host__ __device__ int3 operator+(int3 a, int b)
          397 {
          398     return make_int3(a.x + b, a.y + b, a.z + b);
          399 }
          400 inline __host__ __device__ void operator+=(int3 &a, int b)
          401 {
          402     a.x += b;
          403     a.y += b;
          404     a.z += b;
          405 }
          406 
          407 inline __host__ __device__ uint3 operator+(uint3 a, uint3 b)
          408 {
          409     return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z);
          410 }
          411 inline __host__ __device__ void operator+=(uint3 &a, uint3 b)
          412 {
          413     a.x += b.x;
          414     a.y += b.y;
          415     a.z += b.z;
          416 }
          417 inline __host__ __device__ uint3 operator+(uint3 a, uint b)
          418 {
          419     return make_uint3(a.x + b, a.y + b, a.z + b);
          420 }
          421 inline __host__ __device__ void operator+=(uint3 &a, uint b)
          422 {
          423     a.x += b;
          424     a.y += b;
          425     a.z += b;
          426 }
          427 
          428 inline __host__ __device__ int3 operator+(int b, int3 a)
          429 {
          430     return make_int3(a.x + b, a.y + b, a.z + b);
          431 }
          432 inline __host__ __device__ uint3 operator+(uint b, uint3 a)
          433 {
          434     return make_uint3(a.x + b, a.y + b, a.z + b);
          435 }
          436 inline __host__ __device__ float3 operator+(float b, float3 a)
          437 {
          438     return make_float3(a.x + b, a.y + b, a.z + b);
          439 }
          440 
          441 inline __host__ __device__ float4 operator+(float4 a, float4 b)
          442 {
          443     return make_float4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
          444 }
          445 inline __host__ __device__ void operator+=(float4 &a, float4 b)
          446 {
          447     a.x += b.x;
          448     a.y += b.y;
          449     a.z += b.z;
          450     a.w += b.w;
          451 }
          452 inline __host__ __device__ float4 operator+(float4 a, float b)
          453 {
          454     return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
          455 }
          456 inline __host__ __device__ float4 operator+(float b, float4 a)
          457 {
          458     return make_float4(a.x + b, a.y + b, a.z + b, a.w + b);
          459 }
          460 inline __host__ __device__ void operator+=(float4 &a, float b)
          461 {
          462     a.x += b;
          463     a.y += b;
          464     a.z += b;
          465     a.w += b;
          466 }
          467 
          468 inline __host__ __device__ int4 operator+(int4 a, int4 b)
          469 {
          470     return make_int4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
          471 }
          472 inline __host__ __device__ void operator+=(int4 &a, int4 b)
          473 {
          474     a.x += b.x;
          475     a.y += b.y;
          476     a.z += b.z;
          477     a.w += b.w;
          478 }
          479 inline __host__ __device__ int4 operator+(int4 a, int b)
          480 {
          481     return make_int4(a.x + b, a.y + b, a.z + b,  a.w + b);
          482 }
          483 inline __host__ __device__ int4 operator+(int b, int4 a)
          484 {
          485     return make_int4(a.x + b, a.y + b, a.z + b,  a.w + b);
          486 }
          487 inline __host__ __device__ void operator+=(int4 &a, int b)
          488 {
          489     a.x += b;
          490     a.y += b;
          491     a.z += b;
          492     a.w += b;
          493 }
          494 
          495 inline __host__ __device__ uint4 operator+(uint4 a, uint4 b)
          496 {
          497     return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z,  a.w + b.w);
          498 }
          499 inline __host__ __device__ void operator+=(uint4 &a, uint4 b)
          500 {
          501     a.x += b.x;
          502     a.y += b.y;
          503     a.z += b.z;
          504     a.w += b.w;
          505 }
          506 inline __host__ __device__ uint4 operator+(uint4 a, uint b)
          507 {
          508     return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
          509 }
          510 inline __host__ __device__ uint4 operator+(uint b, uint4 a)
          511 {
          512     return make_uint4(a.x + b, a.y + b, a.z + b,  a.w + b);
          513 }
          514 inline __host__ __device__ void operator+=(uint4 &a, uint b)
          515 {
          516     a.x += b;
          517     a.y += b;
          518     a.z += b;
          519     a.w += b;
          520 }
          521 
          522 ////////////////////////////////////////////////////////////////////////////////
          523 // subtract
          524 ////////////////////////////////////////////////////////////////////////////////
          525 
          526 inline __host__ __device__ float2 operator-(float2 a, float2 b)
          527 {
          528     return make_float2(a.x - b.x, a.y - b.y);
          529 }
          530 inline __host__ __device__ void operator-=(float2 &a, float2 b)
          531 {
          532     a.x -= b.x;
          533     a.y -= b.y;
          534 }
          535 inline __host__ __device__ float2 operator-(float2 a, float b)
          536 {
          537     return make_float2(a.x - b, a.y - b);
          538 }
          539 inline __host__ __device__ float2 operator-(float b, float2 a)
          540 {
          541     return make_float2(b - a.x, b - a.y);
          542 }
          543 inline __host__ __device__ void operator-=(float2 &a, float b)
          544 {
          545     a.x -= b;
          546     a.y -= b;
          547 }
          548 
          549 inline __host__ __device__ int2 operator-(int2 a, int2 b)
          550 {
          551     return make_int2(a.x - b.x, a.y - b.y);
          552 }
          553 inline __host__ __device__ void operator-=(int2 &a, int2 b)
          554 {
          555     a.x -= b.x;
          556     a.y -= b.y;
          557 }
          558 inline __host__ __device__ int2 operator-(int2 a, int b)
          559 {
          560     return make_int2(a.x - b, a.y - b);
          561 }
          562 inline __host__ __device__ int2 operator-(int b, int2 a)
          563 {
          564     return make_int2(b - a.x, b - a.y);
          565 }
          566 inline __host__ __device__ void operator-=(int2 &a, int b)
          567 {
          568     a.x -= b;
          569     a.y -= b;
          570 }
          571 
          572 inline __host__ __device__ uint2 operator-(uint2 a, uint2 b)
          573 {
          574     return make_uint2(a.x - b.x, a.y - b.y);
          575 }
          576 inline __host__ __device__ void operator-=(uint2 &a, uint2 b)
          577 {
          578     a.x -= b.x;
          579     a.y -= b.y;
          580 }
          581 inline __host__ __device__ uint2 operator-(uint2 a, uint b)
          582 {
          583     return make_uint2(a.x - b, a.y - b);
          584 }
          585 inline __host__ __device__ uint2 operator-(uint b, uint2 a)
          586 {
          587     return make_uint2(b - a.x, b - a.y);
          588 }
          589 inline __host__ __device__ void operator-=(uint2 &a, uint b)
          590 {
          591     a.x -= b;
          592     a.y -= b;
          593 }
          594 
          595 inline __host__ __device__ float3 operator-(float3 a, float3 b)
          596 {
          597     return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
          598 }
          599 inline __host__ __device__ void operator-=(float3 &a, float3 b)
          600 {
          601     a.x -= b.x;
          602     a.y -= b.y;
          603     a.z -= b.z;
          604 }
          605 inline __host__ __device__ float3 operator-(float3 a, float b)
          606 {
          607     return make_float3(a.x - b, a.y - b, a.z - b);
          608 }
          609 inline __host__ __device__ float3 operator-(float b, float3 a)
          610 {
          611     return make_float3(b - a.x, b - a.y, b - a.z);
          612 }
          613 inline __host__ __device__ void operator-=(float3 &a, float b)
          614 {
          615     a.x -= b;
          616     a.y -= b;
          617     a.z -= b;
          618 }
          619 
          620 inline __host__ __device__ int3 operator-(int3 a, int3 b)
          621 {
          622     return make_int3(a.x - b.x, a.y - b.y, a.z - b.z);
          623 }
          624 inline __host__ __device__ void operator-=(int3 &a, int3 b)
          625 {
          626     a.x -= b.x;
          627     a.y -= b.y;
          628     a.z -= b.z;
          629 }
          630 inline __host__ __device__ int3 operator-(int3 a, int b)
          631 {
          632     return make_int3(a.x - b, a.y - b, a.z - b);
          633 }
          634 inline __host__ __device__ int3 operator-(int b, int3 a)
          635 {
          636     return make_int3(b - a.x, b - a.y, b - a.z);
          637 }
          638 inline __host__ __device__ void operator-=(int3 &a, int b)
          639 {
          640     a.x -= b;
          641     a.y -= b;
          642     a.z -= b;
          643 }
          644 
          645 inline __host__ __device__ uint3 operator-(uint3 a, uint3 b)
          646 {
          647     return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z);
          648 }
          649 inline __host__ __device__ void operator-=(uint3 &a, uint3 b)
          650 {
          651     a.x -= b.x;
          652     a.y -= b.y;
          653     a.z -= b.z;
          654 }
          655 inline __host__ __device__ uint3 operator-(uint3 a, uint b)
          656 {
          657     return make_uint3(a.x - b, a.y - b, a.z - b);
          658 }
          659 inline __host__ __device__ uint3 operator-(uint b, uint3 a)
          660 {
          661     return make_uint3(b - a.x, b - a.y, b - a.z);
          662 }
          663 inline __host__ __device__ void operator-=(uint3 &a, uint b)
          664 {
          665     a.x -= b;
          666     a.y -= b;
          667     a.z -= b;
          668 }
          669 
          670 inline __host__ __device__ float4 operator-(float4 a, float4 b)
          671 {
          672     return make_float4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
          673 }
          674 inline __host__ __device__ void operator-=(float4 &a, float4 b)
          675 {
          676     a.x -= b.x;
          677     a.y -= b.y;
          678     a.z -= b.z;
          679     a.w -= b.w;
          680 }
          681 inline __host__ __device__ float4 operator-(float4 a, float b)
          682 {
          683     return make_float4(a.x - b, a.y - b, a.z - b,  a.w - b);
          684 }
          685 inline __host__ __device__ void operator-=(float4 &a, float b)
          686 {
          687     a.x -= b;
          688     a.y -= b;
          689     a.z -= b;
          690     a.w -= b;
          691 }
          692 
          693 inline __host__ __device__ int4 operator-(int4 a, int4 b)
          694 {
          695     return make_int4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
          696 }
          697 inline __host__ __device__ void operator-=(int4 &a, int4 b)
          698 {
          699     a.x -= b.x;
          700     a.y -= b.y;
          701     a.z -= b.z;
          702     a.w -= b.w;
          703 }
          704 inline __host__ __device__ int4 operator-(int4 a, int b)
          705 {
          706     return make_int4(a.x - b, a.y - b, a.z - b,  a.w - b);
          707 }
          708 inline __host__ __device__ int4 operator-(int b, int4 a)
          709 {
          710     return make_int4(b - a.x, b - a.y, b - a.z, b - a.w);
          711 }
          712 inline __host__ __device__ void operator-=(int4 &a, int b)
          713 {
          714     a.x -= b;
          715     a.y -= b;
          716     a.z -= b;
          717     a.w -= b;
          718 }
          719 
          720 inline __host__ __device__ uint4 operator-(uint4 a, uint4 b)
          721 {
          722     return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z,  a.w - b.w);
          723 }
          724 inline __host__ __device__ void operator-=(uint4 &a, uint4 b)
          725 {
          726     a.x -= b.x;
          727     a.y -= b.y;
          728     a.z -= b.z;
          729     a.w -= b.w;
          730 }
          731 inline __host__ __device__ uint4 operator-(uint4 a, uint b)
          732 {
          733     return make_uint4(a.x - b, a.y - b, a.z - b,  a.w - b);
          734 }
          735 inline __host__ __device__ uint4 operator-(uint b, uint4 a)
          736 {
          737     return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w);
          738 }
          739 inline __host__ __device__ void operator-=(uint4 &a, uint b)
          740 {
          741     a.x -= b;
          742     a.y -= b;
          743     a.z -= b;
          744     a.w -= b;
          745 }
          746 
          747 ////////////////////////////////////////////////////////////////////////////////
          748 // multiply
          749 ////////////////////////////////////////////////////////////////////////////////
          750 
          751 inline __host__ __device__ float2 operator*(float2 a, float2 b)
          752 {
          753     return make_float2(a.x * b.x, a.y * b.y);
          754 }
          755 inline __host__ __device__ void operator*=(float2 &a, float2 b)
          756 {
          757     a.x *= b.x;
          758     a.y *= b.y;
          759 }
          760 inline __host__ __device__ float2 operator*(float2 a, float b)
          761 {
          762     return make_float2(a.x * b, a.y * b);
          763 }
          764 inline __host__ __device__ float2 operator*(float b, float2 a)
          765 {
          766     return make_float2(b * a.x, b * a.y);
          767 }
          768 inline __host__ __device__ void operator*=(float2 &a, float b)
          769 {
          770     a.x *= b;
          771     a.y *= b;
          772 }
          773 
          774 inline __host__ __device__ int2 operator*(int2 a, int2 b)
          775 {
          776     return make_int2(a.x * b.x, a.y * b.y);
          777 }
          778 inline __host__ __device__ void operator*=(int2 &a, int2 b)
          779 {
          780     a.x *= b.x;
          781     a.y *= b.y;
          782 }
          783 inline __host__ __device__ int2 operator*(int2 a, int b)
          784 {
          785     return make_int2(a.x * b, a.y * b);
          786 }
          787 inline __host__ __device__ int2 operator*(int b, int2 a)
          788 {
          789     return make_int2(b * a.x, b * a.y);
          790 }
          791 inline __host__ __device__ void operator*=(int2 &a, int b)
          792 {
          793     a.x *= b;
          794     a.y *= b;
          795 }
          796 
          797 inline __host__ __device__ uint2 operator*(uint2 a, uint2 b)
          798 {
          799     return make_uint2(a.x * b.x, a.y * b.y);
          800 }
          801 inline __host__ __device__ void operator*=(uint2 &a, uint2 b)
          802 {
          803     a.x *= b.x;
          804     a.y *= b.y;
          805 }
          806 inline __host__ __device__ uint2 operator*(uint2 a, uint b)
          807 {
          808     return make_uint2(a.x * b, a.y * b);
          809 }
          810 inline __host__ __device__ uint2 operator*(uint b, uint2 a)
          811 {
          812     return make_uint2(b * a.x, b * a.y);
          813 }
          814 inline __host__ __device__ void operator*=(uint2 &a, uint b)
          815 {
          816     a.x *= b;
          817     a.y *= b;
          818 }
          819 
          820 inline __host__ __device__ float3 operator*(float3 a, float3 b)
          821 {
          822     return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
          823 }
          824 inline __host__ __device__ void operator*=(float3 &a, float3 b)
          825 {
          826     a.x *= b.x;
          827     a.y *= b.y;
          828     a.z *= b.z;
          829 }
          830 inline __host__ __device__ float3 operator*(float3 a, float b)
          831 {
          832     return make_float3(a.x * b, a.y * b, a.z * b);
          833 }
          834 inline __host__ __device__ float3 operator*(float b, float3 a)
          835 {
          836     return make_float3(b * a.x, b * a.y, b * a.z);
          837 }
          838 inline __host__ __device__ void operator*=(float3 &a, float b)
          839 {
          840     a.x *= b;
          841     a.y *= b;
          842     a.z *= b;
          843 }
          844 
          845 inline __host__ __device__ int3 operator*(int3 a, int3 b)
          846 {
          847     return make_int3(a.x * b.x, a.y * b.y, a.z * b.z);
          848 }
          849 inline __host__ __device__ void operator*=(int3 &a, int3 b)
          850 {
          851     a.x *= b.x;
          852     a.y *= b.y;
          853     a.z *= b.z;
          854 }
          855 inline __host__ __device__ int3 operator*(int3 a, int b)
          856 {
          857     return make_int3(a.x * b, a.y * b, a.z * b);
          858 }
          859 inline __host__ __device__ int3 operator*(int b, int3 a)
          860 {
          861     return make_int3(b * a.x, b * a.y, b * a.z);
          862 }
          863 inline __host__ __device__ void operator*=(int3 &a, int b)
          864 {
          865     a.x *= b;
          866     a.y *= b;
          867     a.z *= b;
          868 }
          869 
          870 inline __host__ __device__ uint3 operator*(uint3 a, uint3 b)
          871 {
          872     return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z);
          873 }
          874 inline __host__ __device__ void operator*=(uint3 &a, uint3 b)
          875 {
          876     a.x *= b.x;
          877     a.y *= b.y;
          878     a.z *= b.z;
          879 }
          880 inline __host__ __device__ uint3 operator*(uint3 a, uint b)
          881 {
          882     return make_uint3(a.x * b, a.y * b, a.z * b);
          883 }
          884 inline __host__ __device__ uint3 operator*(uint b, uint3 a)
          885 {
          886     return make_uint3(b * a.x, b * a.y, b * a.z);
          887 }
          888 inline __host__ __device__ void operator*=(uint3 &a, uint b)
          889 {
          890     a.x *= b;
          891     a.y *= b;
          892     a.z *= b;
          893 }
          894 
          895 inline __host__ __device__ float4 operator*(float4 a, float4 b)
          896 {
          897     return make_float4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
          898 }
          899 inline __host__ __device__ void operator*=(float4 &a, float4 b)
          900 {
          901     a.x *= b.x;
          902     a.y *= b.y;
          903     a.z *= b.z;
          904     a.w *= b.w;
          905 }
          906 inline __host__ __device__ float4 operator*(float4 a, float b)
          907 {
          908     return make_float4(a.x * b, a.y * b, a.z * b,  a.w * b);
          909 }
          910 inline __host__ __device__ float4 operator*(float b, float4 a)
          911 {
          912     return make_float4(b * a.x, b * a.y, b * a.z, b * a.w);
          913 }
          914 inline __host__ __device__ void operator*=(float4 &a, float b)
          915 {
          916     a.x *= b;
          917     a.y *= b;
          918     a.z *= b;
          919     a.w *= b;
          920 }
          921 
          922 inline __host__ __device__ int4 operator*(int4 a, int4 b)
          923 {
          924     return make_int4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
          925 }
          926 inline __host__ __device__ void operator*=(int4 &a, int4 b)
          927 {
          928     a.x *= b.x;
          929     a.y *= b.y;
          930     a.z *= b.z;
          931     a.w *= b.w;
          932 }
          933 inline __host__ __device__ int4 operator*(int4 a, int b)
          934 {
          935     return make_int4(a.x * b, a.y * b, a.z * b,  a.w * b);
          936 }
          937 inline __host__ __device__ int4 operator*(int b, int4 a)
          938 {
          939     return make_int4(b * a.x, b * a.y, b * a.z, b * a.w);
          940 }
          941 inline __host__ __device__ void operator*=(int4 &a, int b)
          942 {
          943     a.x *= b;
          944     a.y *= b;
          945     a.z *= b;
          946     a.w *= b;
          947 }
          948 
          949 inline __host__ __device__ uint4 operator*(uint4 a, uint4 b)
          950 {
          951     return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z,  a.w * b.w);
          952 }
          953 inline __host__ __device__ void operator*=(uint4 &a, uint4 b)
          954 {
          955     a.x *= b.x;
          956     a.y *= b.y;
          957     a.z *= b.z;
          958     a.w *= b.w;
          959 }
          960 inline __host__ __device__ uint4 operator*(uint4 a, uint b)
          961 {
          962     return make_uint4(a.x * b, a.y * b, a.z * b,  a.w * b);
          963 }
          964 inline __host__ __device__ uint4 operator*(uint b, uint4 a)
          965 {
          966     return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w);
          967 }
          968 inline __host__ __device__ void operator*=(uint4 &a, uint b)
          969 {
          970     a.x *= b;
          971     a.y *= b;
          972     a.z *= b;
          973     a.w *= b;
          974 }
          975 
          976 ////////////////////////////////////////////////////////////////////////////////
          977 // divide
          978 ////////////////////////////////////////////////////////////////////////////////
          979 
          980 inline __host__ __device__ float2 operator/(float2 a, float2 b)
          981 {
          982     return make_float2(a.x / b.x, a.y / b.y);
          983 }
          984 inline __host__ __device__ void operator/=(float2 &a, float2 b)
          985 {
          986     a.x /= b.x;
          987     a.y /= b.y;
          988 }
          989 inline __host__ __device__ float2 operator/(float2 a, float b)
          990 {
          991     return make_float2(a.x / b, a.y / b);
          992 }
          993 inline __host__ __device__ void operator/=(float2 &a, float b)
          994 {
          995     a.x /= b;
          996     a.y /= b;
          997 }
          998 inline __host__ __device__ float2 operator/(float b, float2 a)
          999 {
         1000     return make_float2(b / a.x, b / a.y);
         1001 }
         1002 
         1003 inline __host__ __device__ float3 operator/(float3 a, float3 b)
         1004 {
         1005     return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
         1006 }
         1007 inline __host__ __device__ void operator/=(float3 &a, float3 b)
         1008 {
         1009     a.x /= b.x;
         1010     a.y /= b.y;
         1011     a.z /= b.z;
         1012 }
         1013 inline __host__ __device__ float3 operator/(float3 a, float b)
         1014 {
         1015     return make_float3(a.x / b, a.y / b, a.z / b);
         1016 }
         1017 inline __host__ __device__ void operator/=(float3 &a, float b)
         1018 {
         1019     a.x /= b;
         1020     a.y /= b;
         1021     a.z /= b;
         1022 }
         1023 inline __host__ __device__ float3 operator/(float b, float3 a)
         1024 {
         1025     return make_float3(b / a.x, b / a.y, b / a.z);
         1026 }
         1027 
         1028 inline __host__ __device__ float4 operator/(float4 a, float4 b)
         1029 {
         1030     return make_float4(a.x / b.x, a.y / b.y, a.z / b.z,  a.w / b.w);
         1031 }
         1032 inline __host__ __device__ void operator/=(float4 &a, float4 b)
         1033 {
         1034     a.x /= b.x;
         1035     a.y /= b.y;
         1036     a.z /= b.z;
         1037     a.w /= b.w;
         1038 }
         1039 inline __host__ __device__ float4 operator/(float4 a, float b)
         1040 {
         1041     return make_float4(a.x / b, a.y / b, a.z / b,  a.w / b);
         1042 }
         1043 inline __host__ __device__ void operator/=(float4 &a, float b)
         1044 {
         1045     a.x /= b;
         1046     a.y /= b;
         1047     a.z /= b;
         1048     a.w /= b;
         1049 }
         1050 inline __host__ __device__ float4 operator/(float b, float4 a)
         1051 {
         1052     return make_float4(b / a.x, b / a.y, b / a.z, b / a.w);
         1053 }
         1054 
         1055 ////////////////////////////////////////////////////////////////////////////////
         1056 // min
         1057 ////////////////////////////////////////////////////////////////////////////////
         1058 
         1059 inline  __host__ __device__ float2 fminf(float2 a, float2 b)
         1060 {
         1061     return make_float2(fminf(a.x,b.x), fminf(a.y,b.y));
         1062 }
         1063 inline __host__ __device__ float3 fminf(float3 a, float3 b)
         1064 {
         1065     return make_float3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z));
         1066 }
         1067 inline  __host__ __device__ float4 fminf(float4 a, float4 b)
         1068 {
         1069     return make_float4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fminf(a.w,b.w));
         1070 }
         1071 
         1072 inline __host__ __device__ int2 min(int2 a, int2 b)
         1073 {
         1074     return make_int2(min(a.x,b.x), min(a.y,b.y));
         1075 }
         1076 inline __host__ __device__ int3 min(int3 a, int3 b)
         1077 {
         1078     return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
         1079 }
         1080 inline __host__ __device__ int4 min(int4 a, int4 b)
         1081 {
         1082     return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
         1083 }
         1084 
         1085 inline __host__ __device__ uint2 min(uint2 a, uint2 b)
         1086 {
         1087     return make_uint2(min(a.x,b.x), min(a.y,b.y));
         1088 }
         1089 inline __host__ __device__ uint3 min(uint3 a, uint3 b)
         1090 {
         1091     return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z));
         1092 }
         1093 inline __host__ __device__ uint4 min(uint4 a, uint4 b)
         1094 {
         1095     return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w));
         1096 }
         1097 
         1098 ////////////////////////////////////////////////////////////////////////////////
         1099 // max
         1100 ////////////////////////////////////////////////////////////////////////////////
         1101 
         1102 inline __host__ __device__ float2 fmaxf(float2 a, float2 b)
         1103 {
         1104     return make_float2(fmaxf(a.x,b.x), fmaxf(a.y,b.y));
         1105 }
         1106 inline __host__ __device__ float3 fmaxf(float3 a, float3 b)
         1107 {
         1108     return make_float3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z));
         1109 }
         1110 inline __host__ __device__ float4 fmaxf(float4 a, float4 b)
         1111 {
         1112     return make_float4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fmaxf(a.w,b.w));
         1113 }
         1114 
         1115 inline __host__ __device__ int2 max(int2 a, int2 b)
         1116 {
         1117     return make_int2(max(a.x,b.x), max(a.y,b.y));
         1118 }
         1119 inline __host__ __device__ int3 max(int3 a, int3 b)
         1120 {
         1121     return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
         1122 }
         1123 inline __host__ __device__ int4 max(int4 a, int4 b)
         1124 {
         1125     return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
         1126 }
         1127 
         1128 inline __host__ __device__ uint2 max(uint2 a, uint2 b)
         1129 {
         1130     return make_uint2(max(a.x,b.x), max(a.y,b.y));
         1131 }
         1132 inline __host__ __device__ uint3 max(uint3 a, uint3 b)
         1133 {
         1134     return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z));
         1135 }
         1136 inline __host__ __device__ uint4 max(uint4 a, uint4 b)
         1137 {
         1138     return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w));
         1139 }
         1140 
         1141 ////////////////////////////////////////////////////////////////////////////////
         1142 // lerp
         1143 // - linear interpolation between a and b, based on value t in [0, 1] range
         1144 ////////////////////////////////////////////////////////////////////////////////
         1145 
         1146 inline __device__ __host__ float lerp(float a, float b, float t)
         1147 {
         1148     return a + t*(b-a);
         1149 }
         1150 inline __device__ __host__ float2 lerp(float2 a, float2 b, float t)
         1151 {
         1152     return a + t*(b-a);
         1153 }
         1154 inline __device__ __host__ float3 lerp(float3 a, float3 b, float t)
         1155 {
         1156     return a + t*(b-a);
         1157 }
         1158 inline __device__ __host__ float4 lerp(float4 a, float4 b, float t)
         1159 {
         1160     return a + t*(b-a);
         1161 }
         1162 
         1163 ////////////////////////////////////////////////////////////////////////////////
         1164 // clamp
         1165 // - clamp the value v to be in the range [a, b]
         1166 ////////////////////////////////////////////////////////////////////////////////
         1167 
         1168 inline __device__ __host__ float clamp(float f, float a, float b)
         1169 {
         1170     return fmaxf(a, fminf(f, b));
         1171 }
         1172 inline __device__ __host__ int clamp(int f, int a, int b)
         1173 {
         1174     return max(a, min(f, b));
         1175 }
         1176 inline __device__ __host__ uint clamp(uint f, uint a, uint b)
         1177 {
         1178     return max(a, min(f, b));
         1179 }
         1180 
         1181 inline __device__ __host__ float2 clamp(float2 v, float a, float b)
         1182 {
         1183     return make_float2(clamp(v.x, a, b), clamp(v.y, a, b));
         1184 }
         1185 inline __device__ __host__ float2 clamp(float2 v, float2 a, float2 b)
         1186 {
         1187     return make_float2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
         1188 }
         1189 inline __device__ __host__ float3 clamp(float3 v, float a, float b)
         1190 {
         1191     return make_float3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
         1192 }
         1193 inline __device__ __host__ float3 clamp(float3 v, float3 a, float3 b)
         1194 {
         1195     return make_float3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
         1196 }
         1197 inline __device__ __host__ float4 clamp(float4 v, float a, float b)
         1198 {
         1199     return make_float4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
         1200 }
         1201 inline __device__ __host__ float4 clamp(float4 v, float4 a, float4 b)
         1202 {
         1203     return make_float4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
         1204 }
         1205 
         1206 inline __device__ __host__ int2 clamp(int2 v, int a, int b)
         1207 {
         1208     return make_int2(clamp(v.x, a, b), clamp(v.y, a, b));
         1209 }
         1210 inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b)
         1211 {
         1212     return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
         1213 }
         1214 inline __device__ __host__ int3 clamp(int3 v, int a, int b)
         1215 {
         1216     return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
         1217 }
         1218 inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b)
         1219 {
         1220     return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
         1221 }
         1222 inline __device__ __host__ int4 clamp(int4 v, int a, int b)
         1223 {
         1224     return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
         1225 }
         1226 inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b)
         1227 {
         1228     return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
         1229 }
         1230 
         1231 inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b)
         1232 {
         1233     return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b));
         1234 }
         1235 inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b)
         1236 {
         1237     return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y));
         1238 }
         1239 inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b)
         1240 {
         1241     return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b));
         1242 }
         1243 inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b)
         1244 {
         1245     return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z));
         1246 }
         1247 inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b)
         1248 {
         1249     return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), clamp(v.w, a, b));
         1250 }
         1251 inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b)
         1252 {
         1253     return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.z, b.z), clamp(v.w, a.w, b.w));
         1254 }
         1255 
         1256 ////////////////////////////////////////////////////////////////////////////////
         1257 // dot product
         1258 ////////////////////////////////////////////////////////////////////////////////
         1259 
         1260 inline __host__ __device__ float dot(float2 a, float2 b)
         1261 {
         1262     return a.x * b.x + a.y * b.y;
         1263 }
         1264 inline __host__ __device__ float dot(float3 a, float3 b)
         1265 {
         1266     return a.x * b.x + a.y * b.y + a.z * b.z;
         1267 }
         1268 inline __host__ __device__ float dot(float4 a, float4 b)
         1269 {
         1270     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
         1271 }
         1272 
         1273 inline __host__ __device__ int dot(int2 a, int2 b)
         1274 {
         1275     return a.x * b.x + a.y * b.y;
         1276 }
         1277 inline __host__ __device__ int dot(int3 a, int3 b)
         1278 {
         1279     return a.x * b.x + a.y * b.y + a.z * b.z;
         1280 }
         1281 inline __host__ __device__ int dot(int4 a, int4 b)
         1282 {
         1283     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
         1284 }
         1285 
         1286 inline __host__ __device__ uint dot(uint2 a, uint2 b)
         1287 {
         1288     return a.x * b.x + a.y * b.y;
         1289 }
         1290 inline __host__ __device__ uint dot(uint3 a, uint3 b)
         1291 {
         1292     return a.x * b.x + a.y * b.y + a.z * b.z;
         1293 }
         1294 inline __host__ __device__ uint dot(uint4 a, uint4 b)
         1295 {
         1296     return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
         1297 }
         1298 
         1299 ////////////////////////////////////////////////////////////////////////////////
         1300 // length
         1301 ////////////////////////////////////////////////////////////////////////////////
         1302 
         1303 inline __host__ __device__ float length(float2 v)
         1304 {
         1305     return sqrtf(dot(v, v));
         1306 }
         1307 inline __host__ __device__ float length(float3 v)
         1308 {
         1309     return sqrtf(dot(v, v));
         1310 }
         1311 inline __host__ __device__ float length(float4 v)
         1312 {
         1313     return sqrtf(dot(v, v));
         1314 }
         1315 
         1316 ////////////////////////////////////////////////////////////////////////////////
         1317 // normalize
         1318 ////////////////////////////////////////////////////////////////////////////////
         1319 
         1320 inline __host__ __device__ float2 normalize(float2 v)
         1321 {
         1322     float invLen = rsqrtf(dot(v, v));
         1323     return v * invLen;
         1324 }
         1325 inline __host__ __device__ float3 normalize(float3 v)
         1326 {
         1327     float invLen = rsqrtf(dot(v, v));
         1328     return v * invLen;
         1329 }
         1330 inline __host__ __device__ float4 normalize(float4 v)
         1331 {
         1332     float invLen = rsqrtf(dot(v, v));
         1333     return v * invLen;
         1334 }
         1335 
         1336 ////////////////////////////////////////////////////////////////////////////////
         1337 // floor
         1338 ////////////////////////////////////////////////////////////////////////////////
         1339 
         1340 inline __host__ __device__ float2 floorf(float2 v)
         1341 {
         1342     return make_float2(floorf(v.x), floorf(v.y));
         1343 }
         1344 inline __host__ __device__ float3 floorf(float3 v)
         1345 {
         1346     return make_float3(floorf(v.x), floorf(v.y), floorf(v.z));
         1347 }
         1348 inline __host__ __device__ float4 floorf(float4 v)
         1349 {
         1350     return make_float4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w));
         1351 }
         1352 
         1353 ////////////////////////////////////////////////////////////////////////////////
         1354 // frac - returns the fractional portion of a scalar or each vector component
         1355 ////////////////////////////////////////////////////////////////////////////////
         1356 
         1357 inline __host__ __device__ float fracf(float v)
         1358 {
         1359     return v - floorf(v);
         1360 }
         1361 inline __host__ __device__ float2 fracf(float2 v)
         1362 {
         1363     return make_float2(fracf(v.x), fracf(v.y));
         1364 }
         1365 inline __host__ __device__ float3 fracf(float3 v)
         1366 {
         1367     return make_float3(fracf(v.x), fracf(v.y), fracf(v.z));
         1368 }
         1369 inline __host__ __device__ float4 fracf(float4 v)
         1370 {
         1371     return make_float4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w));
         1372 }
         1373 
         1374 ////////////////////////////////////////////////////////////////////////////////
         1375 // fmod
         1376 ////////////////////////////////////////////////////////////////////////////////
         1377 
         1378 inline __host__ __device__ float2 fmodf(float2 a, float2 b)
         1379 {
         1380     return make_float2(fmodf(a.x, b.x), fmodf(a.y, b.y));
         1381 }
         1382 inline __host__ __device__ float3 fmodf(float3 a, float3 b)
         1383 {
         1384     return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
         1385 }
         1386 inline __host__ __device__ float4 fmodf(float4 a, float4 b)
         1387 {
         1388     return make_float4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmodf(a.w, b.w));
         1389 }
         1390 
         1391 ////////////////////////////////////////////////////////////////////////////////
         1392 // absolute value
         1393 ////////////////////////////////////////////////////////////////////////////////
         1394 
         1395 inline __host__ __device__ float2 fabs(float2 v)
         1396 {
         1397     return make_float2(fabs(v.x), fabs(v.y));
         1398 }
         1399 inline __host__ __device__ float3 fabs(float3 v)
         1400 {
         1401     return make_float3(fabs(v.x), fabs(v.y), fabs(v.z));
         1402 }
         1403 inline __host__ __device__ float4 fabs(float4 v)
         1404 {
         1405     return make_float4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w));
         1406 }
         1407 
         1408 inline __host__ __device__ int2 abs(int2 v)
         1409 {
         1410     return make_int2(abs(v.x), abs(v.y));
         1411 }
         1412 inline __host__ __device__ int3 abs(int3 v)
         1413 {
         1414     return make_int3(abs(v.x), abs(v.y), abs(v.z));
         1415 }
         1416 inline __host__ __device__ int4 abs(int4 v)
         1417 {
         1418     return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w));
         1419 }
         1420 
         1421 ////////////////////////////////////////////////////////////////////////////////
         1422 // reflect
         1423 // - returns reflection of incident ray I around surface normal N
         1424 // - N should be normalized, reflected vector's length is equal to length of I
         1425 ////////////////////////////////////////////////////////////////////////////////
         1426 
         1427 inline __host__ __device__ float3 reflect(float3 i, float3 n)
         1428 {
         1429     return i - 2.0f * n * dot(n,i);
         1430 }
         1431 
         1432 ////////////////////////////////////////////////////////////////////////////////
         1433 // cross product
         1434 ////////////////////////////////////////////////////////////////////////////////
         1435 
         1436 inline __host__ __device__ float3 cross(float3 a, float3 b)
         1437 {
         1438     return make_float3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x);
         1439 }
         1440 
         1441 ////////////////////////////////////////////////////////////////////////////////
         1442 // smoothstep
         1443 // - returns 0 if x < a
         1444 // - returns 1 if x > b
         1445 // - otherwise returns smooth interpolation between 0 and 1 based on x
         1446 ////////////////////////////////////////////////////////////////////////////////
         1447 
         1448 inline __device__ __host__ float smoothstep(float a, float b, float x)
         1449 {
         1450     float y = clamp((x - a) / (b - a), 0.0f, 1.0f);
         1451     return (y*y*(3.0f - (2.0f*y)));
         1452 }
         1453 inline __device__ __host__ float2 smoothstep(float2 a, float2 b, float2 x)
         1454 {
         1455     float2 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
         1456     return (y*y*(make_float2(3.0f) - (make_float2(2.0f)*y)));
         1457 }
         1458 inline __device__ __host__ float3 smoothstep(float3 a, float3 b, float3 x)
         1459 {
         1460     float3 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
         1461     return (y*y*(make_float3(3.0f) - (make_float3(2.0f)*y)));
         1462 }
         1463 inline __device__ __host__ float4 smoothstep(float4 a, float4 b, float4 x)
         1464 {
         1465     float4 y = clamp((x - a) / (b - a), 0.0f, 1.0f);
         1466     return (y*y*(make_float4(3.0f) - (make_float4(2.0f)*y)));
         1467 }
         1468 
         1469 #endif