URI: 
       tsorting.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
  HTML git clone git://src.adamsgaard.dk/sphere
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       tsorting.cuh (4802B)
       ---
            1 #ifndef SORTING_CUH_
            2 #define SORTING_CUH_
            3 
            4 // Returns the cellID containing the particle, based cubic grid
            5 // See Bayraktar et al. 2009
            6 // Kernel is executed on the device, and is callable from the device only
            7 __device__ unsigned int calcCellID(Float3 x) 
            8 { 
            9     unsigned int i_x, i_y, i_z;
           10 
           11     // Calculate integral coordinates:
           12     i_x = floor((x.x - devC_grid.origo[0]) / (devC_grid.L[0]/devC_grid.num[0]));
           13     i_y = floor((x.y - devC_grid.origo[1]) / (devC_grid.L[1]/devC_grid.num[1]));
           14     i_z = floor((x.z - devC_grid.origo[2]) / (devC_grid.L[2]/devC_grid.num[2]));
           15 
           16     // Integral coordinates are converted to 1D coordinate:
           17     return (i_z * devC_grid.num[1])
           18         * devC_grid.num[0] + i_y * devC_grid.num[0] + i_x;
           19 
           20 } // End of calcCellID(...)
           21 
           22 
           23 // Calculate hash value for each particle, based on position in grid.
           24 // Kernel executed on device, and callable from host only.
           25 __global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID, 
           26         unsigned int* dev_gridParticleIndex, 
           27         Float4* dev_x) 
           28 {
           29     unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
           30 
           31     if (idx < devC_np) { // Condition prevents block size error
           32 
           33         //volatile Float4 x = dev_x[idx]; // Ensure coalesced read
           34         Float4 x = dev_x[idx]; // Ensure coalesced read
           35 
           36         unsigned int cellID = calcCellID(MAKE_FLOAT3(x.x, x.y, x.z));
           37 
           38         // Check for NaN
           39         if (x.x != x.x || x.y != x.y || x.z != x.z)
           40             printf("\ncalcParticleCellID: Error! NaN encountered. "
           41                     "idx = %d: cellID = %d, "
           42                     "x = %f,%f,%f\n",
           43                     idx, cellID, x.x, x.y, x.z);
           44 
           45         // Store values    
           46         __syncthreads();
           47         dev_gridParticleCellID[idx] = cellID;
           48         dev_gridParticleIndex[idx]  = idx;
           49 
           50     }
           51 } // End of calcParticleCellID(...)
           52 
           53 
           54 // Reorder particle data into sorted order, and find the start and end particle
           55 // indexes of each cell in the sorted hash array.
           56 __global__ void reorderArrays(unsigned int* dev_cellStart, 
           57         unsigned int* dev_cellEnd,
           58         unsigned int* dev_gridParticleCellID, 
           59         unsigned int* dev_gridParticleIndex,
           60         Float4* dev_x, 
           61         Float4* dev_vel, 
           62         Float4* dev_angvel,
           63         Float4* dev_x_sorted, 
           64         Float4* dev_vel_sorted,
           65         Float4* dev_angvel_sorted)
           66 { 
           67 
           68     // Create hash array in shared on-chip memory. The size of the array 
           69     // (threadsPerBlock + 1) is determined at launch time (extern notation).
           70     extern __shared__ unsigned int shared_data[]; 
           71 
           72     // Thread index in block
           73     unsigned int tidx = threadIdx.x;
           74 
           75     // Thread index in grid
           76     unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; 
           77 
           78     // CellID hash value of particle idx
           79     unsigned int cellID;
           80 
           81     // Read cellID data and store it in shared memory (shared_data)
           82     if (idx < devC_np) { // Condition prevents block size error
           83         cellID = dev_gridParticleCellID[idx];
           84 
           85         // Load hash data into shared memory, allowing access to neighbor
           86         // particle cellID values
           87         shared_data[tidx+1] = cellID; 
           88 
           89         if (idx > 0 && tidx == 0) {
           90             // First thread in block must load neighbor particle hash
           91             shared_data[0] = dev_gridParticleCellID[idx-1];
           92         }
           93     }
           94     //if (cellID != 0)
           95         //printf("reorderArrays: %d,%d\tcellID = %d\n", tidx, idx, cellID);
           96 
           97     // Pause completed threads in this block, until all 
           98     // threads are done loading data into shared memory
           99     __syncthreads();
          100 
          101     // Find lowest and highest particle index in each cell
          102     if (idx < devC_np) { // Condition prevents block size error
          103         // If this particle has a different cell index to the previous particle,
          104         // it's the first particle in the cell -> Store the index of this
          105         // particle in the cell. The previous particle must be the last particle
          106         // in the previous cell.
          107         if (idx == 0 || cellID != shared_data[tidx]) {
          108             dev_cellStart[cellID] = idx;
          109             if (idx > 0)
          110                 dev_cellEnd[shared_data[tidx]] = idx;
          111         }
          112 
          113         // Check wether the thread is the last one
          114         if (idx == (devC_np - 1)) 
          115             dev_cellEnd[cellID] = idx + 1;
          116 
          117 
          118         // Use the sorted index to reorder the position and velocity data
          119         unsigned int sortedIndex = dev_gridParticleIndex[idx];
          120 
          121         // Fetch from global read
          122         Float4 x      = dev_x[sortedIndex];
          123         Float4 vel    = dev_vel[sortedIndex];
          124         Float4 angvel = dev_angvel[sortedIndex];
          125 
          126         __syncthreads();
          127         // Write sorted data to global memory
          128         dev_x_sorted[idx]      = x;
          129         dev_vel_sorted[idx]    = vel;
          130         dev_angvel_sorted[idx] = angvel;
          131     }
          132 } // End of reorderArrays(...)
          133 
          134 #endif
          135 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4