tsorting.cuh - sphere - GPU-based 3D discrete element method algorithm with optional fluid coupling
HTML git clone git://src.adamsgaard.dk/sphere
DIR Log
DIR Files
DIR Refs
DIR LICENSE
---
tsorting.cuh (4802B)
---
1 #ifndef SORTING_CUH_
2 #define SORTING_CUH_
3
4 // Returns the cellID containing the particle, based cubic grid
5 // See Bayraktar et al. 2009
6 // Kernel is executed on the device, and is callable from the device only
7 __device__ unsigned int calcCellID(Float3 x)
8 {
9 unsigned int i_x, i_y, i_z;
10
11 // Calculate integral coordinates:
12 i_x = floor((x.x - devC_grid.origo[0]) / (devC_grid.L[0]/devC_grid.num[0]));
13 i_y = floor((x.y - devC_grid.origo[1]) / (devC_grid.L[1]/devC_grid.num[1]));
14 i_z = floor((x.z - devC_grid.origo[2]) / (devC_grid.L[2]/devC_grid.num[2]));
15
16 // Integral coordinates are converted to 1D coordinate:
17 return (i_z * devC_grid.num[1])
18 * devC_grid.num[0] + i_y * devC_grid.num[0] + i_x;
19
20 } // End of calcCellID(...)
21
22
23 // Calculate hash value for each particle, based on position in grid.
24 // Kernel executed on device, and callable from host only.
25 __global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID,
26 unsigned int* dev_gridParticleIndex,
27 Float4* dev_x)
28 {
29 unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
30
31 if (idx < devC_np) { // Condition prevents block size error
32
33 //volatile Float4 x = dev_x[idx]; // Ensure coalesced read
34 Float4 x = dev_x[idx]; // Ensure coalesced read
35
36 unsigned int cellID = calcCellID(MAKE_FLOAT3(x.x, x.y, x.z));
37
38 // Check for NaN
39 if (x.x != x.x || x.y != x.y || x.z != x.z)
40 printf("\ncalcParticleCellID: Error! NaN encountered. "
41 "idx = %d: cellID = %d, "
42 "x = %f,%f,%f\n",
43 idx, cellID, x.x, x.y, x.z);
44
45 // Store values
46 __syncthreads();
47 dev_gridParticleCellID[idx] = cellID;
48 dev_gridParticleIndex[idx] = idx;
49
50 }
51 } // End of calcParticleCellID(...)
52
53
54 // Reorder particle data into sorted order, and find the start and end particle
55 // indexes of each cell in the sorted hash array.
56 __global__ void reorderArrays(unsigned int* dev_cellStart,
57 unsigned int* dev_cellEnd,
58 unsigned int* dev_gridParticleCellID,
59 unsigned int* dev_gridParticleIndex,
60 Float4* dev_x,
61 Float4* dev_vel,
62 Float4* dev_angvel,
63 Float4* dev_x_sorted,
64 Float4* dev_vel_sorted,
65 Float4* dev_angvel_sorted)
66 {
67
68 // Create hash array in shared on-chip memory. The size of the array
69 // (threadsPerBlock + 1) is determined at launch time (extern notation).
70 extern __shared__ unsigned int shared_data[];
71
72 // Thread index in block
73 unsigned int tidx = threadIdx.x;
74
75 // Thread index in grid
76 unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x;
77
78 // CellID hash value of particle idx
79 unsigned int cellID;
80
81 // Read cellID data and store it in shared memory (shared_data)
82 if (idx < devC_np) { // Condition prevents block size error
83 cellID = dev_gridParticleCellID[idx];
84
85 // Load hash data into shared memory, allowing access to neighbor
86 // particle cellID values
87 shared_data[tidx+1] = cellID;
88
89 if (idx > 0 && tidx == 0) {
90 // First thread in block must load neighbor particle hash
91 shared_data[0] = dev_gridParticleCellID[idx-1];
92 }
93 }
94 //if (cellID != 0)
95 //printf("reorderArrays: %d,%d\tcellID = %d\n", tidx, idx, cellID);
96
97 // Pause completed threads in this block, until all
98 // threads are done loading data into shared memory
99 __syncthreads();
100
101 // Find lowest and highest particle index in each cell
102 if (idx < devC_np) { // Condition prevents block size error
103 // If this particle has a different cell index to the previous particle,
104 // it's the first particle in the cell -> Store the index of this
105 // particle in the cell. The previous particle must be the last particle
106 // in the previous cell.
107 if (idx == 0 || cellID != shared_data[tidx]) {
108 dev_cellStart[cellID] = idx;
109 if (idx > 0)
110 dev_cellEnd[shared_data[tidx]] = idx;
111 }
112
113 // Check wether the thread is the last one
114 if (idx == (devC_np - 1))
115 dev_cellEnd[cellID] = idx + 1;
116
117
118 // Use the sorted index to reorder the position and velocity data
119 unsigned int sortedIndex = dev_gridParticleIndex[idx];
120
121 // Fetch from global read
122 Float4 x = dev_x[sortedIndex];
123 Float4 vel = dev_vel[sortedIndex];
124 Float4 angvel = dev_angvel[sortedIndex];
125
126 __syncthreads();
127 // Write sorted data to global memory
128 dev_x_sorted[idx] = x;
129 dev_vel_sorted[idx] = vel;
130 dev_angvel_sorted[idx] = angvel;
131 }
132 } // End of reorderArrays(...)
133
134 #endif
135 // vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4