Lattice Microbes 2.5
This is for whole cell modeling
Loading...
Searching...
No Matches
particle_size_kernels.cuh
Go to the documentation of this file.
1/*
2 * University of Illinois Open Source License
3 * Copyright 2024 Luthey-Schulten Group,
4 * All rights reserved.
5 *
6 * CUDA kernels for particle size-aware RDME operations
7 */
8
9#ifndef LM_CUDA_PARTICLE_SIZE_KERNELS_CUH
10#define LM_CUDA_PARTICLE_SIZE_KERNELS_CUH
11
12#include "config.h"
13#include "core/Types.h"
14#include "cuda/constant.cuh"
15
// Ensure particle_t is defined for CUDA compilation.
// NOTE(review): #ifndef only detects a *macro* named particle_t/lattice_size_t;
// if core/Types.h defines these as plain typedefs (no companion #define), the
// guards below are always taken and these typedefs could clash — confirm that
// the including headers #define the names alongside the typedefs.
17#ifndef particle_t
18typedef uint32_t particle_t;
19#endif
20
21#ifndef lattice_size_t
22typedef uint32_t lattice_size_t;
23#endif
24
25// Maximum number of particle types supported in constant memory
26#define MAX_PARTICLE_TYPES_CONSTANT 256
27
28// Constant memory for particle sizes (accessible from all kernels) - must be global
29extern __constant__ uint32_t particle_sizes_constant[MAX_PARTICLE_TYPES_CONSTANT];
30
31namespace lm {
32namespace cuda {
33
34/**
35 * @brief Device function to get particle size from constant memory
36 * @param particleType Particle type ID
37 * @return Size of the particle (default: 1)
38 */
39__device__ __forceinline__ uint32_t getParticleSize(particle_t particleType) {
40 return (particleType < MAX_PARTICLE_TYPES_CONSTANT) ?
41 particle_sizes_constant[particleType] : 1;
42}
43
44/**
45 * @brief Template device function to calculate total size occupancy at a site
46 * @param particles Array of particles at the site
47 * @param count Number of particles
48 * @return Total size of all particles
49 */
50template<typename ParticleType>
51__device__ uint32_t calculateSiteOccupancy(const ParticleType* particles, uint32_t count) {
52 uint32_t totalSize = 0;
53 for (uint32_t i = 0; i < count; ++i) {
54 totalSize += getParticleSize(particles[i]);
55 }
56 return totalSize;
57}
58
59__device__ void validateKernelParams(uint32_t latticeXSize, uint32_t latticeYSize,
60 uint32_t latticeZSize, uint32_t particlesPerSite);
61
62
63/**
64 * @brief Memory-safe site occupancy calculation with overflow protection
65 * @param particles Array of particles at the site
66 * @param count Number of particles
67 * @param maxCount Maximum safe count to prevent buffer overflow
68 * @return Total size of all particles, UINT32_MAX if overflow detected
69 */
70template<typename ParticleType>
71__device__ uint32_t calculateSiteOccupancySafe(const ParticleType* particles, uint32_t count, uint32_t maxCount);
72
73/**
74 * @brief Template device function to check if adding a particle would exceed capacity
75 * @param particles Current particles at site
76 * @param count Current particle count
77 * @param newParticle Particle to add
78 * @param maxCapacity Maximum site capacity
79 * @return True if particle can be added
80 */
81template<typename ParticleType>
82__device__ bool canAddParticleDevice(const ParticleType* particles, uint32_t count,
83 particle_t newParticle, uint32_t maxCapacity) {
84 uint32_t currentOccupancy = calculateSiteOccupancy(particles, count);
85 uint32_t newParticleSize = getParticleSize(newParticle);
86 return (currentOccupancy + newParticleSize) <= maxCapacity;
87}
88
89// Explicit instantiations for common types
90__device__ uint32_t calculateSiteOccupancy(const uint8_t* particles, uint32_t count);
91__device__ uint32_t calculateSiteOccupancy(const uint32_t* particles, uint32_t count);
92__device__ bool canAddParticleDevice(const uint8_t* particles, uint32_t count,
93 particle_t newParticle, uint32_t maxCapacity);
94__device__ bool canAddParticleDevice(const uint32_t* particles, uint32_t count,
95 particle_t newParticle, uint32_t maxCapacity);
96
97/**
98 * @brief Kernel to validate size constraints across entire lattice
99 * @param lattice Particle lattice data
100 * @param siteLattice Site type lattice
101 * @param violationFlags Output array for violation flags
102 * @param latticeSize Total lattice size
103 * @param particlesPerSite Maximum particles per site
104 */
105__global__ void validateSizeConstraintsKernel(
106 const uint8_t* lattice,
107 const uint8_t* siteLattice,
108 uint32_t* violationFlags,
109 uint32_t latticeSize,
110 uint32_t particlesPerSite
111);
112
113/**
114 * @brief Kernel to calculate occupancy statistics
115 * @param lattice Particle lattice data
116 * @param occupancyCounts Output array for occupancy histogram
117 * @param latticeSize Total lattice size
118 * @param particlesPerSite Maximum particles per site
119 * @param maxOccupancy Maximum occupancy value to track
120 */
121__global__ void calculateOccupancyStatsKernel(
122 const uint8_t* lattice,
123 uint32_t* occupancyCounts,
124 uint32_t latticeSize,
125 uint32_t particlesPerSite,
126 uint32_t maxOccupancy
127);
128
129
130/**
131 * @brief Host function to copy particle sizes to constant memory
132 * @param particleSizes Host array of particle sizes
133 * @param numTypes Number of particle types
134 * @return cudaError_t error code
135 */
136cudaError_t copyParticleSizesToConstantMemory(const uint32_t* particleSizes, size_t numTypes);
137
138/**
139 * @brief Host function to validate lattice size constraints (ByteLattice)
140 * @param lattice Device pointer to lattice data
141 * @param siteLattice Device pointer to site data
142 * @param latticeSize Total lattice size
143 * @param particlesPerSite Maximum particles per site
144 * @param stream CUDA stream for async execution
145 * @return Number of sites violating constraints
146 */
147uint32_t validateLatticeConstraints(const uint8_t* lattice, const uint8_t* siteLattice,
148 uint32_t latticeSize, uint32_t particlesPerSite,
149 cudaStream_t stream = 0);
150
151/**
152 * @brief Host function to validate lattice size constraints (IntLattice)
153 * @param lattice Device pointer to lattice data
154 * @param siteLattice Device pointer to site data
155 * @param latticeSize Total lattice size
156 * @param particlesPerSite Maximum particles per site
157 * @param stream CUDA stream for async execution
158 * @return Number of sites violating constraints
159 */
160uint32_t validateLatticeConstraints(const uint32_t* lattice, const uint8_t* siteLattice,
161 uint32_t latticeSize, uint32_t particlesPerSite,
162 cudaStream_t stream = 0);
163
164/**
165 * @brief Host function to get occupancy statistics from GPU (ByteLattice)
166 * @param lattice Device pointer to lattice data
167 * @param latticeSize Total lattice size
168 * @param particlesPerSite Maximum particles per site
169 * @param occupancyCounts Host output array for histogram
170 * @param maxOccupancy Maximum occupancy to track
171 * @param stream CUDA stream for async execution
172 * @return cudaError_t error code
173 */
174cudaError_t getOccupancyStatistics(const uint8_t* lattice, uint32_t latticeSize,
175 uint32_t particlesPerSite, uint32_t* occupancyCounts,
176 uint32_t maxOccupancy, cudaStream_t stream = 0);
177
178/**
179 * @brief Host function to get occupancy statistics from GPU (IntLattice)
180 * @param lattice Device pointer to lattice data
181 * @param latticeSize Total lattice size
182 * @param particlesPerSite Maximum particles per site
183 * @param occupancyCounts Host output array for histogram
184 * @param maxOccupancy Maximum occupancy to track
185 * @param stream CUDA stream for async execution
186 * @return cudaError_t error code
187 */
188cudaError_t getOccupancyStatistics(const uint32_t* lattice, uint32_t latticeSize,
189 uint32_t particlesPerSite, uint32_t* occupancyCounts,
190 uint32_t maxOccupancy, cudaStream_t stream = 0);
191
192
193
194// =============== NEW EFFICIENT SIZE TRACKING KERNELS ===============
195
196/**
197 * @brief Initialize site sizes lattice from current particle lattice
198 * @param particles Particle lattice data
199 * @param siteSizes Output site sizes lattice (uint32_t per site)
200 * @param particleSizes Particle sizes lookup table
201 * @param latticeSize Total number of sites
202 * @param particlesPerSite Maximum particles per site
203 */
204__global__ void initializeSiteSizesKernel(
205 const uint8_t* particles,
206 uint32_t* siteSizes,
207 const uint32_t* particleSizes,
208 uint32_t latticeSize,
209 uint32_t particlesPerSite
210);
211
212/**
213 * @brief Check if particles can be added to sites (batch operation)
214 * @param siteSizes Current site sizes lattice
215 * @param particleSizes Particle sizes lookup table
216 * @param subvolumes Array of subvolume indices to check
217 * @param particleTypes Array of particle types to add
218 * @param results Output array of boolean results
219 * @param maxCapacity Maximum capacity per site
220 * @param count Number of operations to perform
221 */
222__global__ void canAddParticlesBatchKernel(
223 const uint32_t* siteSizes,
224 const uint32_t* particleSizes,
225 const lattice_size_t* subvolumes,
226 const particle_t* particleTypes,
227 bool* results,
228 uint32_t maxCapacity,
229 uint32_t count
230);
231
232/**
233 * @brief Add particles to sites and update size lattice (batch operation)
234 * @param siteSizes Site sizes lattice to update
235 * @param particleSizes Particle sizes lookup table
236 * @param subvolumes Array of subvolume indices
237 * @param particleTypes Array of particle types to add
238 * @param maxCapacity Maximum capacity per site
239 * @param count Number of operations to perform
240 * @param results Output array indicating success/failure
241 */
242__global__ void addParticlesBatchUpdateSizesKernel(
243 uint32_t* siteSizes,
244 const uint32_t* particleSizes,
245 const lattice_size_t* subvolumes,
246 const particle_t* particleTypes,
247 uint32_t maxCapacity,
248 uint32_t count,
249 bool* results
250);
251
252/**
253 * @brief Remove particles from sites and update size lattice (batch operation)
254 * @param siteSizes Site sizes lattice to update
255 * @param particleSizes Particle sizes lookup table
256 * @param subvolumes Array of subvolume indices
257 * @param particleTypes Array of particle types to remove
258 * @param count Number of operations to perform
259 */
260__global__ void removeParticlesBatchUpdateSizesKernel(
261 uint32_t* siteSizes,
262 const uint32_t* particleSizes,
263 const lattice_size_t* subvolumes,
264 const particle_t* particleTypes,
265 uint32_t count
266);
267
268/**
269 * @brief Fast size-aware diffusion with dedicated site size lattice
270 * @param inParticles Input particle lattice
271 * @param inSiteSizes Input site sizes lattice
272 * @param outParticles Output particle lattice
273 * @param outSiteSizes Output site sizes lattice
274 * @param inSites Site types
275 * @param particleSizes Particle sizes lookup table
276 * @param timestepHash Random seed
277 * @param latticeXSize X dimension
278 * @param latticeYSize Y dimension
279 * @param latticeZSize Z dimension
280 * @param particlesPerSite Maximum particles per site
281 * @param maxCapacity Maximum site capacity
282 */
283__global__ void fastSizeAwareDiffusionKernel(
284 const uint8_t* inParticles,
285 const uint32_t* inSiteSizes,
286 uint8_t* outParticles,
287 uint32_t* outSiteSizes,
288 const uint8_t* inSites,
289 const uint32_t* particleSizes,
290 unsigned long long timestepHash,
291 uint32_t latticeXSize,
292 uint32_t latticeYSize,
293 uint32_t latticeZSize,
294 uint32_t particlesPerSite,
295 uint32_t maxCapacity
296);
297/**
298 * @brief Perform size-aware propagation
299 * @param outLattice Output lattice
300 * @param outSizeLattice Output size lattice
301 * @param window Window
302 * @param sizeWindow Size window
303 * @param choices Choices
304 * @param latticeIndex Lattice index
305 * @param latticeXYSize Lattice XY size
306 * @param latticeXYZSize Lattice XYZ size
307 * @param windowIndex Window index
308 * @param particlesPerSite Maximum particles per site
309 * @param siteOverflowList Site overflow list
310 */
311__device__ void performSizeAwarePropagation(
312 unsigned int* __restrict__ outLattice,
313 uint32_t* __restrict__ outSizeLattice,
314 const unsigned int* __restrict__ window,
315 const uint32_t* __restrict__ sizeWindow,
316 const uint8_t* __restrict__ choices,
317 const uint32_t latticeIndex,
318 const uint32_t latticeXYSize,
319 const uint32_t latticeXYZSize,
320 const uint32_t windowIndex,
321 const uint32_t particlesPerSite,
322 uint32_t* __restrict__ siteOverflowList);
323
324// Host wrapper functions for the new kernels
325cudaError_t initializeSiteSizes(const uint8_t* particles, uint32_t* siteSizes,
326 const uint32_t* particleSizes, uint32_t latticeSize,
327 uint32_t particlesPerSite, cudaStream_t stream = 0);
328
329cudaError_t canAddParticlesBatch(const uint32_t* siteSizes, const uint32_t* particleSizes,
330 const lattice_size_t* subvolumes, const particle_t* particleTypes,
331 bool* results, uint32_t maxCapacity, uint32_t count,
332 cudaStream_t stream = 0);
333
334cudaError_t addParticlesBatchUpdateSizes(uint32_t* siteSizes, const uint32_t* particleSizes,
335 const lattice_size_t* subvolumes, const particle_t* particleTypes,
336 uint32_t maxCapacity, uint32_t count, bool* results,
337 cudaStream_t stream = 0);
338
339cudaError_t removeParticlesBatchUpdateSizes(uint32_t* siteSizes, const uint32_t* particleSizes,
340 const lattice_size_t* subvolumes, const particle_t* particleTypes,
341 uint32_t count, cudaStream_t stream = 0);
342
343} // namespace cuda
344} // namespace lm
345
346/**
347 * @brief Kernel for windowed size-aware diffusion
348 * @param inLattice Input lattice state
349 * @param inSites Site types
350 * @param inSizeLattice Input size lattice
351 * @param outLattice Output lattice state
352 * @param outSizeLattice Output size lattice
353 * @param timestepHash Random seed
354 * @param siteOverflowList List for tracking overflow sites
355 * @param latticeXSize X dimension
356 * @param latticeYSize Y dimension
357 * @param latticeZSize Z dimension
358 * @param particlesPerSite Maximum particles per site
359 */
360__global__ void sizeAwareWindowedDiffusionKernel(
361 const unsigned int* inLattice,
362 const uint8_t* inSites,
363 const uint32_t* inSizeLattice,
364 unsigned int* outLattice,
365 uint32_t* outSizeLattice,
366 const unsigned long long timestepHash,
367 uint32_t* siteOverflowList,
368 uint32_t latticeXSize,
369 uint32_t latticeYSize,
370 uint32_t latticeZSize,
371 uint32_t particlesPerSite);
372
/**
 * @brief Kernel for size-aware reaction processing
 * @param inLattice Input lattice state
 * @param inSizeLattice Input site-size lattice
 * @param inSites Site types
 * @param outSizeLattice Output site-size lattice
 * @param outLattice Output lattice state
 * @param timestepHash Random seed for this timestep
 * @param siteOverflowList List for tracking overflow sites
 * @param reactionMatrix Reaction stoichiometry matrix
 * @param reactionLocationMatrix Reaction location matrix
 * @param latticeXSize X dimension
 * @param latticeYSize Y dimension
 * @param latticeZSize Z dimension
 * @param particlesPerSite Maximum particles per site
 * @param numReactions Number of reactions
 * @param numSpecies Number of species
 * @param numSiteTypes Number of site types
 */
389__global__ void sizeAwareReactionKernel(
390 const uint8_t* inLattice,
391 const uint32_t* inSizeLattice,
392 const uint8_t* inSites,
393 uint32_t* outSizeLattice,
394 uint8_t* outLattice,
395 unsigned long long timestepHash,
396 uint32_t* siteOverflowList,
397 const int8_t* reactionMatrix,
398 const uint8_t* reactionLocationMatrix,
399 uint32_t latticeXSize,
400 uint32_t latticeYSize,
401 uint32_t latticeZSize,
402 uint32_t particlesPerSite,
403 uint32_t numReactions,
404 uint32_t numSpecies,
405 uint32_t numSiteTypes
406);
407
408/**
409 * Optimized size-aware reaction kernel with precomputed propensities
410 * Multiple signatures based on compile-time macros (MPD_GLOBAL_S_MATRIX, MPD_GLOBAL_R_MATRIX)
411 */
412#ifdef MPD_GLOBAL_S_MATRIX
413#ifdef MPD_GLOBAL_R_MATRIX
414__global__ void sizeAwarePrecompReactionKernel(
415 const unsigned int* inLattice,
416 const uint32_t* inSizeLattice,
417 const uint8_t* inSites,
418 uint32_t* outSizeLattice,
419 unsigned int* outLattice,
420 const unsigned long long timestepHash,
421 uint32_t* siteOverflowList,
422 const int8_t* __restrict__ SG,
423 const uint8_t* __restrict__ RLG,
424 const unsigned int* __restrict__ reactionOrdersG,
425 const unsigned int* __restrict__ reactionSitesG,
426 const unsigned int* __restrict__ D1G,
427 const unsigned int* __restrict__ D2G,
428 const float* __restrict__ reactionRatesG,
429 const float* __restrict__ qp0,
430 const float* __restrict__ qp1,
431 const float* __restrict__ qp2,
432 uint32_t latticeXSize,
433 uint32_t latticeYSize,
434 uint32_t latticeZSize,
435 uint32_t particlesPerSite,
436 uint32_t numSpecies);
437#else
438__global__ void sizeAwarePrecompReactionKernel(
439 const uint8_t* inLattice,
440 const uint32_t* inSizeLattice,
441 const uint8_t* inSites,
442 uint32_t* outSizeLattice,
443 uint8_t* outLattice,
444 unsigned long long timestepHash,
445 uint32_t* siteOverflowList,
446 const int8_t* __restrict__ SG,
447 const uint8_t* __restrict__ RLG,
448 const unsigned int* __restrict__ reactionOrdersG,
449 const unsigned int* __restrict__ reactionSitesG,
450 const unsigned int* __restrict__ D1G,
451 const unsigned int* __restrict__ D2G,
452 const float* __restrict__ reactionRatesG,
453 const float* __restrict__ qp0,
454 const float* __restrict__ qp1,
455 const float* __restrict__ qp2,
456 uint32_t latticeXSize,
457 uint32_t latticeYSize,
458 uint32_t latticeZSize,
459 uint32_t particlesPerSite,
460 uint32_t numSpecies);
461#endif
462#else
463__global__ void sizeAwarePrecompReactionKernel(
464 const uint8_t* inLattice,
465 const uint32_t* inSizeLattice,
466 const uint8_t* inSites,
467 uint32_t* outSizeLattice,
468 uint8_t* outLattice,
469 unsigned long long timestepHash,
470 uint32_t* siteOverflowList,
471 const float* __restrict__ qp0,
472 const float* __restrict__ qp1,
473 const float* __restrict__ qp2,
474 uint32_t latticeXSize,
475 uint32_t latticeYSize,
476 uint32_t latticeZSize,
477 uint32_t particlesPerSite,
478 uint32_t numSpecies);
479#endif
480
481// =============== BYTE LATTICE OVERFLOW CORRECTION KERNEL ===============
482
483/**
484 * Byte-lattice-compatible overflow correction kernel
485 * Handles overflows for uint8_t lattices instead of unsigned int lattices
486 */
487__global__ void correct_byte_overflows(
488 uint8_t* lattice,
489 uint32_t* siteOverflowList,
490 uint32_t latticeXSize,
491 uint32_t latticeYSize,
492 uint32_t latticeZSize,
493 uint32_t particlesPerSite);
494
495// =============== NEW 1D SIZE-AWARE DIFFUSION KERNELS ===============
496
497/**
498 * @brief Size-aware Z-direction diffusion kernel (1D approach)
499 */
500__global__ void sizeAwareZDiffusionKernel(
501 const uint8_t* inLattice,
502 const uint8_t* inSites,
503 const uint32_t* inSizeLattice,
504 uint8_t* outLattice,
505 uint32_t* outSizeLattice,
506 const unsigned long long timestepHash,
507 uint32_t* siteOverflowList,
508 uint32_t latticeXSize,
509 uint32_t latticeYSize,
510 uint32_t latticeZSize,
511 uint32_t particlesPerSite
512);
513
514/**
515 * @brief Size-aware Y-direction diffusion kernel (1D approach)
516 */
517__global__ void sizeAwareYDiffusionKernel(
518 const uint8_t* inLattice,
519 const uint8_t* inSites,
520 const uint32_t* inSizeLattice,
521 uint8_t* outLattice,
522 uint32_t* outSizeLattice,
523 const unsigned long long timestepHash,
524 uint32_t* siteOverflowList,
525 uint32_t latticeXSize,
526 uint32_t latticeYSize,
527 uint32_t latticeZSize,
528 uint32_t particlesPerSite
529);
530
531/**
532 * @brief Size-aware X-direction diffusion kernel (1D approach)
533 */
534__global__ void sizeAwareXDiffusionKernel(
535 const uint8_t* inLattice,
536 const uint8_t* inSites,
537 const uint32_t* inSizeLattice,
538 uint8_t* outLattice,
539 uint32_t* outSizeLattice,
540 const unsigned long long timestepHash,
541 uint32_t* siteOverflowList,
542 uint32_t latticeXSize,
543 uint32_t latticeYSize,
544 uint32_t latticeZSize,
545 uint32_t particlesPerSite
546);
547
548
549#endif // LM_CUDA_PARTICLE_SIZE_KERNELS_CUH