45#ifndef LM_RDME_MGPUMPDRDMESOLVER_H_ 
   46#define LM_RDME_MGPUMPDRDMESOLVER_H_ 
   59#define OVERFLOW_MODE_CLASSIC 0 
   60#define OVERFLOW_MODE_RELAXED 1 
   75struct gpu_worker_thread_params;
 
   94                            const uint numberReactionsA,
 
   95                            const uint* initialSpeciesCountsA,
 
   96                            const uint* reactionTypeA,
 
  100                            const uint kCols = 1);
 
  109                                     const unsigned int bytes_per_particle,
 
  111                                     const uint8_t* latticeData,
 
  112                                     const uint8_t* latticeSitesData,
 
  113                                     bool rowMajorData = true);
 
  159                                 const unsigned int bytes_per_particle,
 
  164                                  lm::io::Lattice* latticeDataSet);
 
  169                                     lm::io::SpeciesCounts* speciesCountsDataSet);
 
  192                                            const unsigned int maxXBlockSize,
 
  193                                            const unsigned int latticeXSize,
 
  194                                            const unsigned int latticeYSize,
 
  195                                            const unsigned int latticeZSize);
 
  198                                            const unsigned int blockXSize,
 
  199                                            const unsigned int blockYSize,
 
  200                                            const unsigned int latticeXSize,
 
  201                                            const unsigned int latticeYSize,
 
  202                                            const unsigned int latticeZSize);
 
  205                                            const unsigned int blockXSize,
 
  206                                            const unsigned int blockZSize,
 
  207                                            const unsigned int latticeXSize,
 
  208                                            const unsigned int latticeYSize,
 
  209                                            const unsigned int latticeZSize);
 
  212                                                   const unsigned int blockXSize,
 
  213                                                   const unsigned int blockYSize,
 
  214                                                   const unsigned int latticeXSize,
 
  215                                                   const unsigned int latticeYSize,
 
  216                                                   const unsigned int latticeZSize);
 
 
  242#ifdef MPD_GLOBAL_S_MATRIX 
  247#ifdef MPD_GLOBAL_T_MATRIX 
  251#ifdef MPD_GLOBAL_R_MATRIX 
  252    float*        reactionRatesG;
 
  253    unsigned int* reactionOrdersG;
 
  254    unsigned int* reactionSitesG;
 
 
  265__device__ inline size_t local_to_global(unsigned int x, unsigned int y, unsigned int z);
 
  266__device__ inline size_t local_index(unsigned int x, unsigned int y, unsigned int z);
 
  270                                     const uint8_t* inSites,
 
  271                                     unsigned int* outLattice,
 
  272                                     const unsigned int z_start,
 
  273                                     const unsigned long long timestepHash,
 
  274                                     unsigned int* siteOverflowList,
 
  275                                     unsigned int* buf_top,
 
  276                                     unsigned int* buf_bot);
 
  279                              const uint8_t* inSites,
 
  280                              unsigned int* outLattice,
 
  281                              const unsigned int z_start,
 
  282                              const unsigned long long timestepHash,
 
  283                              unsigned int* siteOverflowList);
 
  286                              const uint8_t* inSites,
 
  287                              unsigned int* outLattice,
 
  288                              const unsigned long long timestepHash,
 
  289                              unsigned int* siteOverflowList);
 
  292                              const uint8_t* inSites,
 
  293                              unsigned int* outLattice,
 
  294                              const unsigned long long timestepHash,
 
  295                              unsigned int* siteOverflowList,
 
  296                              const unsigned int z_start);
 
  298#ifdef MPD_GLOBAL_S_MATRIX 
  299    #ifdef MPD_GLOBAL_R_MATRIX 
  301                                                     const uint8_t* inSites,
 
  302                                                     unsigned int* outLattice,
 
  303                                                     const unsigned long long timestepHash,
 
  304                                                     unsigned int* siteOverflowList,
 
  306                                                     const __restrict__ int8_t* SG,
 
  307                                                     const __restrict__ uint8_t* RLG,
 
  308                                                     const unsigned int* __restrict__ reactionOrderG,
 
  309                                                     const unsigned int* __restrict__ reactionSitesG,
 
  310                                                     const unsigned int* __restrict__ D1G,
 
  311                                                     const unsigned int* __restrict__ D2G,
 
  312                                                     const float* __restrict__ reactionRatesG,
 
  313                                                     const float* __restrict__ qp0,
 
  314                                                     const float* __restrict__ qp1,
 
  315                                                     const float* __restrict__ qp2);
 
  318                                                             const uint8_t* inSites,
 
  319                                                             unsigned int* outLattice,
 
  320                                                             const unsigned long long timestepHash,
 
  321                                                             unsigned int* siteOverflowList,
 
  323                                                             const __restrict__ int8_t* SG,
 
  324                                                             const __restrict__ uint8_t* RLG,
 
  325                                                             const unsigned int* __restrict__ reactionOrdersG,
 
  326                                                             const unsigned int* __restrict__ reactionSitesG,
 
  327                                                             const unsigned int* __restrict__ D1G,
 
  328                                                             const unsigned int* __restrict__ D2G,
 
  329                                                             const float* __restrict__ reactionRatesG,
 
  330                                                             const float* __restrict__ qp0,
 
  331                                                             const float* __restrict__ qp1,
 
  332                                                             const float* __restrict__ qp2,
 
  333                                                             unsigned int* buf_top,
 
  334                                                             unsigned int* buf_bot);
 
  337                                             const uint8_t* inSites,
 
  338                                             unsigned int* outLattice,
 
  339                                             const unsigned long long timestepHash,
 
  340                                             unsigned int* siteOverflowList,
 
  341                                             const unsigned int z_start,
 
  342                                             const int8_t* __restrict__ SG,
 
  343                                             const uint8_t* __restrict__ RLG,
 
  344                                             const unsigned int* __restrict__ reactionOrdersG,
 
  345                                             const unsigned int* __restrict__ reactionSitesG,
 
  346                                             const unsigned int* __restrict__ D1G,
 
  347                                             const unsigned int* __restrict__ D2G,
 
  348                                             const float* __restrict__ reactionRatesG);
 
  351                                                     const uint8_t* inSites,
 
  352                                                     unsigned int* outLattice,
 
  353                                                     const unsigned long long timestepHash,
 
  354                                                     unsigned int* siteOverflowList,
 
  356                                                     const __restrict__ int8_t* SG,
 
  357                                                     const __restrict__ uint8_t* RLG,
 
  358                                                     const float* __restrict__ qp0,
 
  359                                                     const float* __restrict__ qp1,
 
  360                                                     const float* __restrict__ qp2);
 
  363                                                             const uint8_t* inSites,
 
  364                                                             unsigned int* outLattice,
 
  365                                                             const unsigned long long timestepHash,
 
  366                                                             unsigned int* siteOverflowList,
 
  368                                                             const __restrict__ int8_t* SG,
 
  369                                                             const __restrict__ uint8_t* RLG,
 
  370                                                             const float* __restrict__ qp0,
 
  371                                                             const float* __restrict__ qp1,
 
  372                                                             const float* __restrict__ qp2,
 
  373                                                             unsigned int* buf_top,
 
  374                                                             unsigned int* buf_bot);
 
  377                                             const uint8_t* inSites,
 
  378                                             unsigned int* outLattice,
 
  379                                             const unsigned long long timestepHash,
 
  380                                             unsigned int* siteOverflowList,
 
  381                                             const unsigned int z_start,
 
  382                                             const int8_t* __restrict__ SG,
 
  383                                             const uint8_t* __restrict__ RLG);
 
  387                                                 const uint8_t* inSites,
 
  388                                                 unsigned int* outLattice,
 
  389                                                 const unsigned long long timestepHash,
 
  390                                                 unsigned int* siteOverflowList,
 
  392                                                 const float* __restrict__ qp0,
 
  393                                                 const float* __restrict__ qp1,
 
  394                                                 const float* __restrict__ qp2);
 
  397                                                         const uint8_t* inSites,
 
  398                                                         unsigned int* outLattice,
 
  399                                                         const unsigned long long timestepHash,
 
  400                                                         unsigned int* siteOverflowList,
 
  402                                                         const float* __restrict__ qp0,
 
  403                                                         const float* __restrict__ qp1,
 
  404                                                         const float* __restrict__ qp2,
 
  405                                                         unsigned int* buf_top,
 
  406                                                         unsigned int* buf_bot);
 
  409                                         const uint8_t* inSites,
 
  410                                         unsigned int* outLattice,
 
  411                                         const unsigned long long timestepHash,
 
  412                                         unsigned int* siteOverflowList,
 
  413                                         const unsigned int z_start);
 
 
uint32_t site_size_t
Definition ByteLatticeExtended.h:23
uint32_t lattice_size_t
Definition Lattice.h:55
struct segmentDescriptor SegmentDescriptor_s
Definition SegmentDescriptor.h:57
double si_dist_t
Definition Types.h:63
unsigned int uint
Definition Types.h:52
virtual int hookSimulation(double time)
Definition CMESolver.cpp:1242
Definition MultiGPUMapper.h:50
A representation for the resources for a given node.
Definition ResourceAllocator.h:62
map< string, string > * parameters
Definition CMESolver.h:266
unsigned int replicate
Definition CMESolver.h:265
An object that tracks the available resources for the main simulation runner.
Definition ResourceAllocator.h:57
A Lattice that is based on packed bytes of memory, i.e. one byte per lattice site to hold particles.
Definition ByteLattice.h:53
Base class for lattice type objects.
Definition Lattice.h:132
Definition MGPUMpdRdmeSolver.h:78
virtual void * run_thread(int)
virtual void writeLatticeData(double time, ByteLattice *lattice, lm::io::Lattice *latticeDataSet)
virtual void initialize_decomposition()
int timesteps_to_run
Definition MGPUMpdRdmeSolver.h:147
virtual void calculateReactionLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockYSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
float * model_T
Definition MGPUMpdRdmeSolver.h:136
virtual void writeLatticeSites(double time, ByteLattice *lattice)
float * secondOrder
Definition MGPUMpdRdmeSolver.h:140
uint32_t overflowTimesteps
Definition MGPUMpdRdmeSolver.h:123
pthread_barrier_t stop_barrier
Definition MGPUMpdRdmeSolver.h:145
virtual void buildDiffusionModel(const uint numberSiteTypesA, const double *DFA, const uint *RLA, lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing, const uint8_t *latticeData, const uint8_t *latticeSitesData, bool rowMajorData=true)
virtual bool needsDiffusionModel()
Tells whether the solver needs a diffusion model.
Definition MGPUMpdRdmeSolver.h:91
uint32_t overflowListUses
Definition MGPUMpdRdmeSolver.h:124
bool aggcopy_x_unpack
Definition MGPUMpdRdmeSolver.h:151
virtual void copyModelsToDevice(int gpu)
float * firstOrder
Definition MGPUMpdRdmeSolver.h:140
virtual int handle_overflows(int gpu, void *hptr, void *dptr, int ts)
uint8_t * model_RL
Definition MGPUMpdRdmeSolver.h:137
virtual void calculateZLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockZSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
int overflow_handling
Definition MGPUMpdRdmeSolver.h:125
virtual void allocateLattice(lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing)
double printPerfInterval
Definition MGPUMpdRdmeSolver.h:149
unsigned int * model_reactionOrders
Definition MGPUMpdRdmeSolver.h:128
float * zeroOrder
Definition MGPUMpdRdmeSolver.h:140
unsigned int * model_reactionSites
Definition MGPUMpdRdmeSolver.h:129
virtual void buildModel(const uint numberSpeciesA, const uint numberReactionsA, const uint *initialSpeciesCountsA, const uint *reactionTypeA, const double *kA, const int *SA, const uint *DA, const uint kCols=1)
uint32_t seed
Definition MGPUMpdRdmeSolver.h:119
virtual void calculateYLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockYSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
virtual void hookCheckSimulation(double time, ByteLattice *lattice)
virtual ~MGPUMpdRdmeSolver()
unsigned int * model_D2
Definition MGPUMpdRdmeSolver.h:133
bool reactionModelModified
Definition MGPUMpdRdmeSolver.h:121
size_t zeroOrderSize
Definition MGPUMpdRdmeSolver.h:139
virtual void stop_threads()
virtual void recordSpeciesCounts(double time, ByteLattice *lattice, lm::io::SpeciesCounts *speciesCountsDataSet)
virtual int hookSimulation(double time, ByteLattice *lattice)
pthread_barrier_t start_barrier
Definition MGPUMpdRdmeSolver.h:145
virtual int run_next_timestep(int gpu, uint32_t timestep)
uint32_t current_timestep
Definition MGPUMpdRdmeSolver.h:148
unsigned int * model_D1
Definition MGPUMpdRdmeSolver.h:132
int8_t * model_S
Definition MGPUMpdRdmeSolver.h:135
size_t firstOrderSize
Definition MGPUMpdRdmeSolver.h:139
virtual void setupModelsOnDevice(int gpu)
friend void * gpu_worker_thread(void *arg)
bool use_spin_barrier
Definition MGPUMpdRdmeSolver.h:153
virtual void initialize(unsigned int replicate, map< string, string > *parameters, ResourceAllocator::ComputeResources *resources)
Initialize the simulation.
virtual void writeSpeciesCounts(lm::io::SpeciesCounts *speciesCountsDataSet)
float * model_reactionRates
Definition MGPUMpdRdmeSolver.h:130
virtual void start_threads()
pthread_barrier_t overflow_barrier
Definition MGPUMpdRdmeSolver.h:145
virtual bool needsReactionModel()
Tells whether the solver needs a reaction model.
Definition MGPUMpdRdmeSolver.h:90
bool aggcopy_r_pack
Definition MGPUMpdRdmeSolver.h:152
virtual int handle_all_overflows()
virtual void calculateXLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int maxXBlockSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
MultiGPUMapper * mapper
Definition MGPUMpdRdmeSolver.h:142
ResourceAllocator::ComputeResources * resources
Definition MGPUMpdRdmeSolver.h:143
gpu_worker_thread_params * threads
Definition MGPUMpdRdmeSolver.h:144
pthread_barrier_t simulation_barrier
Definition MGPUMpdRdmeSolver.h:145
virtual void generateTrajectory()
Actually run the simulation.
size_t secondOrderSize
Definition MGPUMpdRdmeSolver.h:139
virtual uint64_t getTimestepSeed(uint32_t timestep, uint32_t substep)
virtual void setReactionRate(unsigned int rxid, float rate)
virtual void computePropensities()
double tau
Definition MGPUMpdRdmeSolver.h:120
Definition RDMESolver.h:55
Lattice * lattice
Definition RDMESolver.h:73
virtual void buildDiffusionModel(const uint numberSiteTypesA, const double *DFA, const uint *RLA, lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing, const uint8_t *latticeData, const uint8_t *latticeSitesData, bool rowMajorData=true)
Definition RDMESolver.cpp:110
RDMESolver(RandomGenerator::Distributions neededDists)
Definition RDMESolver.cpp:58
Definition MGPUMpdRdmeSolver.h:264
__global__ void MGPU_x_kernel_unpack(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned int z_start, const unsigned long long timestepHash, unsigned int *siteOverflowList, unsigned int *buf_top, unsigned int *buf_bot)
__device__ size_t local_index(unsigned int x, unsigned int y, unsigned int z)
__global__ void MGPU_z_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, const unsigned int z_start)
__device__ size_t local_to_global(unsigned int x, unsigned int y, unsigned int z)
__global__ void MGPU_reaction_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, const unsigned int z_start)
__global__ void MGPU_precomp_reaction_kernel_packing(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, int z_start, const float *__restrict__ qp0, const float *__restrict__ qp1, const float *__restrict__ qp2, unsigned int *buf_top, unsigned int *buf_bot)
__global__ void correct_overflows_mgpu(unsigned int *lattice, unsigned int *siteOverflowList)
__global__ void MGPU_precomp_reaction_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, int z_start, const float *__restrict__ qp0, const float *__restrict__ qp1, const float *__restrict__ qp2)
__global__ void MGPU_y_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList)
__global__ void MGPU_x_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned int z_start, const unsigned long long timestepHash, unsigned int *siteOverflowList)
Definition Capsule.cpp:46
Definition MGPUMpdRdmeSolver.h:220
dim3 threads_z
Definition MGPUMpdRdmeSolver.h:237
unsigned int * dLattice
Definition MGPUMpdRdmeSolver.h:230
dim3 threads_x
Definition MGPUMpdRdmeSolver.h:237
SegmentDescriptor_s * segment
Definition MGPUMpdRdmeSolver.h:240
float * propZeroOrder
Definition MGPUMpdRdmeSolver.h:260
dim3 grid_y
Definition MGPUMpdRdmeSolver.h:236
unsigned int * d_overflows
Definition MGPUMpdRdmeSolver.h:233
dim3 threads_y
Definition MGPUMpdRdmeSolver.h:237
MultiGPUMapper * mapper
Definition MGPUMpdRdmeSolver.h:223
unsigned int * h_overflows
Definition MGPUMpdRdmeSolver.h:233
dim3 grid_x
Definition MGPUMpdRdmeSolver.h:236
int timesteps_to_run
Definition MGPUMpdRdmeSolver.h:227
int gpu
Definition MGPUMpdRdmeSolver.h:225
dim3 threads_r
Definition MGPUMpdRdmeSolver.h:237
pthread_t thread
Definition MGPUMpdRdmeSolver.h:221
float * propSecondOrder
Definition MGPUMpdRdmeSolver.h:260
dim3 grid_r
Definition MGPUMpdRdmeSolver.h:236
int ngpus
Definition MGPUMpdRdmeSolver.h:226
uint8_t * dSites
Definition MGPUMpdRdmeSolver.h:231
float * propFirstOrder
Definition MGPUMpdRdmeSolver.h:260
MGPUMpdRdmeSolver * runner
Definition MGPUMpdRdmeSolver.h:222
dim3 grid_z
Definition MGPUMpdRdmeSolver.h:236
cudaStream_t stream2
Definition MGPUMpdRdmeSolver.h:232
unsigned int * dLatticeTmp
Definition MGPUMpdRdmeSolver.h:230
cudaStream_t stream1
Definition MGPUMpdRdmeSolver.h:232