45#ifndef LM_RDME_MGPUINTMPDRDMESOLVER_H_ 
   46#define LM_RDME_MGPUINTMPDRDMESOLVER_H_ 
   59#define OVERFLOW_MODE_CLASSIC 0 
   60#define OVERFLOW_MODE_RELAXED 1 
   75struct gpu_worker_thread_int_params;
 
   94                            const uint numberReactionsA,
 
   95                            const uint* initialSpeciesCountsA,
 
   96                            const uint* reactionTypeA,
 
  100                            const uint kCols = 1);
 
  109                                     const unsigned int bytes_per_particle,
 
  111                                     const uint8_t* latticeData,
 
  112                                     const uint8_t* latticeSitesData,
 
  113                                     bool rowMajorData = 
true);
 
  159                                 const unsigned int bytes_per_particle,
 
  164                                  lm::io::Lattice* latticeDataSet);
 
  169                                     lm::io::SpeciesCounts* speciesCountsDataSet);
 
  193                                            const unsigned int maxXBlockSize,
 
  194                                            const unsigned int latticeXSize,
 
  195                                            const unsigned int latticeYSize,
 
  196                                            const unsigned int latticeZSize);
 
  199                                            const unsigned int blockXSize,
 
  200                                            const unsigned int blockYSize,
 
  201                                            const unsigned int latticeXSize,
 
  202                                            const unsigned int latticeYSize,
 
  203                                            const unsigned int latticeZSize);
 
  206                                            const unsigned int blockXSize,
 
  207                                            const unsigned int blockZSize,
 
  208                                            const unsigned int latticeXSize,
 
  209                                            const unsigned int latticeYSize,
 
  210                                            const unsigned int latticeZSize);
 
  213                                                   const unsigned int blockXSize,
 
  214                                                   const unsigned int blockYSize,
 
  215                                                   const unsigned int latticeXSize,
 
  216                                                   const unsigned int latticeYSize,
 
  217                                                   const unsigned int latticeZSize);
 
 
  243#ifdef MPD_GLOBAL_S_MATRIX 
  248#ifdef MPD_GLOBAL_T_MATRIX 
  252#ifdef MPD_GLOBAL_R_MATRIX 
  253    float*        reactionRatesG;
 
  254    unsigned int* reactionOrdersG;
 
  255    unsigned int* reactionSitesG;
 
 
  266__device__ 
inline size_t local_to_global(
unsigned int x, 
unsigned int y, 
unsigned int z);
 
  267__device__ 
inline size_t local_index(
unsigned int x, 
unsigned int y, 
unsigned int z);
 
  271                                     const uint8_t* inSites,
 
  272                                     unsigned int* outLattice,
 
  273                                     const unsigned int z_start,
 
  274                                     const unsigned long long timestepHash,
 
  275                                     unsigned int* siteOverflowList,
 
  276                                     unsigned int* buf_top,
 
  277                                     unsigned int* buf_bot);
 
  280                              const uint8_t* inSites,
 
  281                              unsigned int* outLattice,
 
  282                              const unsigned int z_start,
 
  283                              const unsigned long long timestepHash,
 
  284                              unsigned int* siteOverflowList);
 
  287                              const uint8_t* inSites,
 
  288                              unsigned int* outLattice,
 
  289                              const unsigned long long timestepHash,
 
  290                              unsigned int* siteOverflowList);
 
  293                              const uint8_t* inSites,
 
  294                              unsigned int* outLattice,
 
  295                              const unsigned long long timestepHash,
 
  296                              unsigned int* siteOverflowList,
 
  297                              const unsigned int z_start);
 
  299#ifdef MPD_GLOBAL_S_MATRIX 
  300    #ifdef MPD_GLOBAL_R_MATRIX 
  302                                                     const uint8_t* inSites,
 
  303                                                     unsigned int* outLattice,
 
  304                                                     const unsigned long long timestepHash,
 
  305                                                     unsigned int* siteOverflowList,
 
  307                                                     const __restrict__ int8_t* SG,
 
  308                                                     const __restrict__ uint8_t* RLG,
 
  309                                                     const unsigned int* __restrict__ reactionOrderG,
 
  310                                                     const unsigned int* __restrict__ reactionSitesG,
 
  311                                                     const unsigned int* __restrict__ D1G,
 
  312                                                     const unsigned int* __restrict__ D2G,
 
  313                                                     const float* __restrict__ reactionRatesG,
 
  314                                                     const float* __restrict__ qp0,
 
  315                                                     const float* __restrict__ qp1,
 
  316                                                     const float* __restrict__ qp2);
 
  319                                                             const uint8_t* inSites,
 
  320                                                             unsigned int* outLattice,
 
  321                                                             const unsigned long long timestepHash,
 
  322                                                             unsigned int* siteOverflowList,
 
  324                                                             const __restrict__ int8_t* SG,
 
  325                                                             const __restrict__ uint8_t* RLG,
 
  326                                                             const unsigned int* __restrict__ reactionOrdersG,
 
  327                                                             const unsigned int* __restrict__ reactionSitesG,
 
  328                                                             const unsigned int* __restrict__ D1G,
 
  329                                                             const unsigned int* __restrict__ D2G,
 
  330                                                             const float* __restrict__ reactionRatesG,
 
  331                                                             const float* __restrict__ qp0,
 
  332                                                             const float* __restrict__ qp1,
 
  333                                                             const float* __restrict__ qp2,
 
  334                                                             unsigned int* buf_top,
 
  335                                                             unsigned int* buf_bot);
 
  338                                             const uint8_t* inSites,
 
  339                                             unsigned int* outLattice,
 
  340                                             const unsigned long long timestepHash,
 
  341                                             unsigned int* siteOverflowList,
 
  342                                             const unsigned int z_start,
 
  343                                             const int8_t* __restrict__ SG,
 
  344                                             const uint8_t* __restrict__ RLG,
 
  345                                             const unsigned int* __restrict__ reactionOrdersG,
 
  346                                             const unsigned int* __restrict__ reactionSitesG,
 
  347                                             const unsigned int* __restrict__ D1G,
 
  348                                             const unsigned int* __restrict__ D2G,
 
  349                                             const float* __restrict__ reactionRatesG);
 
  352                                                     const uint8_t* inSites,
 
  353                                                     unsigned int* outLattice,
 
  354                                                     const unsigned long long timestepHash,
 
  355                                                     unsigned int* siteOverflowList,
 
  357                                                     const __restrict__ int8_t* SG,
 
  358                                                     const __restrict__ uint8_t* RLG,
 
  359                                                     const float* __restrict__ qp0,
 
  360                                                     const float* __restrict__ qp1,
 
  361                                                     const float* __restrict__ qp2);
 
  364                                                             const uint8_t* inSites,
 
  365                                                             unsigned int* outLattice,
 
  366                                                             const unsigned long long timestepHash,
 
  367                                                             unsigned int* siteOverflowList,
 
  369                                                             const __restrict__ int8_t* SG,
 
  370                                                             const __restrict__ uint8_t* RLG,
 
  371                                                             const float* __restrict__ qp0,
 
  372                                                             const float* __restrict__ qp1,
 
  373                                                             const float* __restrict__ qp2,
 
  374                                                             unsigned int* buf_top,
 
  375                                                             unsigned int* buf_bot);
 
  378                                             const uint8_t* inSites,
 
  379                                             unsigned int* outLattice,
 
  380                                             const unsigned long long timestepHash,
 
  381                                             unsigned int* siteOverflowList,
 
  382                                             const unsigned int z_start,
 
  383                                             const int8_t* __restrict__ SG,
 
  384                                             const uint8_t* __restrict__ RLG);
 
  388                                                 const uint8_t* inSites,
 
  389                                                 unsigned int* outLattice,
 
  390                                                 const unsigned long long timestepHash,
 
  391                                                 unsigned int* siteOverflowList,
 
  393                                                 const float* __restrict__ qp0,
 
  394                                                 const float* __restrict__ qp1,
 
  395                                                 const float* __restrict__ qp2);
 
  398                                                         const uint8_t* inSites,
 
  399                                                         unsigned int* outLattice,
 
  400                                                         const unsigned long long timestepHash,
 
  401                                                         unsigned int* siteOverflowList,
 
  403                                                         const float* __restrict__ qp0,
 
  404                                                         const float* __restrict__ qp1,
 
  405                                                         const float* __restrict__ qp2,
 
  406                                                         unsigned int* buf_top,
 
  407                                                         unsigned int* buf_bot);
 
  410                                         const uint8_t* inSites,
 
  411                                         unsigned int* outLattice,
 
  412                                         const unsigned long long timestepHash,
 
  413                                         unsigned int* siteOverflowList,
 
  414                                         const unsigned int z_start);
 
 
uint32_t site_size_t
Definition ByteLatticeExtended.h:23
uint32_t lattice_size_t
Definition Lattice.h:55
struct segmentDescriptor SegmentDescriptor_s
Definition SegmentDescriptor.h:57
double si_dist_t
Definition Types.h:63
unsigned int uint
Definition Types.h:52
virtual int hookSimulation(double time)
Definition CMESolver.cpp:1242
Definition MultiGPUMapper.h:50
A representation for the resources for a given node.
Definition ResourceAllocator.h:62
map< string, string > * parameters
Definition CMESolver.h:266
unsigned int replicate
Definition CMESolver.h:265
An object that tracks the available resources for the main simulation runner.
Definition ResourceAllocator.h:57
Definition CudaIntLattice.h:54
Base class for lattice type objects.
Definition Lattice.h:132
Definition MGPUIntMpdRdmeSolver.h:78
uint32_t overflowTimesteps
Definition MGPUIntMpdRdmeSolver.h:123
virtual int run_next_timestep(int gpu, uint32_t timestep)
bool use_spin_barrier
Definition MGPUIntMpdRdmeSolver.h:153
virtual int handle_overflows(int gpu, void *hptr, void *dptr, int ts)
gpu_worker_thread_int_params * threads
Definition MGPUIntMpdRdmeSolver.h:144
virtual void initialize(unsigned int replicate, map< string, string > *parameters, ResourceAllocator::ComputeResources *resources)
Initialize the simulation.
virtual void stop_threads()
virtual void setupModelsOnDevice(int gpu)
double printPerfInterval
Definition MGPUIntMpdRdmeSolver.h:149
virtual void writeLatticeSites(double time, CudaIntLattice *lattice)
virtual void writeSpeciesCounts(lm::io::SpeciesCounts *speciesCountsDataSet)
virtual void buildDiffusionModel(const uint numberSiteTypesA, const double *DFA, const uint *RLA, lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing, const uint8_t *latticeData, const uint8_t *latticeSitesData, bool rowMajorData=true)
virtual void writeLatticeData(double time, CudaIntLattice *lattice, lm::io::Lattice *latticeDataSet)
virtual void calculateReactionLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockYSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
size_t firstOrderSize
Definition MGPUIntMpdRdmeSolver.h:139
float * secondOrder
Definition MGPUIntMpdRdmeSolver.h:140
int overflow_handling
Definition MGPUIntMpdRdmeSolver.h:125
friend void * gpu_worker_thread_int(void *arg)
pthread_barrier_t start_barrier
Definition MGPUIntMpdRdmeSolver.h:145
bool aggcopy_r_pack
Definition MGPUIntMpdRdmeSolver.h:152
size_t zeroOrderSize
Definition MGPUIntMpdRdmeSolver.h:139
unsigned int * model_reactionOrders
Definition MGPUIntMpdRdmeSolver.h:128
MultiGPUMapper * mapper
Definition MGPUIntMpdRdmeSolver.h:142
pthread_barrier_t stop_barrier
Definition MGPUIntMpdRdmeSolver.h:145
int timesteps_to_run
Definition MGPUIntMpdRdmeSolver.h:147
virtual void computePropensities()
virtual void recordSpeciesCounts(double time, CudaIntLattice *lattice, lm::io::SpeciesCounts *speciesCountsDataSet)
int8_t * model_S
Definition MGPUIntMpdRdmeSolver.h:135
virtual void allocateLattice(lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing)
virtual void setReactionRate(unsigned int rxid, float rate)
double tau
Definition MGPUIntMpdRdmeSolver.h:120
float * zeroOrder
Definition MGPUIntMpdRdmeSolver.h:140
virtual void * run_thread(int)
unsigned int * model_D1
Definition MGPUIntMpdRdmeSolver.h:132
pthread_barrier_t overflow_barrier
Definition MGPUIntMpdRdmeSolver.h:145
virtual void setLatticeData(const uint8_t *latticeData)
virtual void initialize_decomposition()
virtual ~MGPUIntMpdRdmeSolver()
float * model_T
Definition MGPUIntMpdRdmeSolver.h:136
virtual bool needsDiffusionModel()
Tells whether the solver needs a diffusion model.
Definition MGPUIntMpdRdmeSolver.h:91
virtual void calculateXLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int maxXBlockSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
virtual void buildModel(const uint numberSpeciesA, const uint numberReactionsA, const uint *initialSpeciesCountsA, const uint *reactionTypeA, const double *kA, const int *SA, const uint *DA, const uint kCols=1)
uint32_t overflowListUses
Definition MGPUIntMpdRdmeSolver.h:124
virtual void start_threads()
unsigned int * model_D2
Definition MGPUIntMpdRdmeSolver.h:133
ResourceAllocator::ComputeResources * resources
Definition MGPUIntMpdRdmeSolver.h:143
virtual void hookCheckSimulation(double time, CudaIntLattice *lattice)
virtual bool needsReactionModel()
Tells whether the solver needs a reaction model.
Definition MGPUIntMpdRdmeSolver.h:90
pthread_barrier_t simulation_barrier
Definition MGPUIntMpdRdmeSolver.h:145
unsigned int * model_reactionSites
Definition MGPUIntMpdRdmeSolver.h:129
virtual void calculateZLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockZSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
virtual int handle_all_overflows()
virtual void calculateYLaunchParameters(dim3 *gridSize, dim3 *threadBlockSize, const unsigned int blockXSize, const unsigned int blockYSize, const unsigned int latticeXSize, const unsigned int latticeYSize, const unsigned int latticeZSize)
virtual void generateTrajectory()
Actually run the simulation.
float * model_reactionRates
Definition MGPUIntMpdRdmeSolver.h:130
bool aggcopy_x_unpack
Definition MGPUIntMpdRdmeSolver.h:151
uint8_t * model_RL
Definition MGPUIntMpdRdmeSolver.h:137
size_t secondOrderSize
Definition MGPUIntMpdRdmeSolver.h:139
bool reactionModelModified
Definition MGPUIntMpdRdmeSolver.h:121
uint32_t seed
Definition MGPUIntMpdRdmeSolver.h:119
virtual int hookSimulation(double time, CudaIntLattice *lattice)
float * firstOrder
Definition MGPUIntMpdRdmeSolver.h:140
virtual uint64_t getTimestepSeed(uint32_t timestep, uint32_t substep)
uint32_t current_timestep
Definition MGPUIntMpdRdmeSolver.h:148
virtual void copyModelsToDevice(int gpu)
Definition RDMESolver.h:55
Lattice * lattice
Definition RDMESolver.h:73
virtual void buildDiffusionModel(const uint numberSiteTypesA, const double *DFA, const uint *RLA, lattice_size_t latticeXSize, lattice_size_t latticeYSize, lattice_size_t latticeZSize, site_size_t particlesPerSite, const unsigned int bytes_per_particle, si_dist_t latticeSpacing, const uint8_t *latticeData, const uint8_t *latticeSitesData, bool rowMajorData=true)
Definition RDMESolver.cpp:110
RDMESolver(RandomGenerator::Distributions neededDists)
Definition RDMESolver.cpp:58
Definition MGPUIntMpdRdmeSolver.h:265
__device__ size_t local_to_global(unsigned int x, unsigned int y, unsigned int z)
__global__ void MGPU_precomp_reaction_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, int z_start, const float *__restrict__ qp0, const float *__restrict__ qp1, const float *__restrict__ qp2)
__global__ void MGPU_y_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList)
__global__ void MGPU_reaction_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, const unsigned int z_start)
__global__ void MGPU_z_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, const unsigned int z_start)
__device__ size_t local_index(unsigned int x, unsigned int y, unsigned int z)
__global__ void MGPU_x_kernel(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned int z_start, const unsigned long long timestepHash, unsigned int *siteOverflowList)
__global__ void MGPU_x_kernel_unpack(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned int z_start, const unsigned long long timestepHash, unsigned int *siteOverflowList, unsigned int *buf_top, unsigned int *buf_bot)
__global__ void correct_overflows_mgpu(unsigned int *lattice, unsigned int *siteOverflowList)
__global__ void MGPU_precomp_reaction_kernel_packing(const unsigned int *inLattice, const uint8_t *inSites, unsigned int *outLattice, const unsigned long long timestepHash, unsigned int *siteOverflowList, int z_start, const float *__restrict__ qp0, const float *__restrict__ qp1, const float *__restrict__ qp2, unsigned int *buf_top, unsigned int *buf_bot)
Definition Capsule.cpp:46
Definition MGPUIntMpdRdmeSolver.h:221
float * propZeroOrder
Definition MGPUIntMpdRdmeSolver.h:261
dim3 threads_r
Definition MGPUIntMpdRdmeSolver.h:238
unsigned int * h_overflows
Definition MGPUIntMpdRdmeSolver.h:234
float * propFirstOrder
Definition MGPUIntMpdRdmeSolver.h:261
MultiGPUMapper * mapper
Definition MGPUIntMpdRdmeSolver.h:224
int ngpus
Definition MGPUIntMpdRdmeSolver.h:227
SegmentDescriptor_s * segment
Definition MGPUIntMpdRdmeSolver.h:241
uint8_t * dSites
Definition MGPUIntMpdRdmeSolver.h:232
int timesteps_to_run
Definition MGPUIntMpdRdmeSolver.h:228
pthread_t thread
Definition MGPUIntMpdRdmeSolver.h:222
cudaStream_t stream1
Definition MGPUIntMpdRdmeSolver.h:233
dim3 grid_y
Definition MGPUIntMpdRdmeSolver.h:237
cudaStream_t stream2
Definition MGPUIntMpdRdmeSolver.h:233
unsigned int * dLattice
Definition MGPUIntMpdRdmeSolver.h:231
dim3 grid_r
Definition MGPUIntMpdRdmeSolver.h:237
dim3 grid_z
Definition MGPUIntMpdRdmeSolver.h:237
dim3 threads_x
Definition MGPUIntMpdRdmeSolver.h:238
dim3 threads_z
Definition MGPUIntMpdRdmeSolver.h:238
dim3 grid_x
Definition MGPUIntMpdRdmeSolver.h:237
unsigned int * d_overflows
Definition MGPUIntMpdRdmeSolver.h:234
dim3 threads_y
Definition MGPUIntMpdRdmeSolver.h:238
int gpu
Definition MGPUIntMpdRdmeSolver.h:226
float * propSecondOrder
Definition MGPUIntMpdRdmeSolver.h:261
unsigned int * dLatticeTmp
Definition MGPUIntMpdRdmeSolver.h:231
MGPUIntMpdRdmeSolver * runner
Definition MGPUIntMpdRdmeSolver.h:223