40#ifndef __MULTIGPUMAPPER__ 
   41#define __MULTIGPUMAPPER__ 
   /*
    * POSDIM(_p, _d): flatten a 3-component position _p into a linear index
    * inside a volume whose extents are _d, computed as
    *     _p.x + _p.y * _d.x + _p.z * _d.x * _d.y
    * so x varies fastest, then y, then z.
    *
    * Both arguments only need .x/.y/.z members. The arithmetic is carried out
    * in whatever type those members have; the macro adds no cast or widening.
    * _p and _d are each expanded more than once, so do not pass expressions
    * with side effects.
    */
   43#define POSDIM(_p, _d) ((_p).x + (_p).y * (_d).x + (_p).z * (_d).x * (_d).y) 
   /*
    * DIMSIZE(_d): number of cells in a volume with extents _d, i.e. the
    * product _d.x * _d.y * _d.z.
    *
    * The product is evaluated in the members' own type (no cast is applied
    * here), and _d is expanded three times, so avoid arguments with side
    * effects.
    */
   44#define DIMSIZE(_d) ((_d).x*(_d).y*(_d).z) 
  /**
   * Pure-virtual hook; every concrete mapper must provide the implementation.
   *
   * NOTE(review): the names suggest this moves one GPU's data from a host
   * buffer (hptr) into a device buffer (dptr) -- confirm against an
   * implementing subclass.
   *
   * Argument order here is (dptr, hptr); stage_out() takes the two pointers
   * in the opposite order.
   *
   * @param gpu   integer selecting the GPU; presumably an index into the
   *              mapper's device list -- TODO confirm
   * @param dptr  untyped pointer; presumably device memory -- TODO confirm
   * @param hptr  untyped pointer; presumably host memory -- TODO confirm
   */
  101        virtual void stage_in(
int gpu, 
void *dptr, 
void *hptr)=0;
 
  /**
   * Pure-virtual hook; every concrete mapper must provide the implementation.
   *
   * NOTE(review): the names suggest this moves one GPU's data from a device
   * buffer (dptr) back into a host buffer (hptr) -- confirm against an
   * implementing subclass.
   *
   * Argument order here is (hptr, dptr); stage_in() takes the two pointers
   * in the opposite order, so the calls are not interchangeable.
   *
   * @param gpu   integer selecting the GPU; presumably an index into the
   *              mapper's device list -- TODO confirm
   * @param hptr  untyped pointer; presumably host memory -- TODO confirm
   * @param dptr  untyped pointer; presumably device memory -- TODO confirm
   */
  103        virtual void stage_out(
int gpu, 
void *hptr, 
void *dptr)=0;
 
  105                                   cudaStream_t top, cudaStream_t bot,
 
  106                                   void *dptr = NULL)=0;
 
  /**
   * Pure-virtual hook; every concrete mapper must provide the implementation.
   * What "refresh" does to the buffer is defined entirely by the subclass and
   * is not visible from this declaration.
   *
   * @param gpu        integer selecting the GPU; presumably an index into the
   *                   mapper's device list -- TODO confirm
   * @param dptr       untyped pointer; presumably that GPU's device buffer
   *                   -- TODO confirm
   * @param timestamp  integer step/time tag supplied by the caller; its exact
   *                   meaning is up to the implementation
   */
  107        virtual void refresh(
int gpu, 
void *dptr, 
int timestamp)=0;
 
  /**
   * Pure-virtual hook; every concrete mapper must provide the implementation.
   * Shares its parameter list with schedule_recv().
   *
   * NOTE(review): presumably queues a transfer of this GPU's boundary data to
   * `neighbor` on `stream` -- confirm against an implementing subclass; nothing
   * in this declaration says whether the call blocks.
   *
   * @param gpu        integer selecting the sending GPU -- TODO confirm
   * @param dptr       untyped pointer; presumably the sender's device buffer
   *                   -- TODO confirm
   * @param timestamp  integer step/time tag supplied by the caller
   * @param neighbor   integer naming the peer; encoding (GPU index vs.
   *                   direction) is not visible here -- TODO confirm
   * @param stream     CUDA stream handle passed through to the implementation
   */
  109        virtual void schedule_send(
int gpu, 
void *dptr, 
int timestamp, 
int neighbor, cudaStream_t stream)=0;
 
  /**
   * Pure-virtual hook; every concrete mapper must provide the implementation.
   * Shares its parameter list with schedule_send().
   *
   * NOTE(review): presumably queues reception of boundary data from
   * `neighbor` into this GPU's buffer on `stream` -- confirm against an
   * implementing subclass; nothing in this declaration says whether the call
   * blocks.
   *
   * @param gpu        integer selecting the receiving GPU -- TODO confirm
   * @param dptr       untyped pointer; presumably the receiver's device buffer
   *                   -- TODO confirm
   * @param timestamp  integer step/time tag supplied by the caller
   * @param neighbor   integer naming the peer; encoding (GPU index vs.
   *                   direction) is not visible here -- TODO confirm
   * @param stream     CUDA stream handle passed through to the implementation
   */
  110        virtual void schedule_recv(
int gpu, 
void *dptr, 
int timestamp, 
int neighbor, cudaStream_t stream)=0;
 
 
  /*
   * check_error(): fetch (and thereby clear) the most recent CUDA runtime
   * error with cudaGetLastError(). If it is anything other than cudaSuccess,
   * print the call site's file and line together with the CUDA error string
   * via printf, then terminate the process with exit(1).
   *
   * Written as a GNU statement expression "({ ... })", so it depends on a
   * host compiler that accepts that extension. __FILE__/__LINE__ expand at
   * the point where the macro is used, not here.
   */
  #define check_error() ({                                              \
      cudaError_t last_status_ = cudaGetLastError();                    \
      if (last_status_ != cudaSuccess) {                                \
          printf("Cuda error %s:%d: %s\n", __FILE__, __LINE__,          \
                 cudaGetErrorString(last_status_));                     \
          exit(1);                                                      \
      }                                                                 \
  })
struct segmentDescriptor SegmentDescriptor_s
Definition SegmentDescriptor.h:57
virtual size_t get_authority_offset(int gpu)=0
virtual void initialize_gpu(int gpu)
SegmentDescriptor_s ** descriptor
Definition MultiGPUMapper.h:59
int apron
Definition MultiGPUMapper.h:58
float * lb_weights
Definition MultiGPUMapper.h:60
MultiGPUMapper(dim3 ldim, size_t cellsize, int apron, int overlap, int num_gpus, int *devices, int pages)
virtual void schedule_send(int gpu, void *dptr, int timestamp, int neighbor, cudaStream_t stream)=0
size_t cellsize
Definition MultiGPUMapper.h:56
virtual void stage_out(int gpu, void *hptr, void *dptr)=0
int overlap
Definition MultiGPUMapper.h:55
virtual bool determine_load_balance()=0
dim3 lattice_dim
Definition MultiGPUMapper.h:54
virtual dim3 get_local_dim(int gpu)=0
virtual void publish_state(int gpu, int timestamp, cudaStream_t top, cudaStream_t bot, void *dptr=NULL)=0
int num_gpus
Definition MultiGPUMapper.h:52
int * lb_cost
Definition MultiGPUMapper.h:61
virtual void stage_in_sites(int gpu, void *dptr, void *hptr)=0
SegmentDescriptor_s * getSegmentDescriptor(int gpu)
pthread_key_t affinity
Definition MultiGPUMapper.h:57
virtual int map_index_to_gpu(size_t index)=0
virtual int3 get_global_offset(int gpu)=0
virtual void initialize()=0
virtual size_t get_authority_size(int gpu)=0
virtual void stage_in(int gpu, void *dptr, void *hptr)=0
virtual ssize_t get_global_input_offset(int gpu)=0
int * device_id
Definition MultiGPUMapper.h:53
virtual size_t get_global_output_offset(int gpu)=0
size_t * device_memory
Definition MultiGPUMapper.h:62
void build_descriptor(int gpu, dim3 ldim, int3 goffset, dim3 active, dim3 loffset)
int pagecount
Definition MultiGPUMapper.h:63
virtual void refresh(int gpu, void *dptr, int timestamp)=0
bool enable_peer_access(int src, int dst)
virtual void schedule_recv(int gpu, void *dptr, int timestamp, int neighbor, cudaStream_t stream)=0
virtual size_t get_local_size(int gpu)=0
bool numa_bind_thread(int)
virtual ~MultiGPUMapper()
virtual dim3 get_global_dim(int gpu)=0
void record_execution_cost(int, int)