Lattice Microbes 2.5
This is for whole cell modeling
Loading...
Searching...
No Matches
Profile.h
Go to the documentation of this file.
1/*
2 * University of Illinois Open Source License
3 * Copyright 2010-2018 Luthey-Schulten Group,
4 * All rights reserved.
5 *
6 * Developed by: Luthey-Schulten Group
7 * University of Illinois at Urbana-Champaign
8 * http://www.scs.uiuc.edu/~schulten
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy of
11 * this software and associated documentation files (the Software), to deal with
12 * the Software without restriction, including without limitation the rights to
13 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
14 * of the Software, and to permit persons to whom the Software is furnished to
15 * do so, subject to the following conditions:
16 *
17 * - Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimers.
19 *
20 * - Redistributions in binary form must reproduce the above copyright notice,
21 * this list of conditions and the following disclaimers in the documentation
22 * and/or other materials provided with the distribution.
23 *
24 * - Neither the names of the Luthey-Schulten Group, University of Illinois at
25 * Urbana-Champaign, nor the names of its contributors may be used to endorse or
26 * promote products derived from this Software without specific prior written
27 * permission.
28 *
29 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
32 * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
33 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
34 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
35 * OTHER DEALINGS WITH THE SOFTWARE.
36 *
37 * Author(s): Elijah Roberts, Mike Hallock
38 */
39#ifndef LPTF_PROF_H_
40#define LPTF_PROF_H_
41
42
43
44
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48
49#ifndef PROF_ENABLE
50
51#include "ProfileCodes.h"
// No-op definitions.
// With PROF_ENABLE undefined every profiling macro expands to nothing, so
// instrumented code compiles unchanged and macro arguments are never evaluated.
#define PROF_ALLOC
#define PROF_INIT
#define PROF_FORK_INIT
#define PROF_SET_THREAD(thread)
#define PROF_TEVENT(thread,event)
#define PROF_EVENT(event)
#define PROF_TBEGIN(thread,event)
#define PROF_BEGIN(event)
#define PROF_TEND(thread,event)
#define PROF_END(event)
#define PROF_WRITE
#define PROF_FORK_WRITE

#ifdef CUDA_VERSION
// PROF_CUDA_TINIT/PROF_CUDA_INIT are stubbed here as well so that code calling
// them still builds when profiling is switched off.
#define PROF_CUDA_TINIT(thread,event)
#define PROF_CUDA_INIT(event)
#define PROF_CUDA_TSTART(thread,stream)
#define PROF_CUDA_START(stream)
#define PROF_CUDA_TEVENT(thread,event,stream)
#define PROF_CUDA_EVENT(event,stream)
#define PROF_CUDA_TBEGIN(thread,event,stream)
#define PROF_CUDA_BEGIN(event,stream)
#define PROF_CUDA_TEND(thread,event,stream)
#define PROF_CUDA_END(event,stream)
#define PROF_CUDA_TFINISH(thread,stream)
#define PROF_CUDA_FINISH(stream)
#endif
78
79#elif defined PROF_USE_NVTX
80 #include "nvToolsExt.h"
81 #include "Profile_NVTX.h"
82
// NVTX backend: only the implicit-thread interval macros do any work.
// PROF_BEGIN pushes an NVTX range labelled profile_description[event]
// (presumably declared in Profile_NVTX.h - confirm) and PROF_END pops it.
// Everything else, including all PROF_CUDA_* macros, expands to nothing.
#define PROF_ALLOC
#define PROF_INIT
#define PROF_FORK_INIT
#define PROF_SET_THREAD(thread)
#define PROF_TEVENT(thread,event)
#define PROF_EVENT(event)
#define PROF_TBEGIN(thread,event)
#define PROF_BEGIN(event) {nvtxRangePushA(profile_description[event]);}
#define PROF_TEND(thread,event)
#define PROF_END(event) {nvtxRangePop();}
#define PROF_WRITE
#define PROF_FORK_WRITE

// PROF_CUDA_TINIT/PROF_CUDA_INIT are stubbed too so that code calling them
// builds under this backend.
#define PROF_CUDA_TINIT(thread,event)
#define PROF_CUDA_INIT(event)
#define PROF_CUDA_TSTART(thread,stream)
#define PROF_CUDA_START(stream)
#define PROF_CUDA_TEVENT(thread,event,stream)
#define PROF_CUDA_EVENT(event,stream)
#define PROF_CUDA_TBEGIN(thread,event,stream)
#define PROF_CUDA_BEGIN(event,stream)
#define PROF_CUDA_TEND(thread,event,stream)
#define PROF_CUDA_END(event,stream)
#define PROF_CUDA_TFINISH(thread,stream)
#define PROF_CUDA_FINISH(stream)
106
107#else // LPTF
108
109#include "ProfileCodes.h"
110
111#ifndef PROF_MAX_THREADS
112#error Must specify PROF_MAX_THREADS.
113#endif
114
115#ifndef PROF_MAX_EVENTS
116#error Must specify PROF_MAX_EVENTS.
117#endif
118
119#ifndef PROF_OUT_FILE
120#error Must specify PROF_OUT_FILE.
121#endif
122
123#define PROF_STRINGIFY(a) #a
124#define PROF_MAKE_STR(a) PROF_STRINGIFY(a)
125
126
127#if !defined(MACOSX) && !defined(LINUX)
128#error Unknown profile architecture.
129#endif
130
131
132// Include the headers.
133#include <stdio.h>
134#if defined(MACOSX)
135#include <mach/mach_time.h>
136#include <machine/endian.h>
137#include <pthread.h>
138#elif defined(LINUX)
139#include <byteswap.h>
140#include <arpa/inet.h>
141#include <time.h>
142#endif
143
144
145// Define some types.
146#if defined(MACOSX)
147#define _prof_time_to_net(x) __DARWIN_OSSwapInt64(x)
148typedef uint64_t _prof_time_t;
149extern double _prof_time_mult;
150extern pthread_key_t _prof_thread_key;
151#elif defined(LINUX)
152#if __BYTE_ORDER == __LITTLE_ENDIAN
153#define _prof_time_to_net(x) bswap_64(x)
154#elif __BYTE_ORDER == __BIG_ENDIAN
155#define _prof_time_to_net(x) x
156#endif
157typedef unsigned long long _prof_time_t;
158extern __thread unsigned int _prof_thread_id;
159extern __thread struct timespec _prof_timespec;
160#endif
161#define _prof_event_to_net(x) htonl(x)
162typedef unsigned int _prof_event_t;
163extern _prof_time_t _prof_start_time;
164extern unsigned int _prof_next_event[PROF_MAX_THREADS];
165extern _prof_event_t _prof_event_types[PROF_MAX_THREADS][PROF_MAX_EVENTS];
166extern _prof_time_t _prof_event_times[PROF_MAX_THREADS][PROF_MAX_EVENTS];
167extern unsigned int _prof_cuda_next_event[PROF_MAX_THREADS];
168extern _prof_time_t _prof_cuda_start_time[PROF_MAX_THREADS];\
169
170struct _eventList
171{
172 _prof_event_t evt;
173 void *begin;
174 void *end;
175 struct _eventList *next;
176};
177typedef struct _eventList _eventList_s;
178extern _eventList* _prof_cuda_event_list[PROF_MAX_THREADS];
179extern void* _prof_cuda_ref_event[PROF_MAX_THREADS];
180
181
182
// Storage definitions for every variable this header declares extern.
// PROF_ALLOC expands to plain variable definitions (each already terminated
// by ';'), followed by the platform-specific ones from PROF_ALLOC_ARCH.
#if defined(MACOSX)
#define PROF_ALLOC_ARCH \
    pthread_key_t _prof_thread_key;
#elif defined(LINUX)
#define PROF_ALLOC_ARCH \
    __thread unsigned int _prof_thread_id;\
    __thread struct timespec _prof_timespec;
#endif
#define PROF_ALLOC \
    double _prof_time_mult;\
    _prof_time_t _prof_start_time;\
    unsigned int _prof_next_event[PROF_MAX_THREADS];\
    _prof_event_t _prof_event_types[PROF_MAX_THREADS][PROF_MAX_EVENTS];\
    _prof_time_t _prof_event_times[PROF_MAX_THREADS][PROF_MAX_EVENTS];\
    unsigned int _prof_cuda_next_event[PROF_MAX_THREADS];\
    _prof_time_t _prof_cuda_start_time[PROF_MAX_THREADS];\
    _eventList_s* _prof_cuda_event_list[PROF_MAX_THREADS];\
    void * _prof_cuda_ref_event[PROF_MAX_THREADS]; \
    PROF_ALLOC_ARCH
203
204
// Perform any initialization.
// Runs the platform set-up (PROF_INIT_ARCH) and then zeroes every recording
// buffer. Each memset takes its size from the array itself so it cannot drift
// from the declarations.
#define PROF_INIT \
    {\
        PROF_INIT_ARCH \
        memset(_prof_next_event,0,sizeof(_prof_next_event));\
        memset(_prof_event_types,0,sizeof(_prof_event_types));\
        memset(_prof_event_times,0,sizeof(_prof_event_times));\
        memset(_prof_cuda_next_event,0,sizeof(_prof_cuda_next_event));\
        memset(_prof_cuda_start_time,0,sizeof(_prof_cuda_start_time));\
        memset(_prof_cuda_event_list,0,sizeof(_prof_cuda_event_list));\
        memset(_prof_cuda_ref_event,0,sizeof(_prof_cuda_ref_event));\
    }
#if defined(MACOSX)
// Caches the mach timebase ratio, records the start time and creates the
// thread-specific key. free() is registered as the key destructor so the
// per-thread index allocated by PROF_SET_THREAD is released at thread exit.
// On failure the key is set to 0, which the other macros treat as "no key".
#define PROF_INIT_ARCH \
    mach_timebase_info_data_t _prof_info;\
    mach_timebase_info(&_prof_info);\
    _prof_time_mult=((double)_prof_info.numer)/((double)_prof_info.denom);\
    _prof_start_time=mach_absolute_time();\
    if (pthread_key_create(&_prof_thread_key, free)) _prof_thread_key=0;
#elif defined(LINUX)
// PROF_GET_TIME subtracts _prof_start_time, so clear it first; otherwise a
// repeated PROF_INIT would store a time relative to the previous start.
#define PROF_INIT_ARCH \
    _prof_start_time=0;\
    _prof_start_time=PROF_GET_TIME;
#endif
228
229
// Re-initialization for use after a fork (per the macro name): clears the
// per-thread event counters and both host event buffers. The CUDA bookkeeping
// arrays and _prof_start_time are not touched.
#define PROF_FORK_INIT \
    {\
        memset(_prof_next_event,  0, PROF_MAX_THREADS*sizeof(unsigned int));\
        memset(_prof_event_types, 0, PROF_MAX_THREADS*PROF_MAX_EVENTS*sizeof(_prof_event_t));\
        memset(_prof_event_times, 0, PROF_MAX_THREADS*PROF_MAX_EVENTS*sizeof(_prof_time_t));\
    }
237
238
// Current time relative to _prof_start_time, as a _prof_time_t expression.
//  - MACOSX: mach_absolute_time() ticks since the start, scaled by the
//    numer/denom ratio cached in _prof_time_mult.
//  - LINUX: CLOCK_MONOTONIC in nanoseconds minus _prof_start_time, or 0 when
//    clock_gettime() fails. Overwrites the thread-local _prof_timespec.
#if defined(MACOSX)
#define PROF_GET_TIME \
    ((_prof_time_t)(((double)(mach_absolute_time()-_prof_start_time))*_prof_time_mult))
#elif defined(LINUX)
#define PROF_GET_TIME \
    ((clock_gettime(CLOCK_MONOTONIC, &_prof_timespec)!=0) \
        ? ((_prof_time_t)0ULL) \
        : ((_prof_time_t)((((_prof_time_t)_prof_timespec.tv_sec)*1000000000ULL)+_prof_timespec.tv_nsec-_prof_start_time)))
#endif
245
246
// Bind a profiling thread index to the calling thread.
//  - MACOSX: the index lives in a heap cell attached to _prof_thread_key. The
//    cell is allocated on first use; nothing happens when the key is 0 or the
//    allocation fails.
//  - LINUX: the index is stored directly in the thread-local _prof_thread_id.
#if defined(MACOSX)
#define PROF_SET_THREAD(thread) \
    {\
        if (_prof_thread_key!=0){\
            unsigned int* _prof_slot=(unsigned int*)pthread_getspecific(_prof_thread_key);\
            if (_prof_slot==NULL){\
                _prof_slot=(unsigned int*)malloc(sizeof(unsigned int));\
                if (_prof_slot!=NULL){\
                    pthread_setspecific(_prof_thread_key, _prof_slot);\
                }\
            }\
            if (_prof_slot!=NULL){\
                *_prof_slot=thread;\
            }\
        }\
    }
#elif defined(LINUX)
#define PROF_SET_THREAD(thread) _prof_thread_id=thread;
#endif
265
266
// Get the index of this thread (the value stored by PROF_SET_THREAD).
//  - MACOSX: reads the cell attached to _prof_thread_key; yields 0 when the
//    key is 0 or no cell has been attached to the calling thread.
//  - LINUX: reads the thread-local _prof_thread_id.
#if defined(MACOSX)
#define PROF_GET_THREAD \
    ((_prof_thread_key == 0 || pthread_getspecific(_prof_thread_key) == NULL) \
        ? (0) \
        : (*((unsigned int *)pthread_getspecific(_prof_thread_key))))
#elif defined(LINUX)
#define PROF_GET_THREAD (_prof_thread_id)
#endif
273
274
// Record an event.
// PROF_TEVENT_AT_TIME(thread,event,event_time) appends (event, event_time) to
// the buffer of `thread`, converting both to network byte order. The event is
// silently dropped when `thread` is out of range or that buffer is full.
// `thread` is evaluated exactly once and widened to an unsigned type before
// the bounds check, so a negative index is rejected instead of indexing before
// the arrays. `event` and `event_time` are evaluated only when the event is
// actually stored.
#define PROF_TEVENT_AT_TIME(thread,event,event_time) \
    {\
        const unsigned long long _prof_ev_thread=(unsigned long long)(thread);\
        if (_prof_ev_thread < PROF_MAX_THREADS && _prof_next_event[_prof_ev_thread] < PROF_MAX_EVENTS){\
            const unsigned int _prof_ev_slot=_prof_next_event[_prof_ev_thread];\
            _prof_event_types[_prof_ev_thread][_prof_ev_slot]=_prof_event_to_net(event);\
            _prof_event_times[_prof_ev_thread][_prof_ev_slot]=_prof_time_to_net((event_time));\
            _prof_next_event[_prof_ev_thread]=_prof_ev_slot+1;\
        }\
    }
// Record `event` now, on an explicit thread index / on the calling thread.
#define PROF_TEVENT(thread, event) PROF_TEVENT_AT_TIME(thread,event,PROF_GET_TIME)
#define PROF_EVENT(event) PROF_TEVENT_AT_TIME(PROF_GET_THREAD,event,PROF_GET_TIME)
284
285
// Record an interval event start.
// Intervals reuse the plain event channel: a start is logged as the event
// code plus 10000. The argument is parenthesized so that expressions such as
// `a & b` or `c ? x : y` are offset as a whole.
#define PROF_TBEGIN(thread,event) PROF_TEVENT(thread,(event)+10000)
#define PROF_BEGIN(event) PROF_EVENT((event)+10000)


// Record an interval event end (event code plus 20000).
#define PROF_TEND(thread,event) PROF_TEVENT(thread,(event)+20000)
#define PROF_END(event) PROF_EVENT((event)+20000)
294
295#ifdef CUDA_VERSION
296
297#if !defined(PROF_CUDA_ENABLE) || PROF_CUDA_ENABLE == 0
298
// CUDA profiling disabled (PROF_CUDA_ENABLE undefined or 0): every
// PROF_CUDA_* macro expands to nothing and its arguments are not evaluated.
// PROF_CUDA_TINIT/PROF_CUDA_INIT are stubbed as well so that code calling them
// builds in this configuration too.
#define PROF_CUDA_TINIT(thread,event)
#define PROF_CUDA_INIT(event)
#define PROF_CUDA_TSTART(thread,stream)
#define PROF_CUDA_START(stream)
#define PROF_CUDA_TEVENT(thread,event,stream)
#define PROF_CUDA_EVENT(event,stream)
#define PROF_CUDA_TBEGIN(thread,event,stream)
#define PROF_CUDA_BEGIN(event,stream)
#define PROF_CUDA_TEND(thread,event,stream)
#define PROF_CUDA_END(event,stream)
#define PROF_CUDA_TFINISH(thread,stream)
#define PROF_CUDA_FINISH(stream)
309
310#else
311
// Create the begin/end CUDA events for `event` and push a new node for them
// onto the front of the event list of `thread`. Does nothing when `thread` is
// out of range.
// `thread` is evaluated once. Both handles start out NULL so that a failed
// cudaEventCreate leaves a well-defined (invalid) handle rather than an
// indeterminate pointer. The local has a reserved-looking name so it cannot
// capture a caller argument such as a variable called `e`.
#define PROF_CUDA_TINIT(thread, event) \
    { \
        const unsigned long long _prof_init_thread=(unsigned long long)(thread); \
        if(_prof_init_thread < PROF_MAX_THREADS) { \
            _eventList_s *_prof_new_ev=new _eventList_s; \
            _prof_new_ev->evt=(event); \
            _prof_new_ev->begin=NULL; \
            _prof_new_ev->end=NULL; \
            cudaEventCreate((cudaEvent_t*)&_prof_new_ev->begin); \
            cudaEventCreate((cudaEvent_t*)&_prof_new_ev->end); \
            _prof_new_ev->next=_prof_cuda_event_list[_prof_init_thread]; \
            _prof_cuda_event_list[_prof_init_thread]=_prof_new_ev; \
        } \
    }
#define PROF_CUDA_INIT(event) PROF_CUDA_TINIT(PROF_GET_THREAD,event)
318
// Establish the host/device time reference for `thread`: lazily create the
// per-thread reference event, record it on `stream`, and, if the record call
// succeeded, remember the current host time in _prof_cuda_start_time[thread].
// Does nothing when `thread` is out of range.
#define PROF_CUDA_TSTART(thread,stream) \
    { \
        if (thread < PROF_MAX_THREADS) { \
            if (_prof_cuda_ref_event[thread] == NULL) { \
                cudaEventCreate((cudaEvent_t*)&_prof_cuda_ref_event[thread]); \
            } \
            if (cudaSuccess == cudaEventRecord((cudaEvent_t)_prof_cuda_ref_event[thread], stream)) { \
                _prof_cuda_start_time[thread]=PROF_GET_TIME; \
            } \
        } \
    }
#define PROF_CUDA_START(stream) PROF_CUDA_TSTART(PROF_GET_THREAD,stream)
330
// Mark the start of interval `event` on `stream`: find the node for `event`
// in the list of `thread` (creating it through PROF_CUDA_TINIT when it is
// missing, which places it at the list head) and record its begin event.
// Does nothing when `thread` is out of range.
#define PROF_CUDA_TBEGIN(thread, event, stream) \
    { \
        if(thread < PROF_MAX_THREADS) { \
            _eventList_s *_prof_node=_prof_cuda_event_list[thread]; \
            while(_prof_node!=NULL && !(_prof_node->evt==event)) \
            { \
                _prof_node=_prof_node->next; \
            } \
            if(_prof_node==NULL) \
            { \
                PROF_CUDA_TINIT(thread, event); \
                _prof_node=_prof_cuda_event_list[thread]; \
            } \
            cudaEventRecord((cudaEvent_t)_prof_node->begin, stream); \
        } \
    }

#define PROF_CUDA_BEGIN(event,stream) PROF_CUDA_TBEGIN(PROF_GET_THREAD,event,stream)
349
// Mark the end of interval `event` on `stream`: record the end event of the
// first list node of `thread` whose code matches. Nothing is recorded when
// `thread` is out of range or no node for `event` exists.
#define PROF_CUDA_TEND(thread, event, stream) \
    { \
        if(thread < PROF_MAX_THREADS) \
        { \
            for(_eventList_s *_prof_node=_prof_cuda_event_list[thread]; _prof_node!=NULL; _prof_node=_prof_node->next) \
            { \
                if(_prof_node->evt==event) \
                { \
                    cudaEventRecord((cudaEvent_t)_prof_node->end, stream); \
                    break; \
                } \
            } \
        } \
    }
#define PROF_CUDA_END(event,stream) PROF_CUDA_TEND(PROF_GET_THREAD,event,stream)
363
// Convert the CUDA interval events of `thread` into ordinary host events.
// For every list node, the elapsed time from the reference event to the
// node's begin/end event is queried (in milliseconds), scaled by 1e6 and added
// to _prof_cuda_start_time[thread]; the result is logged with code +10000 for
// the begin and +20000 for the end. A query that does not return cudaSuccess
// is skipped. Note that `stream` is not used by this macro.
#define PROF_CUDA_TFINISH(thread,stream) \
    { \
        if (thread < PROF_MAX_THREADS) { \
            cudaEvent_t _prof_ref=(cudaEvent_t)_prof_cuda_ref_event[thread]; \
            for (_eventList_s *_prof_node=_prof_cuda_event_list[thread]; _prof_node!=NULL; _prof_node=_prof_node->next) \
            { \
                float _prof_elapsed_ms=0.0f; \
                if (cudaSuccess==cudaEventElapsedTime(&_prof_elapsed_ms,(cudaEvent_t)_prof_ref,(cudaEvent_t)_prof_node->begin)) { \
                    PROF_TEVENT_AT_TIME(thread, (_prof_node->evt+10000), (_prof_cuda_start_time[thread]+(_prof_time_t)(1000000.0*((double)_prof_elapsed_ms)))); \
                } \
                if (cudaSuccess==cudaEventElapsedTime(&_prof_elapsed_ms,(cudaEvent_t)_prof_ref,(cudaEvent_t)_prof_node->end)) { \
                    PROF_TEVENT_AT_TIME(thread, (_prof_node->evt+20000), (_prof_cuda_start_time[thread]+(_prof_time_t)(1000000.0*((double)_prof_elapsed_ms)))); \
                } \
            } \
        } \
    }

#define PROF_CUDA_FINISH(stream) PROF_CUDA_TFINISH(PROF_GET_THREAD,stream)
382
383#endif //PROF_CUDA_ENABLE
384#endif //CUDA_VERSION
385
// Write out the profile data.
// The output file is named "<PROF_OUT_FILE>.<pid>" and contains, in order:
//   - the 4 bytes "SBPT",
//   - three 32-bit values in network byte order: the constant 1 (presumably a
//     format version - confirm), PROF_MAX_THREADS and PROF_MAX_EVENTS,
//   - the whole _prof_event_types array, then the whole _prof_event_times
//     array, both as stored (already converted when the events were recorded).
// The name is built in a stack buffer sized from the string literal (+32
// leaves room for '.', the pid digits and the terminator), so no allocation
// can fail. The file is opened in binary mode. A failure to open, write or
// close is reported on stderr; the macro never aborts the program.
#define PROF_WRITE \
    {\
        char _prof_buf[sizeof(PROF_MAKE_STR(PROF_OUT_FILE))+32];\
        snprintf(_prof_buf,sizeof(_prof_buf),"%s.%d",PROF_MAKE_STR(PROF_OUT_FILE),(int)getpid());\
        FILE * _prof_fp=fopen(_prof_buf,"wb");\
        if (_prof_fp==NULL){\
            fprintf(stderr,"PROF_WRITE: cannot open %s\n",_prof_buf);\
        } else {\
            const char _prof_magic[]="SBPT";\
            unsigned int _prof_hdr[3];\
            int _prof_ok=1;\
            _prof_hdr[0]=htonl(1);\
            _prof_hdr[1]=htonl(PROF_MAX_THREADS);\
            _prof_hdr[2]=htonl(PROF_MAX_EVENTS);\
            _prof_ok&=(fwrite(_prof_magic,1,sizeof(_prof_magic)-1,_prof_fp)==sizeof(_prof_magic)-1);\
            _prof_ok&=(fwrite(_prof_hdr,sizeof(_prof_hdr[0]),3,_prof_fp)==3);\
            _prof_ok&=(fwrite(_prof_event_types,1,sizeof(_prof_event_types),_prof_fp)==sizeof(_prof_event_types));\
            _prof_ok&=(fwrite(_prof_event_times,1,sizeof(_prof_event_times),_prof_fp)==sizeof(_prof_event_times));\
            _prof_ok&=(fclose(_prof_fp)==0);\
            if (!_prof_ok){\
                fprintf(stderr,"PROF_WRITE: error writing %s\n",_prof_buf);\
            }\
        }\
    }

// Writing from a forked process uses the same routine; the pid suffix keeps
// the output files of different processes apart.
#define PROF_FORK_WRITE PROF_WRITE
411
412#endif
413#endif