From 728ae71ff9a6c7429d243dbab38a8d188fdec607 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Fri, 25 Jul 2014 14:27:29 +0000 Subject: [PATCH] all: begin of time measurement infrastructure measure time differences, write them to shm, print them on the host READ_TIMER(n) stores in clocks[n] the amount E_CTIMER_0 has counted down from E_CTIMER_MAX since the previous READ_TIMER call and then restarts the timer. --- lb/esrc/lb_2d.c | 60 +++++++++++++++++++++++++++++++++++++++---------- lb/hsrc/main.c | 16 ++++++++++++- lb/shared.h | 3 +++ 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c index 094ed79..ff086a1 100644 --- a/lb/esrc/lb_2d.c +++ b/lb/esrc/lb_2d.c @@ -46,27 +46,40 @@ void init(void) e_barrier_init(barriers, tgt_bars); } +#define READ_TIMER(X) \ + do { \ + clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \ + e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \ + e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \ + } while(0) + int main() { const FLOAT omega = 1.0; + unsigned clocks[TIMERS] = {0}; init(); d2q9_init(*block); - for(int i = 0; i < 10000; i++) { -#if 0 + for(int i = 0; i < 500; i++) { + READ_TIMER(0); + +#if 1 /* collide all nodes */ for(int y = 0; y < BLOCK_Y; y++) for(int x = 0; x < BLOCK_X; x++) d2q9_collide(*block, x, y, omega); /* synchronize */ + READ_TIMER(1); e_barrier(barriers, tgt_bars); + READ_TIMER(2); /* stream all nodes */ for(int y = 0; y < BLOCK_Y; y++) for(int x = 0; x < BLOCK_X; x++) d2q9_stream(*block, x, y); + READ_TIMER(3); #else /* collide boundaries: top, bottom */ @@ -74,6 +87,7 @@ int main() d2q9_collide(*block, x, 0, omega); d2q9_collide(*block, x, BLOCK_Y-1, omega); } + READ_TIMER(1); /* collide boundaries: left, right */ for(int y = 1; y < BLOCK_Y-1; y++) { @@ -82,36 +96,58 @@ int main() } /* synchronize */ + READ_TIMER(2); e_barrier(barriers, tgt_bars); + READ_TIMER(3); /* collide and stream the bulk */ d2q9_collide_stream_bulk(*block, omega); + READ_TIMER(4); - /* stream the boundaries: left, right */ + /* stream the boundaries: top, bottom */ for(int x = 0; x < BLOCK_X; x++) { d2q9_stream(*block, x, 0 ); d2q9_stream(*block, x, 
BLOCK_Y-1); } + READ_TIMER(5); /* stream the boundaries: left, right */ for(int y = 1; y < BLOCK_Y-1; y++) { d2q9_stream(*block, 0, y); d2q9_stream(*block, BLOCK_X-1, y); } + READ_TIMER(6); #endif - /* copy grid to shm and synchronize */ - memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t)); - e_barrier(barriers, tgt_bars); - - /* flag host */ - if(core == 0 && !(i%100)) { - shm.states[row][col]++; - delay(1); + if(!(i%100)) { + /* copy grid to shm */ + memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t)); + + /* copy clock values to shm */ + for(int t = 0; t < TIMERS; t++) + shm.timers[row][col][t] = clocks[t]; + + /* synchronize and flag host */ + e_barrier(barriers, tgt_bars); + if(core == 0) { + shm.states[row][col]++; + delay(1); + } + } else { + e_barrier(barriers, tgt_bars); + READ_TIMER(7); } + + READ_TIMER(TIMERS-1); + } + + /* copy the final clock values (all slots, including 0) to shm */ + for(int i = 0; i < TIMERS; i++) { + shm.timers[row][col][i] = clocks[i]; } + /* flag host and stop core */ shm.states[row][col] = -1; - while(1); + while(1) __asm__ volatile("idle"); } diff --git a/lb/hsrc/main.c b/lb/hsrc/main.c index cc89c68..3c33f0c 100644 --- a/lb/hsrc/main.c +++ b/lb/hsrc/main.c @@ -102,7 +102,21 @@ int main() old0 = states[0][0]; } - if(states[0][0] == -1) break; + /* print timers */ + printf("Timers:\n"); + for(int y = 0; y < CORES_Y; y++) { + for(int x = 0; x < CORES_X; x++) { + printf("[%d,%d] ", x, y); + for(int i = 0; i < TIMERS; i++) { + printf("%8u ", (unsigned)shm.timers[y][x][i]); + } + printf("\n"); + } + } + + if(states[0][0] == -1) { + break; + } } /* ================================================================ */ diff --git a/lb/shared.h b/lb/shared.h index e752a35..fea59e8 100644 --- a/lb/shared.h +++ b/lb/shared.h @@ -20,6 +20,8 @@ #define BLOCK_X 26 #define BLOCK_Y 26 +#define TIMERS 12 + /* floating point type */ typedef float FLOAT; @@ -33,6 +35,7 @@ typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X]; /* shared memory structure */ typedef struct { 
states_t states; + uint32_t timers[CORES_Y][CORES_X][TIMERS]; d2q9_block_t lattice[CORES_Y][CORES_X]; } PACKED shm_t; -- 2.30.2