all: begin of time measurement infrastructure
author Sebastian <git@sraa.de>
Fri, 25 Jul 2014 14:27:29 +0000 (14:27 +0000)
committer Sebastian <git@sraa.de>
Fri, 25 Jul 2014 14:27:29 +0000 (14:27 +0000)
measure time differences, write them to shm, print them on the host

lb/esrc/lb_2d.c
lb/hsrc/main.c
lb/shared.h

index 094ed79e2778c17f1bf852bf74a3eb9667569fdc..ff086a1cc09ecb5f1aa8981aa1db6b07a2228b6b 100644 (file)
@@ -46,27 +46,40 @@ void init(void)
        e_barrier_init(barriers, tgt_bars);
 }
 
+#define READ_TIMER(X) /* stop E_CTIMER_0, record into clocks[X], restart; needs `clocks` in the caller's scope */ \
+       do { \
+               clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); /* E_CTIMER_MAX minus the value returned by stop; presumably elapsed ticks - confirm against e-lib */ \
+               e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX);   /* reload with E_CTIMER_MAX */ \
+               e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); /* start again for the next section */ \
+       } while(0) /* no trailing ';' here: the call site supplies it, so the macro stays a single statement */
+
 int main()
 {
        const FLOAT omega = 1.0;
+       unsigned clocks[TIMERS] = {0};
 
        init();
        d2q9_init(*block);
 
-       for(int i = 0; i < 10000; i++) {
-#if 0
+       for(int i = 0; i < 500; i++) {
+               READ_TIMER(0);
+
+#if 1
                /* collide all nodes */
                for(int y = 0; y < BLOCK_Y; y++)
                        for(int x = 0; x < BLOCK_X; x++)
                                d2q9_collide(*block, x, y, omega);
 
                /* synchronize */
+               READ_TIMER(1);
                e_barrier(barriers, tgt_bars);
+               READ_TIMER(2);
 
                /* stream all nodes */
                for(int y = 0; y < BLOCK_Y; y++)
                        for(int x = 0; x < BLOCK_X; x++)
                                d2q9_stream(*block, x, y);
+               READ_TIMER(3);
 
 #else
                /* collide boundaries: top, bottom */
@@ -74,6 +87,7 @@ int main()
                        d2q9_collide(*block, x, 0,         omega);
                        d2q9_collide(*block, x, BLOCK_Y-1, omega);
                }
+               READ_TIMER(1);
 
                /* collide boundaries: left, right */
                for(int y = 1; y < BLOCK_Y-1; y++) {
@@ -82,36 +96,58 @@ int main()
                }
 
                /* synchronize */
+               READ_TIMER(2);
                e_barrier(barriers, tgt_bars);
+               READ_TIMER(3);
 
                /* collide and stream the bulk */
                d2q9_collide_stream_bulk(*block, omega);
+               READ_TIMER(4);
 
-               /* stream the boundaries: left, right */
+               /* stream the boundaries: top, bottom */
                for(int x = 0; x < BLOCK_X; x++) {
                        d2q9_stream(*block, x, 0        );
                        d2q9_stream(*block, x, BLOCK_Y-1);
                }
+               READ_TIMER(5);
 
                /* stream the boundaries: left, right */
                for(int y = 1; y < BLOCK_Y-1; y++) {
                        d2q9_stream(*block, 0,         y);
                        d2q9_stream(*block, BLOCK_X-1, y);
                }
+               READ_TIMER(6);
 #endif
 
-               /* copy grid to shm and synchronize */
-               memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
-               e_barrier(barriers, tgt_bars);
-
-               /* flag host */
-               if(core == 0 && !(i%100)) {
-                       shm.states[row][col]++;
-                       delay(1);
+               if(!(i%100)) {
+                       /* copy grid to shm */
+                       memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
+
+                       /* copy clock values to shm */
+                       for(int i = 0; i < TIMERS; i++)
+                               shm.timers[row][col][i] = clocks[i];
+
+                       /* synchronize and flag host */
+                       e_barrier(barriers, tgt_bars);
+                       if(core == 0) {
+                               shm.states[row][col]++;
+                               delay(1);
+                       }
+               } else {
+                       e_barrier(barriers, tgt_bars);
+                       READ_TIMER(7);
                }
+
+               READ_TIMER(TIMERS-1);
+       }
+
+       /* copy clock values to shm */
+       for(int i = 1; i < TIMERS; i++) {
+               shm.timers[row][col][i] = clocks[i];
        }
 
+       /* flag host and stop core */
        shm.states[row][col] = -1;
-       while(1);
+       while(1) __asm__ volatile("idle");
 }
 
index cc89c681667116a134e000efeb9388d43b187ae9..3c33f0c13885e3b09292568cd8b9550d36024d86 100644 (file)
@@ -102,7 +102,21 @@ int main()
                        old0 = states[0][0];
                }
 
-               if(states[0][0] == -1) break;
+               /* print timers: one row per core; values are uint32_t, so print them unsigned */
+               printf("Timers:\n");
+               for(int y = 0; y < CORES_Y; y++) {
+                       for(int x = 0; x < CORES_X; x++) {
+                               printf("[%d,%d] ", x, y);
+                               for(int i = 0; i < TIMERS; i++) {
+                                       printf("%8lu  ", (unsigned long)shm.timers[y][x][i]);
+                               }
+                               printf("\n");
+                       }
+               }
+
+               if(states[0][0] == -1) {
+                       break;
+               }
        }
        /* ================================================================ */
 
index e752a359350f70da7a60bdbb795c3beff0d4c8c1..fea59e87ab1cdfc6218d5851a15cc52a60f64f76 100644 (file)
@@ -20,6 +20,8 @@
 #define BLOCK_X 26
 #define BLOCK_Y 26
 
+#define TIMERS 12
+
 /* floating point type */
 typedef float FLOAT;
 
@@ -33,6 +35,7 @@ typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
 /* shared memory structure */
 typedef struct {
        states_t     states;
+       uint32_t     timers[CORES_Y][CORES_X][TIMERS];
        d2q9_block_t lattice[CORES_Y][CORES_X];
 } PACKED shm_t;