all: new polling architecture
author Sebastian <git@sraa.de>
Mon, 28 Jul 2014 15:33:49 +0000 (15:33 +0000)
committer Sebastian <git@sraa.de>
Mon, 28 Jul 2014 15:33:49 +0000 (15:33 +0000)
Replaces states[][] with pollflag+iteration.
The host communicates to the target when it is done reading shared
memory. No need for delay() on the target anymore. Only core 0
writes the iteration counter, reducing traffic to shared memory.
Writing populations is still slow. But no data loss anymore.

lb/esrc/lb_2d.c
lb/hsrc/main.c
lb/shared.h

index 19bbf11426817e71832907e78f4ccbec014ffc43..623148b1e236155d10490dbc88d1fc6e093c0737 100644 (file)
@@ -121,35 +121,34 @@ int main()
                READ_TIMER(6);
 #endif
 
+               /* copy data to shm if necessary */
                if(!(i%100)) {
-                       /* copy grid to shm */
+                       /* copy iteration, lattice and timers to shm */
+                       if(core == 0)
+                               shm.iteration = i;
+
                        memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
 
-                       /* copy clock values to shm */
                        for(int i = 0; i < TIMERS; i++)
                                shm.timers[row][col][i] = clocks[i];
 
-                       /* synchronize and flag host */
+                       /* synchronize */
                        e_barrier(barriers, tgt_bars);
+
+                       /* flag host and wait */
                        if(core == 0) {
-                               shm.states[row][col]++;
-                               delay(1);
+                               shm.pollflag = POLL_READY;
+                               while(shm.pollflag == POLL_READY);
                        }
-               } else {
-                       e_barrier(barriers, tgt_bars);
-                       READ_TIMER(7);
                }
 
+               /* synchronize */
+               e_barrier(barriers, tgt_bars);
                READ_TIMER(TIMERS-1);
        }
 
-       /* copy clock values to shm */
-       for(int i = 1; i < TIMERS; i++) {
-               shm.timers[row][col][i] = clocks[i];
-       }
-
-       /* flag host and stop core */
-       shm.states[row][col] = -1;
+       /* last iteration done: flag host and stop */
+       if(core == 0) shm.pollflag = POLL_DONE;
        while(1) __asm__ volatile("idle");
 }
 
index 72798916360de1712a2af3092111a46a33260384..ca244a4a81095ca83853eb4193543c3ee339be66 100644 (file)
@@ -20,8 +20,8 @@ void write_animation(void);
 void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
 
 /* globals */
-static states_t laststates, states;    /* old state value */
-static shm_t    shm = {{{ 0 }}};       /* local shm copy */
+static shm_t    shm = { 0 };   /* local shm copy */
+static uint32_t pollflag;
 
 int main()
 {
@@ -64,50 +64,36 @@ int main()
        while(1) {
 
                while(1) {
-                       /* read states */
-                       if(e_read(&mem, 0, 0, (off_t)0, &states,
-                               sizeof(states_t)) == E_ERR)
-                                       FAIL("Can't poll!\n");
-
-                       /* compare with old values */
-                       if(memcmp(&laststates, &states, sizeof(states_t)))
-                               break;
+                       /* read polling flag */
+                       if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
+                               sizeof(uint32_t)) == E_ERR)
+                                       FAIL("Can't read pollflag!\n");
+
+                       /* wait for something */
+                       if(pollflag != POLL_BUSY) break;
                }
 
+               /* finish if done */
+               if(pollflag == POLL_DONE) break;
+
                /* read full shared memory */
                if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
-                       FAIL("Can't read shm!\n");
+                       FAIL("Can't read full shm!\n");
 
-               /* finish if done */
-               if(states[0][0] == -1) {
-                       break;
+               /* reset pollflag */
+               pollflag = 0;
+               if(e_write(&mem, 0, 0, (off_t)0, &pollflag,
+                       sizeof(uint32_t)) == E_ERR) {
+                               FAIL("Can't reset pollflag!\n");
                }
 
-               /* save (updated) states */
-               memcpy(&states,     &shm, sizeof(states_t));
-               memcpy(&laststates, &shm, sizeof(states_t));
-
-               /* print states */
-#if 0
-               for(int y = 0; y < CORES_Y; y++) {
-                       printf("\t");
-                       for(int x = 0; x < CORES_X; x++) {
-                               printf("0x%08x ", states[y][x]);
-                       }
-                       printf("\n");
-               }
-#else
-               printf("0x%08x\r", states[0][0]); fflush(stdout);
-#endif
+               /* print iteration */
+               printf("0x%08x\r", shm.iteration); fflush(stdout);
 
                /* write data */
-               static uint32_t old0 = -1;
-               if(states[0][0] != old0) {
-                       write_populations(shm.lattice, states[0][0]);
-                       write_image(shm.lattice, states[0][0]);
-                       write_timers(shm.timers, states[0][0]);
-                       old0 = states[0][0];
-               }
+               //write_populations(shm.lattice, shm.iteration);
+               write_image(shm.lattice, shm.iteration);
+               write_timers(shm.timers, shm.iteration);
        }
        /* ================================================================ */
 
index fea59e87ab1cdfc6218d5851a15cc52a60f64f76..512ee958fb6a080269637e5ee6f76e9c5e77907b 100644 (file)
@@ -8,7 +8,7 @@
 #define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0);
 #define UNUSED __attribute__((unused))
 #ifndef PACKED
-#define PACKED __attribute__((packed))
+#      define PACKED __attribute__((packed))
 #endif /* PACKED */
 
 /* number of cores */
 
 #define TIMERS 12
 
+/* pollflag values */
+#define POLL_BUSY  0x00
+#define POLL_READY 0x01
+#define POLL_DONE  0x02
+
 /* floating point type */
 typedef float FLOAT;
 
-/* state type */
-typedef uint32_t states_t[CORES_Y][CORES_X];
-
 /* node and block type (D2Q9) */
 typedef FLOAT       d2q9_node_t[9];
 typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
 
 /* shared memory structure */
 typedef struct {
-       states_t     states;
+       uint32_t     pollflag;
+       uint32_t     iteration;
        uint32_t     timers[CORES_Y][CORES_X][TIMERS];
        d2q9_block_t lattice[CORES_Y][CORES_X];
 } PACKED shm_t;