From 33ecc46f0c30a200a1f45de78146e9be7cca659b Mon Sep 17 00:00:00 2001 From: Sebastian Date: Mon, 28 Jul 2014 15:33:49 +0000 Subject: [PATCH] all: new polling architecture Replaces states[][] with pollflag+iteration. The host communicates to the target when it is done reading shared memory. No need for delay() on the target anymore. Only core 0 writes the iteration counter, reducing traffic to shared memory. Writing populations is still slow. But no data loss anymore. --- lb/esrc/lb_2d.c | 29 ++++++++++++------------ lb/hsrc/main.c | 60 +++++++++++++++++++------------------------------ lb/shared.h | 13 ++++++----- 3 files changed, 45 insertions(+), 57 deletions(-) diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c index 19bbf11..623148b 100644 --- a/lb/esrc/lb_2d.c +++ b/lb/esrc/lb_2d.c @@ -121,35 +121,34 @@ int main() READ_TIMER(6); #endif + /* every 100th iteration: copy data to shm */ if(!(i%100)) { - /* copy grid to shm */ + /* copy iteration (core 0 only), lattice and timers to shm */ + if(core == 0) + shm.iteration = i; + memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t)); - /* copy clock values to shm */ for(int i = 0; i < TIMERS; i++) shm.timers[row][col][i] = clocks[i]; - /* synchronize and flag host */ + /* synchronize so the host is flagged only after every core has copied */ e_barrier(barriers, tgt_bars); + + /* flag host, then spin until the host changes pollflag */ if(core == 0) { - shm.states[row][col]++; - delay(1); + shm.pollflag = POLL_READY; + while(shm.pollflag == POLL_READY); } - } else { - e_barrier(barriers, tgt_bars); - READ_TIMER(7); } + /* synchronize: no core continues until core 0 has left the wait loop */ + e_barrier(barriers, tgt_bars); READ_TIMER(TIMERS-1); } - /* copy clock values to shm */ - for(int i = 1; i < TIMERS; i++) { - shm.timers[row][col][i] = clocks[i]; - } - - /* flag host and stop core */ - shm.states[row][col] = -1; + /* last iteration done: flag host and stop */ + if(core == 0) shm.pollflag = POLL_DONE; while(1) __asm__ volatile("idle"); } diff --git a/lb/hsrc/main.c b/lb/hsrc/main.c index 7279891..ca244a4 100644 --- a/lb/hsrc/main.c +++ b/lb/hsrc/main.c @@ -20,8 +20,8 @@ void 
write_animation(void); void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter); /* globals */ -static states_t laststates, states; /* old state value */ -static shm_t shm = {{{ 0 }}}; /* local shm copy */ +static shm_t shm = { 0 }; /* local shm copy */ +static uint32_t pollflag; /* host-side copy of shm.pollflag */ int main() { @@ -64,50 +64,36 @@ int main() while(1) { while(1) { - /* read states */ - if(e_read(&mem, 0, 0, (off_t)0, &states, - sizeof(states_t)) == E_ERR) - FAIL("Can't poll!\n"); - - /* compare with old values */ - if(memcmp(&laststates, &states, sizeof(states_t))) - break; + /* read polling flag */ + if(e_read(&mem, 0, 0, (off_t)0, &pollflag, + sizeof(uint32_t)) == E_ERR) + FAIL("Can't read pollflag!\n"); + + /* stop polling once the target reports POLL_READY or POLL_DONE */ + if(pollflag != POLL_BUSY) break; } + /* finish if done */ + if(pollflag == POLL_DONE) break; + /* read full shared memory */ if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) - FAIL("Can't read shm!\n"); + FAIL("Can't read full shm!\n"); - /* finish if done */ - if(states[0][0] == -1) { - break; + /* reset pollflag to POLL_BUSY: releases core 0 from its wait loop */ + pollflag = POLL_BUSY; + if(e_write(&mem, 0, 0, (off_t)0, &pollflag, + sizeof(uint32_t)) == E_ERR) { + FAIL("Can't reset pollflag!\n"); } - /* save (updated) states */ - memcpy(&states, &shm, sizeof(states_t)); - memcpy(&laststates, &shm, sizeof(states_t)); - - /* print states */ -#if 0 - for(int y = 0; y < CORES_Y; y++) { - printf("\t"); - for(int x = 0; x < CORES_X; x++) { - printf("0x%08x ", states[y][x]); - } - printf("\n"); - } -#else - printf("0x%08x\r", states[0][0]); fflush(stdout); -#endif + /* print iteration */ + printf("0x%08x\r", shm.iteration); fflush(stdout); /* write data */ - static uint32_t old0 = -1; - if(states[0][0] != old0) { - write_populations(shm.lattice, states[0][0]); - write_image(shm.lattice, states[0][0]); - write_timers(shm.timers, states[0][0]); - old0 = states[0][0]; - } + //write_populations(shm.lattice, shm.iteration); + write_image(shm.lattice, shm.iteration); + 
write_timers(shm.timers, shm.iteration); } /* ================================================================ */ diff --git a/lb/shared.h b/lb/shared.h index fea59e8..512ee95 100644 --- a/lb/shared.h +++ b/lb/shared.h @@ -8,7 +8,7 @@ #define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0); #define UNUSED __attribute__((unused)) #ifndef PACKED -#define PACKED __attribute__((packed)) +# define PACKED __attribute__((packed)) #endif /* PACKED */ /* number of cores */ @@ -22,19 +22,22 @@ #define TIMERS 12 +/* pollflag values: the target sets READY/DONE, the host resets to BUSY */ +#define POLL_BUSY 0x00 +#define POLL_READY 0x01 +#define POLL_DONE 0x02 + /* floating point type */ typedef float FLOAT; -/* state type */ -typedef uint32_t states_t[CORES_Y][CORES_X]; - /* node and block type (D2Q9) */ typedef FLOAT d2q9_node_t[9]; typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X]; /* shared memory structure */ typedef struct { - states_t states; + uint32_t pollflag; /* keep first: the host reads/writes it at offset 0 */ + uint32_t iteration; /* written by core 0 only */ uint32_t timers[CORES_Y][CORES_X][TIMERS]; d2q9_block_t lattice[CORES_Y][CORES_X]; } PACKED shm_t; -- 2.30.2