e_barrier_init(barriers, tgt_bars);
}
+/*
+ * READ_TIMER(X): checkpoint E_CTIMER_0 into clocks[X].
+ *
+ * Stops E_CTIMER_0 and stores E_CTIMER_MAX minus the value returned by
+ * e_ctimer_stop() in clocks[X], then reloads the timer with E_CTIMER_MAX
+ * and restarts it on E_CTIMER_CLK, so each checkpoint holds the count
+ * consumed since the previous READ_TIMER.
+ *
+ * Expects an array named `clocks` in the calling scope.
+ *
+ * There is deliberately no semicolon after while(0): the caller writes
+ * `READ_TIMER(n);`, which then expands to exactly one statement and stays
+ * safe inside an unbraced if/else.
+ */
+#define READ_TIMER(X) \
+ do { \
+ clocks[(X)] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
+ e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
+ e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
+ } while(0)
+
int main()
{
const FLOAT omega = 1.0;
+ unsigned clocks[TIMERS] = {0}; /* one slot per READ_TIMER checkpoint; copied to shm.timers below */
init();
d2q9_init(*block);
- for(int i = 0; i < 10000; i++) {
-#if 0
+ for(int i = 0; i < 500; i++) {
+ READ_TIMER(0); /* checkpoint 0: top of each iteration */
+
+#if 1 /* selects the plain collide / barrier / stream variant; the #else variant is compiled out */
/* collide all nodes */
for(int y = 0; y < BLOCK_Y; y++)
for(int x = 0; x < BLOCK_X; x++)
d2q9_collide(*block, x, y, omega);
/* synchronize */
+ READ_TIMER(1); /* checkpoint 1: after the collide loops, before the barrier */
e_barrier(barriers, tgt_bars);
+ READ_TIMER(2); /* checkpoint 2: after the barrier returns */
/* stream all nodes */
for(int y = 0; y < BLOCK_Y; y++)
for(int x = 0; x < BLOCK_X; x++)
d2q9_stream(*block, x, y);
+ READ_TIMER(3); /* checkpoint 3: after the stream loops */
#else
/* collide boundaries: top, bottom */
d2q9_collide(*block, x, 0, omega);
d2q9_collide(*block, x, BLOCK_Y-1, omega);
}
+ READ_TIMER(1); /* checkpoint 1: after the top/bottom boundary collide */
/* collide boundaries: left, right */
for(int y = 1; y < BLOCK_Y-1; y++) {
}
/* synchronize */
+ READ_TIMER(2); /* checkpoint 2: after the left/right boundary loop, before the barrier */
e_barrier(barriers, tgt_bars);
+ READ_TIMER(3); /* checkpoint 3: after the barrier returns */
/* collide and stream the bulk */
d2q9_collide_stream_bulk(*block, omega);
+ READ_TIMER(4); /* checkpoint 4: after d2q9_collide_stream_bulk */
- /* stream the boundaries: left, right */
+ /* stream the boundaries: top, bottom */
for(int x = 0; x < BLOCK_X; x++) {
d2q9_stream(*block, x, 0 );
d2q9_stream(*block, x, BLOCK_Y-1);
}
+ READ_TIMER(5); /* checkpoint 5: after streaming the top/bottom boundaries */
/* stream the boundaries: left, right */
for(int y = 1; y < BLOCK_Y-1; y++) {
d2q9_stream(*block, 0, y);
d2q9_stream(*block, BLOCK_X-1, y);
}
+ READ_TIMER(6); /* checkpoint 6: after streaming the left/right boundaries */
#endif
- /* copy grid to shm and synchronize */
- memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
- e_barrier(barriers, tgt_bars);
-
- /* flag host */
- if(core == 0 && !(i%100)) {
- shm.states[row][col]++;
- delay(1);
+ if(!(i%100)) { /* publish only on every 100th iteration (including i == 0) */
+ /* copy grid to shm */
+ memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
+
+ /* copy clock values to shm */
+ for(int i = 0; i < TIMERS; i++) /* NOTE(review): this i shadows the outer loop counter */
+ shm.timers[row][col][i] = clocks[i];
+
+ /* synchronize and flag host */
+ e_barrier(barriers, tgt_bars);
+ if(core == 0) { /* only core 0 increments its state counter */
+ shm.states[row][col]++;
+ delay(1);
+ }
+ } else {
+ e_barrier(barriers, tgt_bars); /* one barrier per iteration on this path too, matching the publishing branch */
+ READ_TIMER(7); /* NOTE(review): only taken on non-publishing iterations; if TIMERS-1 == 7 the READ_TIMER below overwrites it - confirm */
}
+
+ READ_TIMER(TIMERS-1); /* last slot: end of the iteration */
+ }
+
+ /* copy clock values to shm */
+ for(int i = 1; i < TIMERS; i++) { /* NOTE(review): starts at 1, so clocks[0] is not copied here, unlike the in-loop copy - confirm intended */
+ shm.timers[row][col][i] = clocks[i];
}
+ /* flag host and stop core */
shm.states[row][col] = -1;
- while(1);
+ while(1) __asm__ volatile("idle"); /* loop forever, executing the "idle" instruction on each pass */
}