d2q9: consistent naming for 2D and 3D case
author Sebastian <git@sraa.de>
Mon, 15 Sep 2014 15:13:34 +0000 (15:13 +0000)
committer Sebastian <git@sraa.de>
Mon, 15 Sep 2014 15:13:34 +0000 (15:13 +0000)
d2q9/esrc/d2q9.c
d2q9/esrc/d2q9.h
d2q9/esrc/main.c
d2q9/hsrc/data.c
d2q9/hsrc/main.c
d2q9/shared.h

index bb7a940419fbc293623bccabff138fc4489ae64c..16e01dc97bcc1188c6313c5f9ef756662b223521 100644 (file)
@@ -6,6 +6,8 @@
 /* core indices */
 extern unsigned int row, col, core;
 
+/* ================================================================== */
+
 /* velocities */
 static const int d2q9_v[9][2] = { { 0, 0},
        {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
@@ -18,24 +20,26 @@ static const FLOAT d2q9_w[9] = { 4./9.,
        1./36., 1./9., 1./36., 1./9.,
 };
 
-void d2q9_init(d2q9_block_t block)
+/* ================================================================== */
+
+void init(block_t f)
 {
        /* all with rho = 0.1 */
-       for(int y = 0; y < BLOCKS_Y; y++)
-               for(int x = 0; x < BLOCKS_X; x++)
+       for(int y = 0; y < NODES_Y; y++)
+               for(int x = 0; x < NODES_X; x++)
                        for(int q = 0; q < 9; q++)
-                               block[y][x][q] = 0.1 * d2q9_w[q];
+                               f[y][x][q] = 0.1 * d2q9_w[q];
 
        if(core == 0) {
                /* except here with 0.2 */
                for(int q = 0; q < 9; q++)
-                       block[0][0][q] = 0.2 * d2q9_w[q];
+                       f[0][0][q] = 0.2 * d2q9_w[q];
        }
 
        return;
 }
 
-void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
+void collide(block_t f, int x, int y, FLOAT omega)
 {
        if(row == 0 && y == 0) {
                /* Zou/He boundary at top, with velocity */
@@ -53,7 +57,7 @@ void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
                f[y][x][1] = f[y][x][5] + tmp1 + tmp2;
                f[y][x][8] = f[y][x][4] + 2 * rho * UY / 3;
 
-       } else if(row == CORES_Y-1 && y == BLOCKS_Y-1) {
+       } else if(row == CORES_Y-1 && y == NODES_Y-1) {
                /* Zou/He boundary at bottom, no velocity */
                FLOAT tmp = ( f[y][x][6] - f[y][x][2] ) / 2;
                f[y][x][3] = f[y][x][7] + tmp;
@@ -67,7 +71,7 @@ void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
                f[y][x][7] = f[y][x][3] - tmp;
                f[y][x][6] = f[y][x][2];
 
-       } else if(col == CORES_X-1 && x == BLOCKS_X-1) {
+       } else if(col == CORES_X-1 && x == NODES_X-1) {
                /* Zou/He boundary at right, no velocity */
                FLOAT tmp = ( f[y][x][8] - f[y][x][4] ) / 2;
                f[y][x][1] = f[y][x][5] - tmp;
@@ -102,7 +106,7 @@ void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
        }
 }
 
-void d2q9_stream(d2q9_block_t f, int x, int y)
+void stream(block_t f, int x, int y)
 {
        for(int q = 1; q <= 4; q++) {
                int next_row = row;
@@ -111,10 +115,10 @@ void d2q9_stream(d2q9_block_t f, int x, int y)
                int next_y   = y + d2q9_v[q][1];
 
                /* inner borders (extend) */
-               if(next_x < 0)              { next_col--; next_x += BLOCKS_X; }
-               else if(next_x >= BLOCKS_X) { next_col++; next_x -= BLOCKS_X; }
-               if(next_y < 0)              { next_row--; next_y += BLOCKS_Y; }
-               else if(next_y >= BLOCKS_Y) { next_row++; next_y -= BLOCKS_Y; }
+               if(next_x < 0)             { next_col--; next_x += NODES_X; }
+               else if(next_x >= NODES_X) { next_col++; next_x -= NODES_X; }
+               if(next_y < 0)             { next_row--; next_y += NODES_Y; }
+               else if(next_y >= NODES_Y) { next_row++; next_y -= NODES_Y; }
 
 #if 0
                /* outer borders (wrap around) */
@@ -131,7 +135,7 @@ void d2q9_stream(d2q9_block_t f, int x, int y)
 #endif
 
                /* f: local block, g: local or remote block */
-               d2q9_block_t *g = (void*)f;
+               block_t *g = (void*)f;
                if(next_row != row || next_col != col) {
                        g = e_get_global_address(next_col, next_row, (void*)f);
                }
@@ -143,11 +147,11 @@ void d2q9_stream(d2q9_block_t f, int x, int y)
        }
 }
 
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
+void bulk(block_t f, FLOAT omega)
 {
        /* don't touch the border nodes */
-       for(int x = 1; x < BLOCKS_X-1; x++) {
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
+       for(int x = 1; x < NODES_X-1; x++) {
+               for(int y = 1; y < NODES_Y-1; y++) {
                        /* macroscopic */
                        FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
                                f[y][x][3] + f[y][x][4] + f[y][x][5] +
index d1dc231122728e6cf101bfba5491528285ad9c9b..6660c8ed18219da003f8ac056df26a18593a4a8a 100644 (file)
@@ -2,9 +2,8 @@
 
 #include "../shared.h"
 
-/* D2Q9 functions */
-void d2q9_init               (d2q9_block_t);
-void d2q9_collide            (d2q9_block_t, int x, int y,  FLOAT);
-void d2q9_stream             (d2q9_block_t, int x, int y);
-void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT);
+void init    (block_t);
+void collide (block_t, int x, int y, FLOAT);
+void stream  (block_t, int x, int y);
+void bulk    (block_t, FLOAT);
 
index cecaab5e8aed3c84e48b771e390fb0046fdda6ca..89a23af3e5c3e1ad36adceb688ea3b65ad206132 100644 (file)
@@ -13,10 +13,10 @@ volatile shm_t shm SECTION(".shared_dram");
 
 /* statically allocate dummy memory and local block overlay
    to prevent linker from putting stuff in banks 1..3 */
-static uint8_t      dummy_bank1[8192] USED SECTION(".data_bank1");
-static uint8_t      dummy_bank2[8192] USED SECTION(".data_bank2");
-static uint8_t      dummy_bank3[8192] USED SECTION(".data_bank3");
-static d2q9_block_t *block = (void*)0x2000;
+static uint8_t dummy_bank1[8192] USED SECTION(".data_bank1");
+static uint8_t dummy_bank2[8192] USED SECTION(".data_bank2");
+static uint8_t dummy_bank3[8192] USED SECTION(".data_bank3");
+static block_t *block = (void*)0x2000;
 
 /* barrier structures */
 volatile e_barrier_t  barriers[CORES];
@@ -25,25 +25,9 @@ volatile e_barrier_t  barriers[CORES];
 /* global index variables */
 unsigned int row, col, core;
 
-void init(void)
-{
-       /* compile-time checks */
-       BUILD_BUG(BLOCKS_X * BLOCKS_Y * sizeof(d2q9_node_t) > 24*1024);
-       BUILD_BUG(BLOCKS_X < 3 || BLOCKS_Y < 3);
-       BUILD_BUG(CORES_X < 1  || CORES_Y < 1);
-       BUILD_BUG(CORES_X > 4  || CORES_Y > 4);
-
-       /* core index */
-       e_coords_from_coreid(e_get_coreid(), &col, &row);
-       core = row * CORES_X + col;
-
-       /* barrier initialization */
-       e_barrier_init(barriers, tgt_bars);
-}
-
 #define READ_TIMER(X) \
        do { \
-               clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
+               times[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
                e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
                e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
        } while(0);
@@ -51,19 +35,33 @@ void init(void)
 int main()
 {
        const FLOAT omega = 1.0;
-       unsigned clocks[TIMERS] = {0};
+       unsigned times[TIMERS] = {0};
 
-       init();
-       d2q9_init(*block);
+       /* compile-time checks */
+       BUILD_BUG(NODES * sizeof(node_t) > 24 * 1024);
+       BUILD_BUG(NODES_X < 3 || NODES_Y < 3);
+       BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
+       BUILD_BUG(CORES_X > 8 || CORES_Y > 8);
+
+       /* save mesh coordinates */
+       e_coords_from_coreid(e_get_coreid(), &col, &row);
+       core = row * CORES_X + col;
 
-       for(int i = 0; i < 500; i++) {
+       /* initialize barrier */
+       e_barrier_init(barriers, tgt_bars);
+
+       /* initialize block */
+       init(*block);
+
+       /* main loop */
+       for(int iter = 0; iter < 500; iter++) {
                READ_TIMER(0);
 
 #if 1
                /* collide all nodes */
-               for(int y = 0; y < BLOCKS_Y; y++)
-                       for(int x = 0; x < BLOCKS_X; x++)
-                               d2q9_collide(*block, x, y, omega);
+               for(int y = 0; y < NODES_Y; y++)
+                       for(int x = 0; x < NODES_X; x++)
+                               collide(*block, x, y, omega);
 
                /* synchronize */
                READ_TIMER(1);
@@ -71,23 +69,23 @@ int main()
                READ_TIMER(2);
 
                /* stream all nodes */
-               for(int y = 0; y < BLOCKS_Y; y++)
-                       for(int x = 0; x < BLOCKS_X; x++)
-                               d2q9_stream(*block, x, y);
+               for(int y = 0; y < NODES_Y; y++)
+                       for(int x = 0; x < NODES_X; x++)
+                               stream(*block, x, y);
                READ_TIMER(3);
 
 #else
                /* collide boundaries: top, bottom */
-               for(int x = 0; x < BLOCKS_X; x++) {
-                       d2q9_collide(*block, x, 0,          omega);
-                       d2q9_collide(*block, x, BLOCKS_Y-1, omega);
+               for(int x = 0; x < NODES_X; x++) {
+                       collide(*block, x, 0,          omega);
+                       collide(*block, x, NODES_Y-1, omega);
                }
                READ_TIMER(1);
 
                /* collide boundaries: left, right */
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
-                       d2q9_collide(*block, 0,         y, omega);
-                       d2q9_collide(*block, BLOCKS_X-1, y, omega);
+               for(int y = 1; y < NODES_Y-1; y++) {
+                       collide(*block, 0,         y, omega);
+                       collide(*block, NODES_X-1, y, omega);
                }
 
                /* synchronize */
@@ -96,48 +94,45 @@ int main()
                READ_TIMER(3);
 
                /* collide and stream the bulk */
-               d2q9_collide_stream_bulk(*block, omega);
+               bulk(*block, omega);
                READ_TIMER(4);
 
                /* stream the boundaries: top, bottom */
-               for(int x = 0; x < BLOCKS_X; x++) {
-                       d2q9_stream(*block, x, 0         );
-                       d2q9_stream(*block, x, BLOCKS_Y-1);
+               for(int x = 0; x < NODES_X; x++) {
+                       stream(*block, x, 0         );
+                       stream(*block, x, NODES_Y-1);
                }
                READ_TIMER(5);
 
                /* stream the boundaries: left, right */
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
-                       d2q9_stream(*block, 0,          y);
-                       d2q9_stream(*block, BLOCKS_X-1, y);
+               for(int y = 1; y < NODES_Y-1; y++) {
+                       stream(*block, 0,          y);
+                       stream(*block, NODES_X-1, y);
                }
                READ_TIMER(6);
 #endif
 
                /* copy data to shm if necessary */
-               if(!(i%100)) {
-                       /* copy iteration, lattice and timers to shm */
-                       if(core == 0)
-                               shm.iteration = i;
-
-                       memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
+               if(!(iter % 1)) {
+                       /* copy lattice to shm */
+                       memcpy(&shm.lattice[row][col], block, sizeof(block_t));
 
-                       for(int i = 0; i < TIMERS; i++)
-                               shm.timers[row][col][i] = clocks[i];
+                       /* copy times to shm */
+                       memcpy(&shm.times[row][col], times, sizeof(times_t));
 
                        /* synchronize */
                        e_barrier(barriers, tgt_bars);
 
-                       /* flag host and wait */
+                       /* core 0: write counter and flag host; wait */
                        if(core == 0) {
-                               shm.pollflag = POLL_READY;
+                               shm.iteration = iter;
+                               shm.pollflag  = POLL_READY;
                                while(shm.pollflag == POLL_READY);
                        }
                }
 
                /* synchronize */
                e_barrier(barriers, tgt_bars);
-               READ_TIMER(TIMERS-1);
        }
 
        /* last iteration done: flag host and stop */
index dfe2392be7b68cef2dac2245d2098fe8c2322ec4..678c0a2b430f64875312f6c2b2b0dc488656bcd8 100644 (file)
@@ -20,9 +20,8 @@ void fixsudo(const char *filename)
        }
 }
 
-
 /* write a (semi-) human-readable dump of the lattice */
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_populations(block_t f[CORES_Y][CORES_X], int iter)
 {
        FILE *file = fopen("populations.dat", "a");
        if(!file) {
@@ -31,16 +30,17 @@ void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
        }
 
        for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCKS_Y; y++) {
+               for(int y = 0; y < NODES_Y; y++) {
                        for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCKS_X; x++) {
+                               for(int x = 0; x < NODES_X; x++) {
                                        fprintf(file, "%3d: [%3d,%3d]: ",
                                                iter,
-                                               cx * BLOCKS_X + x,
-                                               cy * BLOCKS_Y + y
+                                               cx * NODES_X + x,
+                                               cy * NODES_Y + y
                                        );
                                        for(int q = 0; q < 9; q++) {
-                                               fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]);
+                                               fprintf(file, "%.5f ",
+                                                       f[cy][cx][y][x][q]);
                                        }
                                        fprintf(file, "\n");
                                }
@@ -56,7 +56,7 @@ void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
 }
 
 /* write an 8-bit grayscale, binary PPM image of the particle density */
-void write_density(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_density(block_t f[CORES_Y][CORES_X], int iter)
 {
        char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
 
@@ -67,18 +67,18 @@ void write_density(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
                return;
        }
        fprintf(file, "P5\n%d %d\n%d\n",
-               CORES_X*BLOCKS_X, CORES_Y*BLOCKS_Y, 255);
+               CORES_X*NODES_X, CORES_Y*NODES_Y, 255);
 
        /* calculate all densities and remember min/max */
        FLOAT min = 1.0, max = 0;
-       FLOAT rhos[CORES_Y][BLOCKS_Y][CORES_X][BLOCKS_X];
+       FLOAT rhos[CORES_Y][NODES_Y][CORES_X][NODES_X];
        for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCKS_Y; y++) {
+               for(int y = 0; y < NODES_Y; y++) {
                        for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCKS_X; x++) {
+                               for(int x = 0; x < NODES_X; x++) {
                                        FLOAT rho = 0;
                                        for(int q = 0; q < 9; q++)
-                                               rho += lattice[cy][cx][y][x][q];
+                                               rho += f[cy][cx][y][x][q];
                                        rhos[cy][y][cx][x] = rho;
 
                                        if(rho < min) min = rho;
@@ -90,9 +90,9 @@ void write_density(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
 
        /* scale values and write them to the image */
        for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCKS_Y; y++) {
+               for(int y = 0; y < NODES_Y; y++) {
                        for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCKS_X; x++) {
+                               for(int x = 0; x < NODES_X; x++) {
                                        unsigned char gray;
                                        gray = (255. * (rhos[cy][y][cx][x]-min) / (max-min));
                                        fwrite(&gray, 1, 1, file);
@@ -109,7 +109,7 @@ void write_density(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
 }
 
 /* write an 8-bit grayscale, binary PPM image of the particle velocity */
-void write_velocity(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_velocity(block_t f[CORES_Y][CORES_X], int iter)
 {
        char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
 
@@ -120,44 +120,44 @@ void write_velocity(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
                return;
        }
        fprintf(file, "P5\n%d %d\n%d\n",
-               CORES_X*BLOCKS_X, CORES_Y*BLOCKS_Y, 255);
+               CORES_X*NODES_X, CORES_Y*NODES_Y, 255);
 
 
        /* calculate all velocities and remember min/max */
        FLOAT min = 1000, max = 0;
-       FLOAT us[CORES_Y][BLOCKS_Y][CORES_X][BLOCKS_X];
+       FLOAT us[CORES_Y][NODES_Y][CORES_X][NODES_X];
        for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCKS_Y; y++) {
+               for(int y = 0; y < NODES_Y; y++) {
                        for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCKS_X; x++) {
+                               for(int x = 0; x < NODES_X; x++) {
                                        FLOAT rho = (
-                                               lattice[cy][cx][y][x][0] +
-                                               lattice[cy][cx][y][x][1] +
-                                               lattice[cy][cx][y][x][2] +
-                                               lattice[cy][cx][y][x][3] +
-                                               lattice[cy][cx][y][x][4] +
-                                               lattice[cy][cx][y][x][5] +
-                                               lattice[cy][cx][y][x][6] +
-                                               lattice[cy][cx][y][x][7] +
-                                               lattice[cy][cx][y][x][8]
+                                               f[cy][cx][y][x][0] +
+                                               f[cy][cx][y][x][1] +
+                                               f[cy][cx][y][x][2] +
+                                               f[cy][cx][y][x][3] +
+                                               f[cy][cx][y][x][4] +
+                                               f[cy][cx][y][x][5] +
+                                               f[cy][cx][y][x][6] +
+                                               f[cy][cx][y][x][7] +
+                                               f[cy][cx][y][x][8]
                                        );
 
                                        FLOAT ux = (
-                                               lattice[cy][cx][y][x][5] +
-                                               lattice[cy][cx][y][x][6] +
-                                               lattice[cy][cx][y][x][7] -
-                                               lattice[cy][cx][y][x][1] -
-                                               lattice[cy][cx][y][x][2] -
-                                               lattice[cy][cx][y][x][3]
+                                               f[cy][cx][y][x][5] +
+                                               f[cy][cx][y][x][6] +
+                                               f[cy][cx][y][x][7] -
+                                               f[cy][cx][y][x][1] -
+                                               f[cy][cx][y][x][2] -
+                                               f[cy][cx][y][x][3]
                                        ) / rho;
 
                                        FLOAT uy = (
-                                               lattice[cy][cx][y][x][1] +
-                                               lattice[cy][cx][y][x][7] +
-                                               lattice[cy][cx][y][x][8] -
-                                               lattice[cy][cx][y][x][3] -
-                                               lattice[cy][cx][y][x][4] -
-                                               lattice[cy][cx][y][x][5]
+                                               f[cy][cx][y][x][1] +
+                                               f[cy][cx][y][x][7] +
+                                               f[cy][cx][y][x][8] -
+                                               f[cy][cx][y][x][3] -
+                                               f[cy][cx][y][x][4] -
+                                               f[cy][cx][y][x][5]
                                        ) / rho;
 
                                        FLOAT u = sqrtf(ux*ux + uy*uy);
@@ -171,9 +171,9 @@ void write_velocity(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
 
        /* scale values and write them to the image */
        for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCKS_Y; y++) {
+               for(int y = 0; y < NODES_Y; y++) {
                        for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCKS_X; x++) {
+                               for(int x = 0; x < NODES_X; x++) {
                                        unsigned char gray;
                                        gray = (255. * (us[cy][y][cx][x]-min) / (max-min));
                                        fwrite(&gray, 1, 1, file);
@@ -217,7 +217,7 @@ void convert_to_mp4(void)
 }
 
 /* write timer values */
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
+void write_timers(times_t times[CORES_Y][CORES_X], uint32_t iter)
 {
        FILE *file = fopen("timers.dat", "ab");
        if(!file) {
@@ -230,7 +230,7 @@ void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
                for(int x = 0; x < CORES_X; x++) {
                        fprintf(file, "[%d,%d]: ", x, y);
                        for(int t = 0; t < TIMERS; t++) {
-                               fprintf(file, "%8d ", timers[y][x][t]);
+                               fprintf(file, "%8d ", times[y][x][t]);
                        }
                        fprintf(file, "\n");
                }
index f69be76d29e8711d15244caa91fb204c5ba2cfd9..73062520c68a9da7f6ee31e99b3c9da2ad60e809 100644 (file)
 
 /* helper functions */
 void fixsudo(const char *filename);
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_density    (d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_velocity   (d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_populations(block_t f[CORES_Y][CORES_X], int iter);
+void write_density    (block_t f[CORES_Y][CORES_X], int iter);
+void write_velocity   (block_t f[CORES_Y][CORES_X], int iter);
 void convert_to_gif(void);
 void convert_to_mp4(void);
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
+void write_timers(times_t times[CORES_Y][CORES_X], uint32_t iter);
 
 /* globals */
 static shm_t    shm = { 0 };   /* local shm copy */
@@ -65,7 +65,6 @@ int main()
        /* ================================================================ */
        printf("Polling shared memory.\n");
        while(1) {
-
                while(1) {
                        /* read polling flag */
                        if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
@@ -77,7 +76,8 @@ int main()
                }
 
                /* finish if done */
-               if(pollflag == POLL_DONE) break;
+               if(pollflag == POLL_DONE)
+                       break;
 
                /* read full shared memory */
                if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
@@ -96,7 +96,7 @@ int main()
                /* write data */
                //write_populations(shm.lattice, shm.iteration);
                write_density(shm.lattice, shm.iteration);
-               write_timers(shm.timers, shm.iteration);
+               write_timers(shm.times, shm.iteration);
        }
        /* ================================================================ */
 
index 790239bdc2cb36c49e6dcff34038528d1a36744f..0246e196383c42a6494057970c86100eeaca1d92 100644 (file)
@@ -4,6 +4,21 @@
 
 #include <stdint.h>
 
+/* ================================================================== */
+
+/* number of cores */
+#define CORES_X 4
+#define CORES_Y 4
+
+/* number of nodes per core */
+#define NODES_X 26
+#define NODES_Y 26
+
+/* number of timer values */
+#define TIMERS 12
+
+/* ================================================================== */
+
 /* preprocessor magic */
 #define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0);
 #define USED __attribute__((used))
 #undef ALIGN
 #define ALIGN(X) __attribute__((aligned(X)))
 
-/* number of cores */
-#define CORES_X 4
-#define CORES_Y 4
+/* some calculations */
 #define CORES (CORES_X * CORES_Y)
-
-/* size of per-core subgrid */
-#define BLOCKS_X 26
-#define BLOCKS_Y 26
-
-#define TIMERS 12
+#define NODES (NODES_X * NODES_Y)
+#define LATTICE_X (NODES_X * CORES_X)
+#define LATTICE_Y (NODES_Y * CORES_Y)
 
 /* pollflag values */
 #define POLL_BUSY  0x00
 #define POLL_READY 0x01
 #define POLL_DONE  0x02
 
-/* floating point type */
-typedef float FLOAT;
-
-/* node and block type (D2Q9) */
-typedef FLOAT       d2q9_node_t[9];
-typedef d2q9_node_t d2q9_block_t[BLOCKS_Y][BLOCKS_X];
+/* data types */
+typedef float    FLOAT;
+typedef FLOAT    node_t[9];
+typedef node_t   block_t[NODES_Y][NODES_X];
+typedef uint32_t times_t[TIMERS];
 
 /* shared memory structure */
 typedef struct {
-       uint32_t     pollflag;
-       uint32_t     iteration;
-       uint32_t     timers[CORES_Y][CORES_X][TIMERS];
-       d2q9_block_t lattice[CORES_Y][CORES_X];
+       uint32_t pollflag;
+       uint32_t iteration;
+       times_t  times[CORES_Y][CORES_X];
+       block_t  lattice[CORES_Y][CORES_X];
 } ALIGN(8) shm_t;
 
 #endif /* _SHARED_H_ */