/* core indices */
extern unsigned int row, col, core;
+/* ================================================================== */
+
/* velocities */
static const int d2q9_v[9][2] = { { 0, 0},
{-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
1./36., 1./9., 1./36., 1./9.,
};
-void d2q9_init(d2q9_block_t block)
+/* ================================================================== */
+
+void init(block_t f)
{
/* all with rho = 0.1 */
- for(int y = 0; y < BLOCKS_Y; y++)
- for(int x = 0; x < BLOCKS_X; x++)
+ for(int y = 0; y < NODES_Y; y++)
+ for(int x = 0; x < NODES_X; x++)
for(int q = 0; q < 9; q++)
- block[y][x][q] = 0.1 * d2q9_w[q];
+ f[y][x][q] = 0.1 * d2q9_w[q];
if(core == 0) {
/* except here with 0.2 */
for(int q = 0; q < 9; q++)
- block[0][0][q] = 0.2 * d2q9_w[q];
+ f[0][0][q] = 0.2 * d2q9_w[q];
}
return;
}
-void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
+void collide(block_t f, int x, int y, FLOAT omega)
{
if(row == 0 && y == 0) {
/* Zou/He boundary at top, with velocity */
f[y][x][1] = f[y][x][5] + tmp1 + tmp2;
f[y][x][8] = f[y][x][4] + 2 * rho * UY / 3;
- } else if(row == CORES_Y-1 && y == BLOCKS_Y-1) {
+ } else if(row == CORES_Y-1 && y == NODES_Y-1) {
/* Zou/He boundary at bottom, no velocity */
FLOAT tmp = ( f[y][x][6] - f[y][x][2] ) / 2;
f[y][x][3] = f[y][x][7] + tmp;
f[y][x][7] = f[y][x][3] - tmp;
f[y][x][6] = f[y][x][2];
- } else if(col == CORES_X-1 && x == BLOCKS_X-1) {
+ } else if(col == CORES_X-1 && x == NODES_X-1) {
/* Zou/He boundary at right, no velocity */
FLOAT tmp = ( f[y][x][8] - f[y][x][4] ) / 2;
f[y][x][1] = f[y][x][5] - tmp;
}
}
-void d2q9_stream(d2q9_block_t f, int x, int y)
+void stream(block_t f, int x, int y)
{
for(int q = 1; q <= 4; q++) {
int next_row = row;
int next_y = y + d2q9_v[q][1];
/* inner borders (extend) */
- if(next_x < 0) { next_col--; next_x += BLOCKS_X; }
- else if(next_x >= BLOCKS_X) { next_col++; next_x -= BLOCKS_X; }
- if(next_y < 0) { next_row--; next_y += BLOCKS_Y; }
- else if(next_y >= BLOCKS_Y) { next_row++; next_y -= BLOCKS_Y; }
+ if(next_x < 0) { next_col--; next_x += NODES_X; }
+ else if(next_x >= NODES_X) { next_col++; next_x -= NODES_X; }
+ if(next_y < 0) { next_row--; next_y += NODES_Y; }
+ else if(next_y >= NODES_Y) { next_row++; next_y -= NODES_Y; }
#if 0
/* outer borders (wrap around) */
#endif
/* f: local block, g: local or remote block */
- d2q9_block_t *g = (void*)f;
+ block_t *g = (void*)f;
if(next_row != row || next_col != col) {
g = e_get_global_address(next_col, next_row, (void*)f);
}
}
}
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
+void bulk(block_t f, FLOAT omega)
{
/* don't touch the border nodes */
- for(int x = 1; x < BLOCKS_X-1; x++) {
- for(int y = 1; y < BLOCKS_Y-1; y++) {
+ for(int x = 1; x < NODES_X-1; x++) {
+ for(int y = 1; y < NODES_Y-1; y++) {
/* macroscopic */
FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
f[y][x][3] + f[y][x][4] + f[y][x][5] +
#include "../shared.h"
-/* D2Q9 functions */
-void d2q9_init (d2q9_block_t);
-void d2q9_collide (d2q9_block_t, int x, int y, FLOAT);
-void d2q9_stream (d2q9_block_t, int x, int y);
-void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT);
+void init (block_t);
+void collide (block_t, int x, int y, FLOAT);
+void stream (block_t, int x, int y);
+void bulk (block_t, FLOAT);
/* statically allocate dummy memory and local block overlay
to prevent linker from putting stuff in banks 1..3 */
-static uint8_t dummy_bank1[8192] USED SECTION(".data_bank1");
-static uint8_t dummy_bank2[8192] USED SECTION(".data_bank2");
-static uint8_t dummy_bank3[8192] USED SECTION(".data_bank3");
-static d2q9_block_t *block = (void*)0x2000;
+static uint8_t dummy_bank1[8192] USED SECTION(".data_bank1");
+static uint8_t dummy_bank2[8192] USED SECTION(".data_bank2");
+static uint8_t dummy_bank3[8192] USED SECTION(".data_bank3");
+static block_t *block = (void*)0x2000;
/* barrier structures */
volatile e_barrier_t barriers[CORES];
/* global index variables */
unsigned int row, col, core;
-void init(void)
-{
- /* compile-time checks */
- BUILD_BUG(BLOCKS_X * BLOCKS_Y * sizeof(d2q9_node_t) > 24*1024);
- BUILD_BUG(BLOCKS_X < 3 || BLOCKS_Y < 3);
- BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
- BUILD_BUG(CORES_X > 4 || CORES_Y > 4);
-
- /* core index */
- e_coords_from_coreid(e_get_coreid(), &col, &row);
- core = row * CORES_X + col;
-
- /* barrier initialization */
- e_barrier_init(barriers, tgt_bars);
-}
-
#define READ_TIMER(X) \
do { \
- clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
+ times[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
} while(0);
int main()
{
const FLOAT omega = 1.0;
- unsigned clocks[TIMERS] = {0};
+ unsigned times[TIMERS] = {0};
- init();
- d2q9_init(*block);
+ /* compile-time checks */
+ BUILD_BUG(NODES * sizeof(node_t) > 24 * 1024);
+ BUILD_BUG(NODES_X < 3 || NODES_Y < 3);
+ BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
+ BUILD_BUG(CORES_X > 8 || CORES_Y > 8);
+
+ /* save mesh coordinates */
+ e_coords_from_coreid(e_get_coreid(), &col, &row);
+ core = row * CORES_X + col;
- for(int i = 0; i < 500; i++) {
+ /* initialize barrier */
+ e_barrier_init(barriers, tgt_bars);
+
+ /* initialize block */
+ init(*block);
+
+ /* main loop */
+ for(int iter = 0; iter < 500; iter++) {
READ_TIMER(0);
#if 1
/* collide all nodes */
- for(int y = 0; y < BLOCKS_Y; y++)
- for(int x = 0; x < BLOCKS_X; x++)
- d2q9_collide(*block, x, y, omega);
+ for(int y = 0; y < NODES_Y; y++)
+ for(int x = 0; x < NODES_X; x++)
+ collide(*block, x, y, omega);
/* synchronize */
READ_TIMER(1);
READ_TIMER(2);
/* stream all nodes */
- for(int y = 0; y < BLOCKS_Y; y++)
- for(int x = 0; x < BLOCKS_X; x++)
- d2q9_stream(*block, x, y);
+ for(int y = 0; y < NODES_Y; y++)
+ for(int x = 0; x < NODES_X; x++)
+ stream(*block, x, y);
READ_TIMER(3);
#else
/* collide boundaries: top, bottom */
- for(int x = 0; x < BLOCKS_X; x++) {
- d2q9_collide(*block, x, 0, omega);
- d2q9_collide(*block, x, BLOCKS_Y-1, omega);
+ for(int x = 0; x < NODES_X; x++) {
+ collide(*block, x, 0, omega);
+ collide(*block, x, NODES_Y-1, omega);
}
READ_TIMER(1);
/* collide boundaries: left, right */
- for(int y = 1; y < BLOCKS_Y-1; y++) {
- d2q9_collide(*block, 0, y, omega);
- d2q9_collide(*block, BLOCKS_X-1, y, omega);
+ for(int y = 1; y < NODES_Y-1; y++) {
+ collide(*block, 0, y, omega);
+ collide(*block, NODES_X-1, y, omega);
}
/* synchronize */
READ_TIMER(3);
/* collide and stream the bulk */
- d2q9_collide_stream_bulk(*block, omega);
+ bulk(*block, omega);
READ_TIMER(4);
/* stream the boundaries: top, bottom */
- for(int x = 0; x < BLOCKS_X; x++) {
- d2q9_stream(*block, x, 0 );
- d2q9_stream(*block, x, BLOCKS_Y-1);
+ for(int x = 0; x < NODES_X; x++) {
+ stream(*block, x, 0 );
+ stream(*block, x, NODES_Y-1);
}
READ_TIMER(5);
/* stream the boundaries: left, right */
- for(int y = 1; y < BLOCKS_Y-1; y++) {
- d2q9_stream(*block, 0, y);
- d2q9_stream(*block, BLOCKS_X-1, y);
+ for(int y = 1; y < NODES_Y-1; y++) {
+ stream(*block, 0, y);
+ stream(*block, NODES_X-1, y);
}
READ_TIMER(6);
#endif
/* copy data to shm if necessary */
- if(!(i%100)) {
- /* copy iteration, lattice and timers to shm */
- if(core == 0)
- shm.iteration = i;
-
- memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
+ if(!(iter % 1)) {
+ /* copy lattice to shm */
+ memcpy(&shm.lattice[row][col], block, sizeof(block_t));
- for(int i = 0; i < TIMERS; i++)
- shm.timers[row][col][i] = clocks[i];
+ /* copy times to shm */
+ memcpy(&shm.times[row][col], times, sizeof(times_t));
/* synchronize */
e_barrier(barriers, tgt_bars);
- /* flag host and wait */
+ /* core 0: write counter and flag host; wait */
if(core == 0) {
- shm.pollflag = POLL_READY;
+ shm.iteration = iter;
+ shm.pollflag = POLL_READY;
while(shm.pollflag == POLL_READY);
}
}
/* synchronize */
e_barrier(barriers, tgt_bars);
- READ_TIMER(TIMERS-1);
}
/* last iteration done: flag host and stop */
}
}
-
/* write a (semi-) human-readable dump of the lattice */
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_populations(block_t f[CORES_Y][CORES_X], int iter)
{
FILE *file = fopen("populations.dat", "a");
if(!file) {
}
for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCKS_Y; y++) {
+ for(int y = 0; y < NODES_Y; y++) {
for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCKS_X; x++) {
+ for(int x = 0; x < NODES_X; x++) {
fprintf(file, "%3d: [%3d,%3d]: ",
iter,
- cx * BLOCKS_X + x,
- cy * BLOCKS_Y + y
+ cx * NODES_X + x,
+ cy * NODES_Y + y
);
for(int q = 0; q < 9; q++) {
- fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]);
+ fprintf(file, "%.5f ",
+ f[cy][cx][y][x][q]);
}
fprintf(file, "\n");
}
}
/* write an 8-bit grayscale, binary PPM image of the particle density */
-void write_density(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_density(block_t f[CORES_Y][CORES_X], int iter)
{
char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
return;
}
fprintf(file, "P5\n%d %d\n%d\n",
- CORES_X*BLOCKS_X, CORES_Y*BLOCKS_Y, 255);
+ CORES_X*NODES_X, CORES_Y*NODES_Y, 255);
/* calculate all densities and remember min/max */
FLOAT min = 1.0, max = 0;
- FLOAT rhos[CORES_Y][BLOCKS_Y][CORES_X][BLOCKS_X];
+ FLOAT rhos[CORES_Y][NODES_Y][CORES_X][NODES_X];
for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCKS_Y; y++) {
+ for(int y = 0; y < NODES_Y; y++) {
for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCKS_X; x++) {
+ for(int x = 0; x < NODES_X; x++) {
FLOAT rho = 0;
for(int q = 0; q < 9; q++)
- rho += lattice[cy][cx][y][x][q];
+ rho += f[cy][cx][y][x][q];
rhos[cy][y][cx][x] = rho;
if(rho < min) min = rho;
/* scale values and write them to the image */
for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCKS_Y; y++) {
+ for(int y = 0; y < NODES_Y; y++) {
for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCKS_X; x++) {
+ for(int x = 0; x < NODES_X; x++) {
unsigned char gray;
gray = (255. * (rhos[cy][y][cx][x]-min) / (max-min));
fwrite(&gray, 1, 1, file);
}
/* write an 8-bit grayscale, binary PPM image of the particle velocity */
-void write_velocity(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+void write_velocity(block_t f[CORES_Y][CORES_X], int iter)
{
char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
return;
}
fprintf(file, "P5\n%d %d\n%d\n",
- CORES_X*BLOCKS_X, CORES_Y*BLOCKS_Y, 255);
+ CORES_X*NODES_X, CORES_Y*NODES_Y, 255);
/* calculate all velocities and remember min/max */
FLOAT min = 1000, max = 0;
- FLOAT us[CORES_Y][BLOCKS_Y][CORES_X][BLOCKS_X];
+ FLOAT us[CORES_Y][NODES_Y][CORES_X][NODES_X];
for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCKS_Y; y++) {
+ for(int y = 0; y < NODES_Y; y++) {
for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCKS_X; x++) {
+ for(int x = 0; x < NODES_X; x++) {
FLOAT rho = (
- lattice[cy][cx][y][x][0] +
- lattice[cy][cx][y][x][1] +
- lattice[cy][cx][y][x][2] +
- lattice[cy][cx][y][x][3] +
- lattice[cy][cx][y][x][4] +
- lattice[cy][cx][y][x][5] +
- lattice[cy][cx][y][x][6] +
- lattice[cy][cx][y][x][7] +
- lattice[cy][cx][y][x][8]
+ f[cy][cx][y][x][0] +
+ f[cy][cx][y][x][1] +
+ f[cy][cx][y][x][2] +
+ f[cy][cx][y][x][3] +
+ f[cy][cx][y][x][4] +
+ f[cy][cx][y][x][5] +
+ f[cy][cx][y][x][6] +
+ f[cy][cx][y][x][7] +
+ f[cy][cx][y][x][8]
);
FLOAT ux = (
- lattice[cy][cx][y][x][5] +
- lattice[cy][cx][y][x][6] +
- lattice[cy][cx][y][x][7] -
- lattice[cy][cx][y][x][1] -
- lattice[cy][cx][y][x][2] -
- lattice[cy][cx][y][x][3]
+ f[cy][cx][y][x][5] +
+ f[cy][cx][y][x][6] +
+ f[cy][cx][y][x][7] -
+ f[cy][cx][y][x][1] -
+ f[cy][cx][y][x][2] -
+ f[cy][cx][y][x][3]
) / rho;
FLOAT uy = (
- lattice[cy][cx][y][x][1] +
- lattice[cy][cx][y][x][7] +
- lattice[cy][cx][y][x][8] -
- lattice[cy][cx][y][x][3] -
- lattice[cy][cx][y][x][4] -
- lattice[cy][cx][y][x][5]
+ f[cy][cx][y][x][1] +
+ f[cy][cx][y][x][7] +
+ f[cy][cx][y][x][8] -
+ f[cy][cx][y][x][3] -
+ f[cy][cx][y][x][4] -
+ f[cy][cx][y][x][5]
) / rho;
FLOAT u = sqrtf(ux*ux + uy*uy);
/* scale values and write them to the image */
for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCKS_Y; y++) {
+ for(int y = 0; y < NODES_Y; y++) {
for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCKS_X; x++) {
+ for(int x = 0; x < NODES_X; x++) {
unsigned char gray;
gray = (255. * (us[cy][y][cx][x]-min) / (max-min));
fwrite(&gray, 1, 1, file);
}
/* write timer values */
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
+void write_timers(times_t times[CORES_Y][CORES_X], uint32_t iter)
{
FILE *file = fopen("timers.dat", "ab");
if(!file) {
for(int x = 0; x < CORES_X; x++) {
fprintf(file, "[%d,%d]: ", x, y);
for(int t = 0; t < TIMERS; t++) {
- fprintf(file, "%8d ", timers[y][x][t]);
+ /* times is indexed [core row][core column][timer] */
+ fprintf(file, "%8d ", times[y][x][t]);
}
fprintf(file, "\n");
}
/* helper functions */
void fixsudo(const char *filename);
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_density (d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_velocity (d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_populations(block_t f[CORES_Y][CORES_X], int iter);
+void write_density (block_t f[CORES_Y][CORES_X], int iter);
+void write_velocity (block_t f[CORES_Y][CORES_X], int iter);
void convert_to_gif(void);
void convert_to_mp4(void);
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
+void write_timers(times_t times[CORES_Y][CORES_X], uint32_t iter);
/* globals */
static shm_t shm = { 0 }; /* local shm copy */
/* ================================================================ */
printf("Polling shared memory.\n");
while(1) {
-
while(1) {
/* read polling flag */
if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
}
/* finish if done */
- if(pollflag == POLL_DONE) break;
+ if(pollflag == POLL_DONE)
+ break;
/* read full shared memory */
if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
/* write data */
//write_populations(shm.lattice, shm.iteration);
write_density(shm.lattice, shm.iteration);
- write_timers(shm.timers, shm.iteration);
+ write_timers(shm.times, shm.iteration);
}
/* ================================================================ */
#include <stdint.h>
+/* ================================================================== */
+
+/* number of cores */
+#define CORES_X 4
+#define CORES_Y 4
+
+/* number of nodes per core */
+#define NODES_X 26
+#define NODES_Y 26
+
+/* number of timer values */
+#define TIMERS 12
+
+/* ================================================================== */
+
/* preprocessor magic */
#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0);
#define USED __attribute__((used))
#undef ALIGN
#define ALIGN(X) __attribute__((aligned(X)))
-/* number of cores */
-#define CORES_X 4
-#define CORES_Y 4
+/* some calculations */
#define CORES (CORES_X * CORES_Y)
-
-/* size of per-core subgrid */
-#define BLOCKS_X 26
-#define BLOCKS_Y 26
-
-#define TIMERS 12
+#define NODES (NODES_X * NODES_Y)
+#define LATTICE_X (NODES_X * CORES_X)
+#define LATTICE_Y (NODES_Y * CORES_Y)
/* pollflag values */
#define POLL_BUSY 0x00
#define POLL_READY 0x01
#define POLL_DONE 0x02
-/* floating point type */
-typedef float FLOAT;
-
-/* node and block type (D2Q9) */
-typedef FLOAT d2q9_node_t[9];
-typedef d2q9_node_t d2q9_block_t[BLOCKS_Y][BLOCKS_X];
+/* data types */
+typedef float FLOAT;
+typedef FLOAT node_t[9];
+typedef node_t block_t[NODES_Y][NODES_X];
+typedef uint32_t times_t[TIMERS];
/* shared memory structure */
typedef struct {
- uint32_t pollflag;
- uint32_t iteration;
- uint32_t timers[CORES_Y][CORES_X][TIMERS];
- d2q9_block_t lattice[CORES_Y][CORES_X];
+ uint32_t pollflag;
+ uint32_t iteration;
+ times_t times[CORES_Y][CORES_X];
+ block_t lattice[CORES_Y][CORES_X];
} ALIGN(8) shm_t;
#endif /* _SHARED_H_ */