--- /dev/null
+# Template Makefile for Epiphany
+#
+# Builds the host application ($(HAPP)) with the native compiler and the
+# Epiphany images ($(EAPPS)) with the e-gcc cross toolchain.
+# EPIPHANY_HOME is expected to come from the environment.
+
+# host toolchain
+HCC = gcc
+HCFLAGS = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall
+HLFLAGS = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal
+ECHO = /bin/echo -e
+
+# target toolchain
+ECC = e-gcc
+EOC = e-objcopy
+ECFLAGS = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math
+ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib
+EOFLAGS = -R .shared_dram
+
+# write a .d fragment next to every object so that header changes
+# (e.g. the shared memory layout) rebuild both host and target objects
+DEPFLAGS = -MMD -MP
+
+# host application
+HAPP = $(DEST)/ep_main
+HOBJS = $(HDEST)/main.o $(HDEST)/data.o
+
+# epiphany applications
+EAPPS = $(DEST)/main.srec
+ECOMMON = $(EDEST)/d2q9.o
+
+# folders
+HSRC = hsrc
+HDEST = hobj
+ESRC = esrc
+EDEST = eobj
+DEST = bin
+
+# === Magic begins here ===================================================
+EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
+EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
+DEPS = $(HOBJS:.o=.d) $(EOBJS:.o=.d)
+
+# never leave a half-written target behind when a recipe fails
+.DELETE_ON_ERROR:
+.SECONDARY:
+.PHONY: all help host target folders run clean
+.NOTPARALLEL: clean
+
+# === Phony Rules =========================================================
+# 'help' is the first rule and therefore the default goal
+help:
+	@$(ECHO)
+	@$(ECHO) "Epiphany Makefile - Help"
+	@$(ECHO) " help show this help"
+	@$(ECHO) " host build host application ($(HAPP))"
+	@$(ECHO) " target build epiphany applications ($(EAPPS))"
+	@$(ECHO) " all build all"
+	@$(ECHO) " run build all, then run host application"
+	@$(ECHO) " clean remove applications and intermediate files"
+	@$(ECHO)
+
+all: host target
+
+host: folders $(HAPP)
+
+target: folders $(EAPPS)
+
+# kept for compatibility; the build rules below request their output
+# directory themselves through order-only prerequisites
+folders: $(HDEST) $(EDEST) $(DEST)
+
+run: host target
+	@$(ECHO) "\tRUN"
+	@sudo LD_LIBRARY_PATH="$(LD_LIBRARY_PATH)" \
+		EPIPHANY_HDF="$(EPIPHANY_HDF)" \
+		$(HAPP)
+
+clean:
+	@$(ECHO) "\tCLEAN"
+	@rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS) $(DEPS)
+	@-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
+		2>/dev/null
+
+# output directories
+$(HDEST) $(EDEST) $(DEST):
+	@$(ECHO) "\t(HOST) MKDIR $@"
+	@mkdir -p $@
+
+# === Host Rules ==========================================================
+# directories are order-only prerequisites (after '|'): they must exist,
+# but their changing mtime must not trigger rebuilds
+$(HAPP): $(HOBJS) | $(DEST)
+	@$(ECHO) "\t(HOST) LINK\t$@"
+	@$(HCC) -o $@ $^ $(HLFLAGS)
+
+$(HDEST)/%.o: $(HSRC)/%.c | $(HDEST)
+	@$(ECHO) "\t(HOST) CC\t$@"
+	@$(HCC) $(HCFLAGS) $(DEPFLAGS) -c -o $@ $<
+
+# === Target Rules ========================================================
+$(DEST)/%.srec: $(EDEST)/%.elf | $(DEST)
+	@$(ECHO) "\t(TARGET) OBJCOPY $@"
+	@$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $< $@
+
+$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
+	@$(ECHO) "\t(TARGET) LINK\t$@"
+	@$(ECC) -o $@ $^ $(ELFLAGS)
+
+$(EDEST)/%.o: $(ESRC)/%.c | $(EDEST)
+	@$(ECHO) "\t(TARGET) CC\t$@"
+	@$(ECC) $(ECFLAGS) $(DEPFLAGS) -c -o $@ $<
+
+# compiler-generated header dependencies (absent on the first build)
+-include $(DEPS)
+# =========================================================================
+
--- /dev/null
+/* D2Q9 lattice boltzmann functions */
+
+#include <e-lib.h>
+#include "../shared.h"
+#include "d2q9.h"
+
+/* velocities: d2q9_v[q] = {dx, dy} of population q. Index 0 is the rest
+ * population; for q = 1..4 the entry at q+4 is the exact opposite
+ * direction, which is what the q <-> q+4 swaps in this file rely on. */
+static const int d2q9_v[9][2] = { { 0, 0},
+ {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
+ { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
+};
+
+/* weights, indexed like d2q9_v: 4/9 for the rest population, 1/9 for the
+ * axis-aligned directions and 1/36 for the diagonals */
+static const FLOAT d2q9_w[9] = { 4./9.,
+ 1./36., 1./9., 1./36., 1./9.,
+ 1./36., 1./9., 1./36., 1./9.,
+};
+
+/* Initialise a block: every node receives the weighted populations of a
+ * density of 0.1 (rho * w[q]); on core 0 the node [0][0] is overwritten
+ * with a density of 0.2 so the lattice does not start out uniform. */
+void d2q9_init(d2q9_block_t block)
+{
+    int q, x, y;
+
+    /* the writes are independent, so walk the populations outermost */
+    for(q = 0; q < 9; q++) {
+        for(y = 0; y < BLOCK_Y; y++) {
+            for(x = 0; x < BLOCK_X; x++)
+                block[y][x][q] = 0.1 * d2q9_w[q];
+        }
+    }
+
+    /* only the first core carries the denser node */
+    if(core != 0)
+        return;
+
+    for(q = 0; q < 9; q++)
+        block[0][0][q] = 0.2 * d2q9_w[q];
+}
+
+/* Collide a single node: relax its nine populations towards the local
+ * equilibrium with relaxation factor omega, then exchange every
+ * population q = 1..4 with its opposite q+4.
+ *
+ * f     local block
+ * x, y  node coordinates inside the block
+ * omega relaxation factor (omega = 1 replaces f by the equilibrium) */
+void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
+{
+    FLOAT *node = f[y][x];
+
+    /* density and velocity of the node */
+    FLOAT rho = node[0] + node[1] + node[2] +
+                node[3] + node[4] + node[5] +
+                node[6] + node[7] + node[8];
+    FLOAT ux = (node[7] + node[6] + node[5] -
+                node[1] - node[2] - node[3]) / rho;
+    FLOAT uy = (node[1] + node[8] + node[7] -
+                node[3] - node[4] - node[5]) / rho;
+    FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+    /* relaxation towards equilibrium */
+    for(int q = 0; q < 9; q++) {
+        FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+        FLOAT eq = rho * d2q9_w[q] *
+                   (1. + 3. * cu + 4.5 * cu*cu - sqr);
+        node[q] *= (1.0 - omega);
+        node[q] += omega * eq;
+    }
+
+    /* exchange opposite populations; the pairs are independent */
+    for(int q = 4; q >= 1; q--) {
+        FLOAT held = node[q+4];
+        node[q+4] = node[q];
+        node[q] = held;
+    }
+}
+
+/* Stream a single node: for each direction q = 1..4 exchange this node's
+ * population q+4 with population q of the neighbour in direction
+ * d2q9_v[q]. The neighbour may live in the block of an adjacent core, in
+ * which case it is accessed through its global address.
+ *
+ * Links that leave the core grid are skipped. Each link is evaluated on
+ * its own, so one blocked direction no longer prevents the remaining
+ * directions of the same node from being exchanged (the previous code
+ * returned from the function at the first blocked link).
+ *
+ * f     local block
+ * x, y  node coordinates inside the block */
+void d2q9_stream(d2q9_block_t f, int x, int y)
+{
+    for(int q = 1; q <= 4; q++) {
+        int next_row = row;
+        int next_col = col;
+        int next_x = x + d2q9_v[q][0];
+        int next_y = y + d2q9_v[q][1];
+
+        /* inner borders (extend): continue in the neighbouring block */
+        if(next_x < 0)             { next_col--; next_x += BLOCK_X; }
+        else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; }
+        if(next_y < 0)             { next_row--; next_y += BLOCK_Y; }
+        else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; }
+
+#if 0
+        /* outer borders (wrap around) */
+        if(next_col < 0)             { next_col += CORES_X; }
+        else if(next_col >= CORES_X) { next_col -= CORES_X; }
+        if(next_row < 0)             { next_row += CORES_Y; }
+        else if(next_row >= CORES_Y) { next_row -= CORES_Y; }
+#else
+        /* full bounce-back on all sides: leave this link untouched and
+           carry on with the next direction */
+        if(next_col < 0 || next_col >= CORES_X) continue;
+        if(next_row < 0 || next_row >= CORES_Y) continue;
+#endif
+
+        /* f: local block, g: local or remote block */
+        d2q9_block_t *g = (void*)f;
+        if(next_row != row || next_col != col) {
+            /* NOTE(review): coordinates are passed as (col, row), the same
+               order init() uses with e_coords_from_coreid() - confirm
+               against the e-lib prototypes */
+            g = e_get_global_address(next_col, next_row, (void*)f);
+        }
+
+        /* stream/swap f and g */
+        FLOAT tmp = f[y][x][q+4];
+        f[y][x][q+4] = (*g)[next_y][next_x][q];
+        (*g)[next_y][next_x][q] = tmp;
+    }
+}
+
+/* Collide and stream all interior nodes of a block in one sweep.
+ *
+ * Border nodes (x or y equal to 0 or BLOCK_X-1 / BLOCK_Y-1) are not
+ * collided here; they are only touched as stream partners of their
+ * interior neighbours. The sweep runs with x outermost and y ascending,
+ * so the stream partners in directions 1..4 (x-1 column and y-1 row) have
+ * already been visited by this loop or are border nodes.
+ *
+ * f     local block
+ * omega relaxation factor */
+void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
+{
+    /* don't touch the border nodes */
+    for(int x = 1; x < BLOCK_X-1; x++) {
+        for(int y = 1; y < BLOCK_Y-1; y++) {
+            /* macroscopic */
+            FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
+                        f[y][x][3] + f[y][x][4] + f[y][x][5] +
+                        f[y][x][6] + f[y][x][7] + f[y][x][8];
+            FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
+                        f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
+            FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
+                        f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
+            FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+            /* update node */
+            for(int q = 0; q < 9; q++) {
+                FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+                FLOAT eq = rho * d2q9_w[q] *
+                           (1. + 3. * cu + 4.5 * cu*cu - sqr);
+                f[y][x][q] *= (1.0 - omega);
+                f[y][x][q] += omega * eq;
+            }
+
+            /* swap with the opposite population and stream in one step.
+               Starts at q = 1: the rest population (q = 0) has no
+               opposite, and the former q = 0 pass merely moved values
+               through slot 4 and back without changing anything. */
+            for(int q = 1; q <= 4; q++) {
+                int next_x = x + d2q9_v[q][0];
+                int next_y = y + d2q9_v[q][1];
+
+                FLOAT tmp = f[y][x][q];
+                f[y][x][q] = f[y][x][q+4];
+                f[y][x][q+4] = f[next_y][next_x][q];
+                f[next_y][next_x][q] = tmp;
+            }
+        }
+    }
+}
+
--- /dev/null
+/* lattice boltzmann functions */
+#ifndef D2Q9_H_
+#define D2Q9_H_
+
+#include "../shared.h"
+
+/* core index: position of this core in the core grid and its linear
+   index; defined by the device program that links against d2q9.c */
+extern unsigned int row, col, core;
+
+/* D2Q9 functions */
+
+/* fill a block with its initial populations */
+void d2q9_init (d2q9_block_t);
+/* relax node (x, y) towards equilibrium and swap opposite populations */
+void d2q9_collide (d2q9_block_t, int x, int y, FLOAT);
+/* exchange populations of node (x, y) with its neighbours */
+void d2q9_stream (d2q9_block_t, int x, int y);
+/* collide and stream all interior nodes of a block in one sweep */
+void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT);
+
+#endif /* D2Q9_H_ */
+
--- /dev/null
+/* D2Q9 lattice boltzmann implementation */
+
+#include <e-lib.h>
+#include "../shared.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "d2q9.h"
+
+/* shared memory overlay: placed in the ".shared_dram" section; the host
+   reads and writes the same shm_t layout to exchange results and the
+   poll flag */
+volatile shm_t shm SECTION(".shared_dram");
+
+/* statically allocate dummy memory and local block overlay
+ to prevent linker from putting stuff in banks 1..3: three 8192-byte
+ placeholders (24 KiB in total) reserve the banks, and `block` overlays
+ them through the fixed address 0x2000
+ (presumably the start of bank 1 - confirm against the linker script) */
+static uint8_t dummy_bank1[8192] UNUSED SECTION(".data_bank1");
+static uint8_t dummy_bank2[8192] UNUSED SECTION(".data_bank2");
+static uint8_t dummy_bank3[8192] UNUSED SECTION(".data_bank3");
+static d2q9_block_t *block = (void*)0x2000;
+
+/* barrier structures: one entry per core, handed to e_barrier_init()
+   and e_barrier() */
+volatile e_barrier_t barriers[NUM_CORES];
+ e_barrier_t *tgt_bars[NUM_CORES];
+
+/* global index variables: set once in init(), read by the d2q9_*
+   functions through the extern declaration in d2q9.h */
+unsigned int row, col, core;
+
+/* One-time per-core setup: verifies the compile-time configuration,
+ * determines this core's grid position and linear index, and prepares
+ * the barrier structures. */
+void init(void)
+{
+    /* configuration sanity (evaluated by the compiler only) */
+    BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
+    BUILD_BUG(CORES_X > 4 || CORES_Y > 4);
+    BUILD_BUG(BLOCK_X < 3 || BLOCK_Y < 3);
+    /* one block has to fit into the 24 KiB reserved for it */
+    BUILD_BUG(BLOCK_X * BLOCK_Y * sizeof(d2q9_node_t) > 24*1024);
+
+    /* where are we? */
+    e_coords_from_coreid(e_get_coreid(), &col, &row);
+    core = col + CORES_X * row;
+
+    /* barriers shared by all cores */
+    e_barrier_init(barriers, tgt_bars);
+}
+
+/* Store the count elapsed on E_CTIMER_0 since the previous READ_TIMER in
+ * clocks[X], then restart the timer from E_CTIMER_MAX. Requires an array
+ * named `clocks` in the calling scope.
+ * No trailing semicolon after while(0): the caller's own `;` completes
+ * the statement, which keeps the macro safe inside if/else. */
+#define READ_TIMER(X) \
+    do { \
+        clocks[(X)] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
+        e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
+        e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
+    } while(0)
+
+/* Device entry point, executed by every core.
+ *
+ * Runs ITERATIONS lattice updates on the local block. Every
+ * DUMP_INTERVAL iterations each core copies its block and timer values
+ * into shared memory; core 0 then raises POLL_READY and waits until the
+ * host has changed the flag again. After the last iteration core 0
+ * writes POLL_DONE and all cores idle forever. */
+int main()
+{
+    enum {
+        ITERATIONS    = 500,  /* total number of lattice updates */
+        DUMP_INTERVAL = 100   /* iterations between two shm dumps */
+    };
+
+    const FLOAT omega = 1.0;
+    unsigned clocks[TIMERS] = {0};
+
+    init();
+    d2q9_init(*block);
+
+    for(int i = 0; i < ITERATIONS; i++) {
+        READ_TIMER(0);
+
+#if 1
+        /* collide all nodes */
+        for(int y = 0; y < BLOCK_Y; y++)
+            for(int x = 0; x < BLOCK_X; x++)
+                d2q9_collide(*block, x, y, omega);
+
+        /* synchronize: streaming touches neighbouring blocks, so every
+           core must have finished colliding first */
+        READ_TIMER(1);
+        e_barrier(barriers, tgt_bars);
+        READ_TIMER(2);
+
+        /* stream all nodes */
+        for(int y = 0; y < BLOCK_Y; y++)
+            for(int x = 0; x < BLOCK_X; x++)
+                d2q9_stream(*block, x, y);
+        READ_TIMER(3);
+
+#else
+        /* collide boundaries: top, bottom */
+        for(int x = 0; x < BLOCK_X; x++) {
+            d2q9_collide(*block, x, 0, omega);
+            d2q9_collide(*block, x, BLOCK_Y-1, omega);
+        }
+        READ_TIMER(1);
+
+        /* collide boundaries: left, right */
+        for(int y = 1; y < BLOCK_Y-1; y++) {
+            d2q9_collide(*block, 0, y, omega);
+            d2q9_collide(*block, BLOCK_X-1, y, omega);
+        }
+
+        /* synchronize */
+        READ_TIMER(2);
+        e_barrier(barriers, tgt_bars);
+        READ_TIMER(3);
+
+        /* collide and stream the bulk */
+        d2q9_collide_stream_bulk(*block, omega);
+        READ_TIMER(4);
+
+        /* stream the boundaries: top, bottom */
+        for(int x = 0; x < BLOCK_X; x++) {
+            d2q9_stream(*block, x, 0 );
+            d2q9_stream(*block, x, BLOCK_Y-1);
+        }
+        READ_TIMER(5);
+
+        /* stream the boundaries: left, right */
+        for(int y = 1; y < BLOCK_Y-1; y++) {
+            d2q9_stream(*block, 0, y);
+            d2q9_stream(*block, BLOCK_X-1, y);
+        }
+        READ_TIMER(6);
+#endif
+
+        /* copy data to shm if necessary */
+        if(!(i % DUMP_INTERVAL)) {
+            /* copy iteration, lattice and timers to shm */
+            if(core == 0)
+                shm.iteration = i;
+
+            /* the volatile qualifier is dropped on purpose for the bulk
+               copy; the barrier below orders it before the host is
+               flagged */
+            memcpy((void*)&shm.lattice[row][col], block,
+                   sizeof(d2q9_block_t));
+
+            /* separate index name: the iteration counter `i` stays
+               visible and is not shadowed */
+            for(int t = 0; t < TIMERS; t++)
+                shm.timers[row][col][t] = clocks[t];
+
+            /* synchronize: all cores have written their part */
+            e_barrier(barriers, tgt_bars);
+
+            /* flag host and wait */
+            if(core == 0) {
+                shm.pollflag = POLL_READY;
+                while(shm.pollflag == POLL_READY);
+            }
+        }
+
+        /* synchronize: also holds the other cores back while core 0
+           waits for the host */
+        e_barrier(barriers, tgt_bars);
+        READ_TIMER(TIMERS-1);
+    }
+
+    /* last iteration done: flag host and stop */
+    if(core == 0) shm.pollflag = POLL_DONE;
+    while(1) __asm__ volatile("idle");
+}
+
--- /dev/null
+/* Helper Functions to handle data (2D) */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../shared.h"
+
+/* parse a non-negative decimal id; returns 0 on success, -1 otherwise */
+static int fixsudo_parse_id(const char *text, long *id)
+{
+    char *end;
+    long value = strtol(text, &end, 10);
+
+    if(end == text || *end != '\0' || value < 0)
+        return -1;
+
+    *id = value;
+    return 0;
+}
+
+/* fix file owner if run with sudo
+ *
+ * When both SUDO_UID and SUDO_GID are set, the file is handed to that
+ * user/group with chown(); otherwise nothing happens. Values that are
+ * not plain non-negative numbers are reported and ignored (atoi() used
+ * to turn them silently into id 0). A failing chown() is reported with
+ * perror(). */
+void fixsudo(const char *filename)
+{
+    const char *uid_text = getenv("SUDO_UID");
+    const char *gid_text = getenv("SUDO_GID");
+    long uid, gid;
+
+    /* not started through sudo: nothing to fix */
+    if(!uid_text || !gid_text)
+        return;
+
+    if(fixsudo_parse_id(uid_text, &uid) || fixsudo_parse_id(gid_text, &gid)) {
+        fprintf(stderr, "fixsudo: ignoring malformed SUDO_UID/SUDO_GID\n");
+        return;
+    }
+
+    if(chown(filename, (uid_t)uid, (gid_t)gid))
+        perror("fixsudo/chown");
+}
+
+
+/* Append a text dump of the whole lattice to "populations.dat".
+ *
+ * One line per node: iteration, global [x,y] position and the nine
+ * populations; the nodes are visited row by row across all core blocks,
+ * and each dump ends with an empty line.
+ *
+ * lattice  blocks of all cores, indexed [core row][core column]
+ * iter     iteration number printed in front of every line */
+void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+{
+    FILE *out = fopen("populations.dat", "a");
+    if(out == NULL) {
+        perror("write_populations/fopen");
+        return;
+    }
+
+    for(int cy = 0; cy < CORES_Y; cy++) {
+        for(int y = 0; y < BLOCK_Y; y++) {
+            const int gy = cy * BLOCK_Y + y;   /* global row */
+
+            for(int cx = 0; cx < CORES_X; cx++) {
+                for(int x = 0; x < BLOCK_X; x++) {
+                    const int gx = cx * BLOCK_X + x;   /* global column */
+                    const FLOAT *node = lattice[cy][cx][y][x];
+
+                    fprintf(out, "%3d: [%3d,%3d]: ", iter, gx, gy);
+                    for(int q = 0; q < 9; q++)
+                        fprintf(out, "%.5f ", node[q]);
+                    fprintf(out, "\n");
+                }
+            }
+        }
+    }
+    fprintf(out, "\n");
+
+    fclose(out);
+}
+
+/* write an 8-bit grayscale, binary image of the lattice
+ *
+ * The file is ./tmp/i<iter>.ppm; the header written is "P5", i.e. the
+ * binary graymap flavour of the PNM family. The directory ./tmp must
+ * already exist, otherwise fopen() fails and the image is skipped.
+ *
+ * Every pixel is the density (sum of the nine populations) of one node,
+ * scaled linearly so that the smallest density of this frame maps to 0
+ * and the largest to 255. A frame with uniform density is written as all
+ * zeros instead of dividing by zero, and non-finite densities map to 0.
+ *
+ * lattice  blocks of all cores, indexed [core row][core column]
+ * iter     iteration number, used for the file name */
+void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+{
+    char name[32];
+    snprintf(name, sizeof name, "./tmp/i%06d.ppm", iter);
+
+    /* open image file and write header */
+    FILE *file = fopen(name, "wb");
+    if(!file) {
+        perror("write_image/fopen");
+        return;
+    }
+    fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255);
+
+    /* calculate all densities and remember min/max; the extremes are
+       seeded from the data itself rather than from fixed guesses */
+    FLOAT min = 0, max = 0;
+    int seen = 0;
+    FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X];
+    for(int cy = 0; cy < CORES_Y; cy++) {
+        for(int y = 0; y < BLOCK_Y; y++) {
+            for(int cx = 0; cx < CORES_X; cx++) {
+                for(int x = 0; x < BLOCK_X; x++) {
+                    FLOAT rho = 0;
+                    for(int q = 0; q < 9; q++)
+                        rho += lattice[cy][cx][y][x][q];
+                    rhos[cy][y][cx][x] = rho;
+
+                    /* rho != rho only for NaN, which must not become
+                       an extreme */
+                    if(rho == rho) {
+                        if(!seen || rho < min) min = rho;
+                        if(!seen || rho > max) max = rho;
+                        seen = 1;
+                    }
+                }
+            }
+        }
+    }
+
+    /* scale values and write them to the image */
+    const FLOAT range = max - min;
+    for(int cy = 0; cy < CORES_Y; cy++) {
+        for(int y = 0; y < BLOCK_Y; y++) {
+            for(int cx = 0; cx < CORES_X; cx++) {
+                for(int x = 0; x < BLOCK_X; x++) {
+                    double scaled = 0.;
+                    if(range > 0)
+                        scaled = 255. * (rhos[cy][y][cx][x]-min) / range;
+
+                    /* clamp; the negated test also catches NaN */
+                    if(!(scaled >= 0.)) scaled = 0.;
+                    if(scaled > 255.)   scaled = 255.;
+
+                    unsigned char gray = (unsigned char)scaled;
+                    fwrite(&gray, 1, 1, file);
+                }
+            }
+        }
+    }
+
+    /* close the file and chown if run with sudo */
+    fclose(file);
+    fixsudo(name);
+}
+
+/* convert image files to animated gif ./tmp/anim.gif
+ *
+ * Runs ImageMagick's `convert` through the shell. When the command cannot
+ * be started or exits with a non-zero status this is reported on stderr
+ * and the ownership fix is skipped, since the gif may not exist. */
+void write_animation(void)
+{
+    /* call imagemagick */
+    int result = system("convert ./tmp/i*.ppm ./tmp/anim.gif");
+    if(result != 0) {
+        fprintf(stderr, "write_animation: convert failed (status %d)\n",
+                result);
+        return;
+    }
+
+    fixsudo("./tmp/anim.gif");
+}
+
+/* write timer values
+ *
+ * Appends one section to "timers.dat": a header line with the iteration
+ * followed by one line per core holding its TIMERS counter values.
+ * The values are unsigned 32-bit counters, so they are printed as
+ * unsigned numbers (with %d large counts showed up as negative).
+ *
+ * timers  counters of all cores, indexed [core row][core column][timer]
+ * iter    iteration number written in the header line */
+void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
+{
+    FILE *file = fopen("timers.dat", "ab");
+    if(!file) {
+        perror("write_timers/fopen");
+        return;
+    }
+
+    fprintf(file, "Timers: i=%lu\n", (unsigned long)iter);
+    for(int y = 0; y < CORES_Y; y++) {
+        for(int x = 0; x < CORES_X; x++) {
+            fprintf(file, "[%d,%d]: ", x, y);
+            for(int t = 0; t < TIMERS; t++) {
+                fprintf(file, "%8lu ", (unsigned long)timers[y][x][t]);
+            }
+            fprintf(file, "\n");
+        }
+    }
+
+    fclose(file);
+}
--- /dev/null
+/* Host Application */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <e-hal.h>
+#include "../shared.h"
+
+#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
+#define SHM_OFFSET 0x01000000
+
+/* helper functions */
+void fixsudo(const char *filename);
+void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_animation(void);
+void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
+
+/* globals */
+static shm_t shm = { 0 }; /* local shm copy */
+static uint32_t pollflag;
+
+/* Host entry point.
+ *
+ * Removes old result files, opens a CORES_X x CORES_Y workgroup, clears
+ * the shared memory, loads and starts the device program on every core
+ * and then polls the flag at the start of the shared memory:
+ *   POLL_BUSY   keep polling
+ *   POLL_READY  fetch the shared memory, reset the flag to POLL_BUSY and
+ *               write an image and the timer values
+ *   POLL_DONE   leave the loop, release the device, build the animation
+ * Any failing device call terminates the program through FAIL(). */
+int main()
+{
+    char *filename = "bin/main.srec";
+
+    /* remove old results */
+    int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat");
+    (void)dummy;
+
+    e_epiphany_t dev;
+    e_mem_t mem;
+
+    e_set_host_verbosity(H_D0);
+    e_set_loader_verbosity(L_D0);
+
+    /* initialize workgroup, allocate and clear shared memory */
+    if(e_init(NULL) != E_OK)
+        FAIL("Can't init!\n");
+    e_reset_system();
+    if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK)
+        FAIL("Can't open!\n");
+    if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
+        FAIL("Can't alloc!\n");
+    if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+        FAIL("Can't clear shm!\n");
+
+    /* load programs */
+    printf("Starting cores:\n");
+    for(int y = 0; y < CORES_Y; y++) {
+        for(int x = 0; x < CORES_X; x++) {
+            printf("(%02d,%02d) ", x, y);
+            if(e_load(filename, &dev, x, y, E_TRUE) != E_OK)
+                FAIL("Can't load!\n");
+        }
+        printf("\n");
+    }
+
+    /* ================================================================ */
+    printf("Polling shared memory.\n");
+    while(1) {
+
+        while(1) {
+            /* read polling flag: the first 32-bit word of the shared
+               memory, i.e. the first member of shm_t */
+            if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
+                      sizeof(pollflag)) == E_ERR)
+                FAIL("Can't read pollflag!\n");
+
+            /* wait for something */
+            if(pollflag != POLL_BUSY) break;
+        }
+
+        /* finish if done */
+        if(pollflag == POLL_DONE) break;
+
+        /* read full shared memory */
+        if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+            FAIL("Can't read full shm!\n");
+
+        /* reset pollflag: the device side waits for the flag to leave
+           POLL_READY before it continues */
+        pollflag = POLL_BUSY;
+        if(e_write(&mem, 0, 0, (off_t)0, &pollflag,
+                   sizeof(pollflag)) == E_ERR) {
+            FAIL("Can't reset pollflag!\n");
+        }
+
+        /* print iteration */
+        printf("0x%08x\r", (unsigned)shm.iteration); fflush(stdout);
+
+        /* write data */
+        //write_populations(shm.lattice, shm.iteration);
+        write_image(shm.lattice, shm.iteration);
+        write_timers(shm.timers, shm.iteration);
+    }
+    /* ================================================================ */
+
+    if(e_free(&mem) != E_OK) FAIL("Can't free!\n");
+    if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
+    if(e_finalize() != E_OK) FAIL("Can't finalize!\n");
+
+    /* hand the result files back to the invoking user when run via sudo */
+    fixsudo("populations.dat");
+    fixsudo("timers.dat");
+
+    printf("\nProgram finished successfully.\n");
+    printf("Convert ...\n");
+    write_animation();
+
+    return(0);
+}
+
--- /dev/null
+/* shared data types and external memory layout
+ *
+ * Included by both the host and the device sources; both sides must be
+ * rebuilt whenever anything in here changes, because they exchange shm_t
+ * as raw memory. */
+#ifndef _SHARED_H_
+#define _SHARED_H_
+
+#include <stdint.h>
+
+/* preprocessor magic */
+/* compile-time assertion: a true condition yields an array type of
+   negative size and therefore a compile error. No trailing semicolon
+   after while(0), so `BUILD_BUG(c);` is exactly one statement. */
+#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0)
+#define UNUSED __attribute__((unused))
+#undef PACKED
+#define PACKED __attribute__((packed))
+#undef ALIGN
+#define ALIGN(X) __attribute__((aligned(X)))
+
+/* number of cores */
+#define CORES_X 4
+#define CORES_Y 4
+#define NUM_CORES (CORES_X * CORES_Y)
+
+/* size of per-core subgrid: 26 x 26 nodes of 9 floats are 24336 bytes,
+   just below the 24 KiB limit the device code checks at compile time */
+#define BLOCK_X 26
+#define BLOCK_Y 26
+
+/* number of timer slots recorded per core */
+#define TIMERS 12
+
+/* pollflag values */
+#define POLL_BUSY 0x00  /* device is computing, nothing to fetch */
+#define POLL_READY 0x01 /* device has published data and waits */
+#define POLL_DONE 0x02  /* device has finished its last iteration */
+
+/* floating point type */
+typedef float FLOAT;
+
+/* node and block type (D2Q9) */
+typedef FLOAT d2q9_node_t[9];
+typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
+
+/* shared memory structure
+   pollflag has to stay the first member: the host polls it by reading
+   the first 32-bit word of the shared memory area */
+typedef struct {
+    uint32_t pollflag;
+    uint32_t iteration;
+    uint32_t timers[CORES_Y][CORES_X][TIMERS];
+    d2q9_block_t lattice[CORES_Y][CORES_X];
+} ALIGN(8) shm_t;
+
+#endif /* _SHARED_H_ */
+
+++ /dev/null
-# Template Makefile for Epiphany
-
-# host toolchain
-HCC = gcc
-HCFLAGS = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall
-HLFLAGS = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal
-ECHO = /bin/echo -e
-
-# target toolchain
-ECC = e-gcc
-EOC = e-objcopy
-ECFLAGS = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math
-ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib
-EOFLAGS = -R .shared_dram
-
-# host application
-HAPP = $(DEST)/ep_main
-HOBJS = $(HDEST)/main.o $(HDEST)/data.o
-
-# epiphany applications
-EAPPS = $(DEST)/lb_2d.srec
-ECOMMON = $(EDEST)/d2q9.o
-
-# folders
-HSRC = hsrc
-HDEST = hobj
-ESRC = esrc
-EDEST = eobj
-DEST = bin
-
-# === Magic begins here ===================================================
-EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
-EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
-
-.SECONDARY:
-.PHONY: all help host target folders run clean
-.NOTPARALLEL: clean
-
-# === Phony Rules =========================================================
-help:
- @$(ECHO)
- @$(ECHO) "Epiphany Makefile - Help"
- @$(ECHO) " help show this help"
- @$(ECHO) " host build host application ($(HAPP))"
- @$(ECHO) " target build epiphany applications ($(EAPPS))"
- @$(ECHO) " all build all"
- @$(ECHO) " run build all, then run host application"
- @$(ECHO) " clean remove applications and intermediate files"
- @$(ECHO)
-
-all: host target
-
-host: folders $(HAPP)
-
-target: folders $(EAPPS)
-
-folders: $(HDEST) $(EDEST) $(DEST)
-
-run: host target
- @$(ECHO) "\tRUN"
- @sudo LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) \
- EPIPHANY_HDF=$(EPIPHANY_HDF) \
- $(HAPP)
-
-clean:
- @$(ECHO) "\tCLEAN"
- @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS)
- @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
- 2>/dev/null
-
-$(HDEST):
- @$(ECHO) "\t(HOST) MKDIR $(HDEST)"
- @mkdir -p $(HDEST)
-
-$(EDEST):
- @$(ECHO) "\t(HOST) MKDIR $(EDEST)"
- @mkdir -p $(EDEST)
-
-$(DEST):
- @$(ECHO) "\t(HOST) MKDIR $(DEST)"
- @mkdir -p $(DEST)
-
-# === Host Rules ==========================================================
-$(HAPP): $(HOBJS)
- @$(ECHO) "\t(HOST) LINK\t$@"
- @$(HCC) -o $@ $^ $(HLFLAGS)
-
-$(HDEST)/%.o: $(HSRC)/%.c
- @$(ECHO) "\t(HOST) CC\t$@"
- @$(HCC) $(HCFLAGS) -c -o $@ $^
-
-# === Target Rules ========================================================
-$(DEST)/%.srec: $(EDEST)/%.elf
- @$(ECHO) "\t(TARGET) OBJCOPY $@"
- @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@
-
-$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
- @$(ECHO) "\t(TARGET) LINK\t$@"
- @$(ECC) -o $@ $^ $(ELFLAGS)
-
-$(EDEST)/%.o: $(ESRC)/%.c
- @$(ECHO) "\t(TARGET) CC\t$@"
- @$(ECC) $(ECFLAGS) -c -o $@ $^
-# =========================================================================
-
+++ /dev/null
-/* D2Q9 lattice boltzmann functions */
-
-#include <e-lib.h>
-#include "../shared.h"
-#include "lb.h"
-
-/* velocities */
-static const int d2q9_v[9][2] = { { 0, 0},
- {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
- { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
-};
-
-/* weights */
-static const FLOAT d2q9_w[9] = { 4./9.,
- 1./36., 1./9., 1./36., 1./9.,
- 1./36., 1./9., 1./36., 1./9.,
-};
-
-void d2q9_init(d2q9_block_t block)
-{
- /* all with rho = 0.1 */
- for(int y = 0; y < BLOCK_Y; y++)
- for(int x = 0; x < BLOCK_X; x++)
- for(int q = 0; q < 9; q++)
- block[y][x][q] = 0.1 * d2q9_w[q];
-
- if(core == 0) {
- /* except here with 0.2 */
- for(int q = 0; q < 9; q++)
- block[0][0][q] = 0.2 * d2q9_w[q];
- }
-
- return;
-}
-
-void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
-{
- /* macroscopic */
- FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
- f[y][x][3] + f[y][x][4] + f[y][x][5] +
- f[y][x][6] + f[y][x][7] + f[y][x][8];
- FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
- f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
- FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
- f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
- FLOAT sqr = 1.5 * (ux*ux + uy*uy);
-
- /* update node */
- for(int q = 0; q < 9; q++) {
- FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
- FLOAT eq = rho * d2q9_w[q] *
- (1. + 3. * cu + 4.5 * cu*cu - sqr);
- f[y][x][q] *= (1.0 - omega);
- f[y][x][q] += omega * eq;
- }
-
- /* swap */
- for(int q = 1; q <= 4; q++) {
- FLOAT tmp = f[y][x][q];
- f[y][x][q] = f[y][x][q+4];
- f[y][x][q+4] = tmp;
- }
-}
-
-void d2q9_stream(d2q9_block_t f, int x, int y)
-{
- for(int q = 1; q <= 4; q++) {
- int next_row = row;
- int next_col = col;
- int next_x = x + d2q9_v[q][0];
- int next_y = y + d2q9_v[q][1];
-
- /* inner borders (extend) */
- if(next_x < 0) { next_col--; next_x += BLOCK_X; }
- else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; }
- if(next_y < 0) { next_row--; next_y += BLOCK_Y; }
- else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; }
-
-#if 0
- /* outer borders (wrap around) */
- if(next_col < 0) { next_col += CORES_X; }
- else if(next_col >= CORES_X) { next_col -= CORES_X; }
- if(next_row < 0) { next_row += CORES_Y; }
- else if(next_row >= CORES_Y) { next_row -= CORES_Y; }
-#else
- /* full bounce-back on all sides */
- if(next_col < 0) { return; }
- else if(next_col >= CORES_X) { return; }
- if(next_row < 0) { return; }
- else if(next_row >= CORES_Y) { return; }
-#endif
-
- /* f: local block, g: local or remote block */
- d2q9_block_t *g = (void*)f;
- if(next_row != row || next_col != col) {
- g = e_get_global_address(next_col, next_row, (void*)f);
- }
-
- /* stream/swap f and g */
- FLOAT tmp = f[y][x][q+4];
- f[y][x][q+4] = (*g)[next_y][next_x][q];
- (*g)[next_y][next_x][q] = tmp;
- }
-}
-
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
-{
- /* don't touch the border nodes */
- for(int x = 1; x < BLOCK_X-1; x++) {
- for(int y = 1; y < BLOCK_Y-1; y++) {
- /* macroscopic */
- FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
- f[y][x][3] + f[y][x][4] + f[y][x][5] +
- f[y][x][6] + f[y][x][7] + f[y][x][8];
- FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
- f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
- FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
- f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
- FLOAT sqr = 1.5 * (ux*ux + uy*uy);
-
- /* update node */
- for(int q = 0; q < 9; q++) {
- FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
- FLOAT eq = rho * d2q9_w[q] *
- (1. + 3. * cu + 4.5 * cu*cu - sqr);
- f[y][x][q] *= (1.0 - omega);
- f[y][x][q] += omega * eq;
- }
-
- /* stream */
- for(int q = 0; q <= 4; q++) {
- int next_x = x + d2q9_v[q][0];
- int next_y = y + d2q9_v[q][1];
-
- FLOAT tmp = f[y][x][q];
- f[y][x][q] = f[y][x][q+4];
- f[y][x][q+4] = f[next_y][next_x][q];
- f[next_y][next_x][q] = tmp;
- }
- }
- }
-}
-
+++ /dev/null
-/* lattice boltzmann functions */
-
-#include "../shared.h"
-
-/* core index */
-extern unsigned int row, col, core;
-
-/* D2Q9 functions */
-void d2q9_init (d2q9_block_t);
-void d2q9_collide (d2q9_block_t, int x, int y, FLOAT);
-void d2q9_stream (d2q9_block_t, int x, int y);
-void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT);
-
+++ /dev/null
-/* D2Q9 lattice boltzmann implementation */
-
-#include <e-lib.h>
-#include "../shared.h"
-
-#include <stdint.h>
-#include <string.h>
-
-#include "lb.h"
-
-/* shared memory overlay */
-volatile shm_t shm SECTION(".shared_dram");
-
-/* statically allocate dummy memory and local block overlay
- to prevent linker from putting stuff in banks 1..3 */
-static uint8_t dummy_bank1[8192] UNUSED SECTION(".data_bank1");
-static uint8_t dummy_bank2[8192] UNUSED SECTION(".data_bank2");
-static uint8_t dummy_bank3[8192] UNUSED SECTION(".data_bank3");
-static d2q9_block_t *block = (void*)0x2000;
-
-/* barrier structures */
-volatile e_barrier_t barriers[NUM_CORES];
- e_barrier_t *tgt_bars[NUM_CORES];
-
-/* global index variables */
-unsigned int row, col, core;
-
-void init(void)
-{
- /* compile-time checks */
- BUILD_BUG(BLOCK_X * BLOCK_Y * sizeof(d2q9_node_t) > 24*1024);
- BUILD_BUG(BLOCK_X < 3 || BLOCK_Y < 3);
- BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
- BUILD_BUG(CORES_X > 4 || CORES_Y > 4);
-
- /* core index */
- e_coords_from_coreid(e_get_coreid(), &col, &row);
- core = row * CORES_X + col;
-
- /* barrier initialization */
- e_barrier_init(barriers, tgt_bars);
-}
-
-#define READ_TIMER(X) \
- do { \
- clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
- e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
- e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
- } while(0);
-
-int main()
-{
- const FLOAT omega = 1.0;
- unsigned clocks[TIMERS] = {0};
-
- init();
- d2q9_init(*block);
-
- for(int i = 0; i < 500; i++) {
- READ_TIMER(0);
-
-#if 1
- /* collide all nodes */
- for(int y = 0; y < BLOCK_Y; y++)
- for(int x = 0; x < BLOCK_X; x++)
- d2q9_collide(*block, x, y, omega);
-
- /* synchronize */
- READ_TIMER(1);
- e_barrier(barriers, tgt_bars);
- READ_TIMER(2);
-
- /* stream all nodes */
- for(int y = 0; y < BLOCK_Y; y++)
- for(int x = 0; x < BLOCK_X; x++)
- d2q9_stream(*block, x, y);
- READ_TIMER(3);
-
-#else
- /* collide boundaries: top, bottom */
- for(int x = 0; x < BLOCK_X; x++) {
- d2q9_collide(*block, x, 0, omega);
- d2q9_collide(*block, x, BLOCK_Y-1, omega);
- }
- READ_TIMER(1);
-
- /* collide boundaries: left, right */
- for(int y = 1; y < BLOCK_Y-1; y++) {
- d2q9_collide(*block, 0, y, omega);
- d2q9_collide(*block, BLOCK_X-1, y, omega);
- }
-
- /* synchronize */
- READ_TIMER(2);
- e_barrier(barriers, tgt_bars);
- READ_TIMER(3);
-
- /* collide and stream the bulk */
- d2q9_collide_stream_bulk(*block, omega);
- READ_TIMER(4);
-
- /* stream the boundaries: top, bottom */
- for(int x = 0; x < BLOCK_X; x++) {
- d2q9_stream(*block, x, 0 );
- d2q9_stream(*block, x, BLOCK_Y-1);
- }
- READ_TIMER(5);
-
- /* stream the boundaries: left, right */
- for(int y = 1; y < BLOCK_Y-1; y++) {
- d2q9_stream(*block, 0, y);
- d2q9_stream(*block, BLOCK_X-1, y);
- }
- READ_TIMER(6);
-#endif
-
- /* copy data to shm if necessary */
- if(!(i%100)) {
- /* copy iteration, lattice and timers to shm */
- if(core == 0)
- shm.iteration = i;
-
- memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t));
-
- for(int i = 0; i < TIMERS; i++)
- shm.timers[row][col][i] = clocks[i];
-
- /* synchronize */
- e_barrier(barriers, tgt_bars);
-
- /* flag host and wait */
- if(core == 0) {
- shm.pollflag = POLL_READY;
- while(shm.pollflag == POLL_READY);
- }
- }
-
- /* synchronize */
- e_barrier(barriers, tgt_bars);
- READ_TIMER(TIMERS-1);
- }
-
- /* last iteration done: flag host and stop */
- if(core == 0) shm.pollflag = POLL_DONE;
- while(1) __asm__ volatile("idle");
-}
-
+++ /dev/null
-/* Helper Functions to handle data (2D) */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../shared.h"
-
-/* fix file owner if run with sudo */
-void fixsudo(const char *filename)
-{
- if(getenv("SUDO_UID") && getenv("SUDO_GID")) {
- int uid = atoi(getenv("SUDO_UID"));
- int gid = atoi(getenv("SUDO_GID"));
- if(chown(filename, uid, gid)) {
- perror("fixsudo/chown");
- return;
- }
- }
-}
-
-
-/* write a (semi-) human-readable dump of the lattice */
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
-{
- FILE *file = fopen("populations.dat", "a");
- if(!file) {
- perror("write_populations/fopen");
- return;
- }
-
- for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCK_Y; y++) {
- for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCK_X; x++) {
- fprintf(file, "%3d: [%3d,%3d]: ",
- iter,
- cx * BLOCK_X + x,
- cy * BLOCK_Y + y
- );
- for(int q = 0; q < 9; q++) {
- fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]);
- }
- fprintf(file, "\n");
- }
- }
- }
- }
- fprintf(file, "\n");
-
- /* close */
- fclose(file);
-
- return;
-}
-
-/* write an 8-bit grayscale, binary PPM image of the lattice */
-void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
-{
- char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
-
- /* open image file and write header */
- FILE *file = fopen(name, "wb");
- if(!file) {
- perror("write_image/fopen");
- return;
- }
- fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255);
-
- /* calculate all densities and remember min/max */
- FLOAT min = 1.0, max = 0;
- FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X];
- for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCK_Y; y++) {
- for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCK_X; x++) {
- FLOAT rho = 0;
- for(int q = 0; q < 9; q++)
- rho += lattice[cy][cx][y][x][q];
- rhos[cy][y][cx][x] = rho;
-
- if(rho < min) min = rho;
- if(rho > max) max = rho;
- }
- }
- }
- }
-
- /* scale values and write them to the image */
- for(int cy = 0; cy < CORES_Y; cy++) {
- for(int y = 0; y < BLOCK_Y; y++) {
- for(int cx = 0; cx < CORES_X; cx++) {
- for(int x = 0; x < BLOCK_X; x++) {
- unsigned char gray;
- gray = (255. * (rhos[cy][y][cx][x]-min) / (max-min));
- fwrite(&gray, 1, 1, file);
- }
- }
- }
- }
-
- /* close the file and chown if run with sudo */
- fclose(file);
- fixsudo(name);
-
- return;
-}
-
-/* convert image files to animated gif ./tmp/anim.gif */
-void write_animation(void)
-{
- int result;
-
- /* call imagemagick */
- result = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)result;
- fixsudo("./tmp/anim.gif");
-
- return;
-}
-
-/* write timer values */
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
-{
- FILE *file = fopen("timers.dat", "ab");
- if(!file) {
- perror("write_timers/fopen");
- return;
- }
-
- fprintf(file, "Timers: i=%d\n", iter);
- for(int y = 0; y < CORES_Y; y++) {
- for(int x = 0; x < CORES_X; x++) {
- fprintf(file, "[%d,%d]: ", x, y);
- for(int t = 0; t < TIMERS; t++) {
- fprintf(file, "%8d ", timers[y][x][t]);
- }
- fprintf(file, "\n");
- }
- }
-
- fclose(file);
-}
+++ /dev/null
-/* Host Application */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include <e-hal.h>
-#include "../shared.h"
-
-#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
-#define SHM_OFFSET 0x01000000
-
-/* helper functions */
-void fixsudo(const char *filename);
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_animation(void);
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
-
-/* globals */
-static shm_t shm = { 0 }; /* local shm copy */
-static uint32_t pollflag;
-
-int main()
-{
- char *filename = "bin/lb_2d.srec";
-
- /* remove old results */
- int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat");
- (void)dummy;
-
- e_epiphany_t dev;
- e_mem_t mem;
-
- e_set_host_verbosity(H_D0);
- e_set_loader_verbosity(L_D0);
-
- /* initialize workgroup, allocate and clear shared memory */
- if(e_init(NULL) != E_OK)
- FAIL("Can't init!\n");
- e_reset_system();
- if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK)
- FAIL("Can't open!\n");
- if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
- FAIL("Can't alloc!\n");
- if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
- FAIL("Can't clear shm!\n");
-
- /* load programs */
- printf("Starting cores:\n");
- for(int y = 0; y < CORES_Y; y++) {
- for(int x = 0; x < CORES_X; x++) {
- printf("(%02d,%02d) ", x, y);
- if(e_load(filename, &dev, x, y, E_TRUE) != E_OK)
- FAIL("Can't load!\n");
- }
- printf("\n");
- }
-
- /* ================================================================ */
- printf("Polling shared memory.\n");
- while(1) {
-
- while(1) {
- /* read polling flag */
- if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
- sizeof(uint32_t)) == E_ERR)
- FAIL("Can't read pollflag!\n");
-
- /* wait for something */
- if(pollflag != POLL_BUSY) break;
- }
-
- /* finish if done */
- if(pollflag == POLL_DONE) break;
-
- /* read full shared memory */
- if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
- FAIL("Can't read full shm!\n");
-
- /* reset pollflag */
- pollflag = 0;
- if(e_write(&mem, 0, 0, (off_t)0, &pollflag,
- sizeof(uint32_t)) == E_ERR) {
- FAIL("Can't reset pollflag!\n");
- }
-
- /* print iteration */
- printf("0x%08x\r", shm.iteration); fflush(stdout);
-
- /* write data */
- //write_populations(shm.lattice, shm.iteration);
- write_image(shm.lattice, shm.iteration);
- write_timers(shm.timers, shm.iteration);
- }
- /* ================================================================ */
-
- if(e_free(&mem) != E_OK) FAIL("Can't free!\n");
- if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
- if(e_finalize() != E_OK) FAIL("Can't finalize!\n");
-
- fixsudo("populations.dat");
- fixsudo("timers.dat");
-
- printf("\nProgram finished successfully.\n");
- printf("Convert ...\n");
- write_animation();
-
- return(0);
-}
-
+++ /dev/null
-/* shared data types and external memory layout */
-#ifndef _SHARED_H_
-#define _SHARED_H_
-
-#include <stdint.h>
-
-/* preprocessor magic */
-#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0);
-#define UNUSED __attribute__((unused))
-#undef PACKED
-#define PACKED __attribute__((packed))
-#undef ALIGN
-#define ALIGN(X) __attribute__((aligned(X)))
-
-/* number of cores */
-#define CORES_X 4
-#define CORES_Y 4
-#define NUM_CORES (CORES_X * CORES_Y)
-
-/* size of per-core subgrid */
-#define BLOCK_X 26
-#define BLOCK_Y 26
-
-#define TIMERS 12
-
-/* pollflag values */
-#define POLL_BUSY 0x00
-#define POLL_READY 0x01
-#define POLL_DONE 0x02
-
-/* floating point type */
-typedef float FLOAT;
-
-/* node and block type (D2Q9) */
-typedef FLOAT d2q9_node_t[9];
-typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
-
-/* shared memory structure */
-typedef struct {
- uint32_t pollflag;
- uint32_t iteration;
- uint32_t timers[CORES_Y][CORES_X][TIMERS];
- d2q9_block_t lattice[CORES_Y][CORES_X];
-} ALIGN(8) shm_t;
-
-#endif /* _SHARED_H_ */
-