From: Sebastian Date: Wed, 13 Aug 2014 14:15:43 +0000 (+0000) Subject: rename folder X-Git-Url: http://sraa.de/git/%3CTMPL_VAR%20NAME=SCRIPT%3E?a=commitdiff_plain;h=a013d0af9bf0e61a4f10601ae8bd34a83ecb4f75;p=lattice-boltzmann-epiphany.git rename folder The main code and the underlying data structures are too different to bother building a 2D and a 3D simulation from the same code base. --- diff --git a/d2q9/Makefile b/d2q9/Makefile new file mode 100644 index 0000000..e4a6bb8 --- /dev/null +++ b/d2q9/Makefile @@ -0,0 +1,105 @@ +# Template Makefile for Epiphany + +# host toolchain +HCC = gcc +HCFLAGS = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall +HLFLAGS = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal +ECHO = /bin/echo -e + +# target toolchain +ECC = e-gcc +EOC = e-objcopy +ECFLAGS = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math +ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib +EOFLAGS = -R .shared_dram + +# host application +HAPP = $(DEST)/ep_main +HOBJS = $(HDEST)/main.o $(HDEST)/data.o + +# epiphany applications +EAPPS = $(DEST)/main.srec +ECOMMON = $(EDEST)/d2q9.o + +# folders +HSRC = hsrc +HDEST = hobj +ESRC = esrc +EDEST = eobj +DEST = bin + +# === Magic begins here =================================================== +EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON) +EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf) + +.SECONDARY: +.PHONY: all help host target folders run clean +.NOTPARALLEL: clean + +# === Phony Rules ========================================================= +help: + @$(ECHO) + @$(ECHO) "Epiphany Makefile - Help" + @$(ECHO) " help show this help" + @$(ECHO) " host build host application ($(HAPP))" + @$(ECHO) " target build epiphany applications ($(EAPPS))" + @$(ECHO) " all build all" + @$(ECHO) " run build all, then run host application" + @$(ECHO) " clean remove applications and intermediate files" + @$(ECHO) + +all: host target + +host: folders $(HAPP) + +target: folders $(EAPPS) + +folders: $(HDEST) $(EDEST) $(DEST) + +run: host target + @$(ECHO) "\tRUN" + @sudo LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) \ + EPIPHANY_HDF=$(EPIPHANY_HDF) \ + $(HAPP) + +clean: + @$(ECHO) "\tCLEAN" + @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS) + @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \ + 2>/dev/null + +$(HDEST): + @$(ECHO) "\t(HOST) MKDIR $(HDEST)" + @mkdir -p $(HDEST) + +$(EDEST): + @$(ECHO) "\t(HOST) MKDIR $(EDEST)" + @mkdir -p $(EDEST) + +$(DEST): + @$(ECHO) "\t(HOST) MKDIR $(DEST)" + @mkdir -p $(DEST) + +# === Host Rules ========================================================== +$(HAPP): $(HOBJS) + @$(ECHO) "\t(HOST) LINK\t$@" + @$(HCC) -o $@ $^ $(HLFLAGS) + +$(HDEST)/%.o: $(HSRC)/%.c + @$(ECHO) "\t(HOST) CC\t$@" + @$(HCC) $(HCFLAGS) -c -o $@ $^ + +# === Target Rules ======================================================== +$(DEST)/%.srec: $(EDEST)/%.elf + @$(ECHO) "\t(TARGET) OBJCOPY $@" + @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@ + +$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON) + @$(ECHO) "\t(TARGET) LINK\t$@" + @$(ECC) -o $@ $^ $(ELFLAGS) + +$(EDEST)/%.o: $(ESRC)/%.c + @$(ECHO) "\t(TARGET) CC\t$@" + @$(ECC) $(ECFLAGS) -c -o $@ $^ +# ========================================================================= + diff --git a/d2q9/esrc/d2q9.c b/d2q9/esrc/d2q9.c new file mode 100644 index 0000000..9b3b94a --- /dev/null +++ b/d2q9/esrc/d2q9.c @@ -0,0 +1,143 @@ +/* D2Q9 lattice boltzmann functions */ + +#include +#include "../shared.h" +#include "d2q9.h" + +/* velocities */ +static const int d2q9_v[9][2] = { { 0, 0}, + {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1}, + { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, +}; + +/* weights */ +static const FLOAT d2q9_w[9] = { 4./9., + 1./36., 1./9., 1./36., 1./9., + 1./36., 1./9., 1./36., 1./9., +}; + +void d2q9_init(d2q9_block_t block) +{ + /* all with rho = 0.1 */ + for(int y = 0; y < BLOCK_Y; y++) + for(int x = 0; x < BLOCK_X; x++) + for(int q = 0; q < 9; q++) + block[y][x][q] = 0.1 * d2q9_w[q]; + + if(core == 0) { + /* except here with 0.2 */ + for(int q = 0; q < 9; q++) + block[0][0][q] = 0.2 * d2q9_w[q]; + } + + return; +} + +void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega) +{ + /* macroscopic */ + FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] + + f[y][x][3] + f[y][x][4] + f[y][x][5] + + f[y][x][6] + f[y][x][7] + f[y][x][8]; + FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] - + f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho; + FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] - + f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; + FLOAT sqr = 1.5 * (ux*ux + uy*uy); + + /* update node */ + for(int q = 0; q < 9; q++) { + FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; + FLOAT eq = rho * d2q9_w[q] * + (1. + 3. * cu + 4.5 * cu*cu - sqr); + f[y][x][q] *= (1.0 - omega); + f[y][x][q] += omega * eq; + } + + /* swap */ + for(int q = 1; q <= 4; q++) { + FLOAT tmp = f[y][x][q]; + f[y][x][q] = f[y][x][q+4]; + f[y][x][q+4] = tmp; + } +} + +void d2q9_stream(d2q9_block_t f, int x, int y) +{ + for(int q = 1; q <= 4; q++) { + int next_row = row; + int next_col = col; + int next_x = x + d2q9_v[q][0]; + int next_y = y + d2q9_v[q][1]; + + /* inner borders (extend) */ + if(next_x < 0) { next_col--; next_x += BLOCK_X; } + else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; } + if(next_y < 0) { next_row--; next_y += BLOCK_Y; } + else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; } + +#if 0 + /* outer borders (wrap around) */ + if(next_col < 0) { next_col += CORES_X; } + else if(next_col >= CORES_X) { next_col -= CORES_X; } + if(next_row < 0) { next_row += CORES_Y; } + else if(next_row >= CORES_Y) { next_row -= CORES_Y; } +#else + /* full bounce-back on all sides */ + if(next_col < 0) { return; } + else if(next_col >= CORES_X) { return; } + if(next_row < 0) { return; } + else if(next_row >= CORES_Y) { return; } +#endif + + /* f: local block, g: local or remote block */ + d2q9_block_t *g = (void*)f; + if(next_row != row || next_col != col) { + g = e_get_global_address(next_col, next_row, (void*)f); + } + + /* stream/swap f and g */ + FLOAT tmp = f[y][x][q+4]; + f[y][x][q+4] = (*g)[next_y][next_x][q]; + (*g)[next_y][next_x][q] = tmp; + } +} + +void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega) +{ + /* don't touch the border nodes */ + for(int x = 1; x < BLOCK_X-1; x++) { + for(int y = 1; y < BLOCK_Y-1; y++) { + /* macroscopic */ + FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] + + f[y][x][3] + f[y][x][4] + f[y][x][5] + + f[y][x][6] + f[y][x][7] + f[y][x][8]; + FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] - + f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho; + FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] - + f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; + FLOAT sqr = 1.5 * (ux*ux + uy*uy); + + /* update node */ + for(int q = 0; q < 9; q++) { + FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; + FLOAT eq = rho * d2q9_w[q] * + (1. + 3. * cu + 4.5 * cu*cu - sqr); + f[y][x][q] *= (1.0 - omega); + f[y][x][q] += omega * eq; + } + + /* stream */ + for(int q = 0; q <= 4; q++) { + int next_x = x + d2q9_v[q][0]; + int next_y = y + d2q9_v[q][1]; + + FLOAT tmp = f[y][x][q]; + f[y][x][q] = f[y][x][q+4]; + f[y][x][q+4] = f[next_y][next_x][q]; + f[next_y][next_x][q] = tmp; + } + } + } +} + diff --git a/d2q9/esrc/d2q9.h b/d2q9/esrc/d2q9.h new file mode 100644 index 0000000..a72b39b --- /dev/null +++ b/d2q9/esrc/d2q9.h @@ -0,0 +1,13 @@ +/* lattice boltzmann functions */ + +#include "../shared.h" + +/* core index */ +extern unsigned int row, col, core; + +/* D2Q9 functions */ +void d2q9_init (d2q9_block_t); +void d2q9_collide (d2q9_block_t, int x, int y, FLOAT); +void d2q9_stream (d2q9_block_t, int x, int y); +void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT); + diff --git a/d2q9/esrc/main.c b/d2q9/esrc/main.c new file mode 100644 index 0000000..fbd21cc --- /dev/null +++ b/d2q9/esrc/main.c @@ -0,0 +1,147 @@ +/* D2Q9 lattice boltzmann implementation */ + +#include +#include "../shared.h" + +#include +#include + +#include "d2q9.h" + +/* shared memory overlay */ +volatile shm_t shm SECTION(".shared_dram"); + +/* statically allocate dummy memory and local block overlay + to prevent linker from putting stuff in banks 1..3 */ +static uint8_t dummy_bank1[8192] UNUSED SECTION(".data_bank1"); +static uint8_t dummy_bank2[8192] UNUSED SECTION(".data_bank2"); +static uint8_t dummy_bank3[8192] UNUSED SECTION(".data_bank3"); +static d2q9_block_t *block = (void*)0x2000; + +/* barrier structures */ +volatile e_barrier_t barriers[NUM_CORES]; + e_barrier_t *tgt_bars[NUM_CORES]; + +/* global index variables */ +unsigned int row, col, core; + +void init(void) +{ + /* compile-time checks */ + BUILD_BUG(BLOCK_X * BLOCK_Y * sizeof(d2q9_node_t) > 24*1024); + BUILD_BUG(BLOCK_X < 3 || BLOCK_Y < 3); + BUILD_BUG(CORES_X < 1 || CORES_Y < 1); + BUILD_BUG(CORES_X > 4 || CORES_Y > 4); + + /* core index */ + e_coords_from_coreid(e_get_coreid(), &col, &row); + core = row * CORES_X + col; + + /* barrier initialization */ + e_barrier_init(barriers, tgt_bars); +} + +#define READ_TIMER(X) \ + do { \ + clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \ + e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \ + e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \ + } while(0); + +int main() +{ + const FLOAT omega = 1.0; + unsigned clocks[TIMERS] = {0}; + + init(); + d2q9_init(*block); + + for(int i = 0; i < 500; i++) { + READ_TIMER(0); + +#if 1 + /* collide all nodes */ + for(int y = 0; y < BLOCK_Y; y++) + for(int x = 0; x < BLOCK_X; x++) + d2q9_collide(*block, x, y, omega); + + /* synchronize */ + READ_TIMER(1); + e_barrier(barriers, tgt_bars); + READ_TIMER(2); + + /* stream all nodes */ + for(int y = 0; y < BLOCK_Y; y++) + for(int x = 0; x < BLOCK_X; x++) + d2q9_stream(*block, x, y); + READ_TIMER(3); + +#else + /* collide boundaries: top, bottom */ + for(int x = 0; x < BLOCK_X; x++) { + d2q9_collide(*block, x, 0, omega); + d2q9_collide(*block, x, BLOCK_Y-1, omega); + } + READ_TIMER(1); + + /* collide boundaries: left, right */ + for(int y = 1; y < BLOCK_Y-1; y++) { + d2q9_collide(*block, 0, y, omega); + d2q9_collide(*block, BLOCK_X-1, y, omega); + } + + /* synchronize */ + READ_TIMER(2); + e_barrier(barriers, tgt_bars); + READ_TIMER(3); + + /* collide and stream the bulk */ + d2q9_collide_stream_bulk(*block, omega); + READ_TIMER(4); + + /* stream the boundaries: top, bottom */ + for(int x = 0; x < BLOCK_X; x++) { + d2q9_stream(*block, x, 0 ); + d2q9_stream(*block, x, BLOCK_Y-1); + } + READ_TIMER(5); + + /* stream the boundaries: left, right */ + for(int y = 1; y < BLOCK_Y-1; y++) { + d2q9_stream(*block, 0, y); + d2q9_stream(*block, BLOCK_X-1, y); + } + READ_TIMER(6); +#endif + + /* copy data to shm if necessary */ + if(!(i%100)) { + /* copy iteration, lattice and timers to shm */ + if(core == 0) + shm.iteration = i; + + memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t)); + + for(int i = 0; i < TIMERS; i++) + shm.timers[row][col][i] = clocks[i]; + + /* synchronize */ + e_barrier(barriers, tgt_bars); + + /* flag host and wait */ + if(core == 0) { + shm.pollflag = POLL_READY; + while(shm.pollflag == POLL_READY); + } + } + + /* synchronize */ + e_barrier(barriers, tgt_bars); + READ_TIMER(TIMERS-1); + } + + /* last iteration done: flag host and stop */ + if(core == 0) shm.pollflag = POLL_DONE; + while(1) __asm__ volatile("idle"); +} + diff --git a/d2q9/hsrc/data.c b/d2q9/hsrc/data.c new file mode 100644 index 0000000..675fe50 --- /dev/null +++ b/d2q9/hsrc/data.c @@ -0,0 +1,142 @@ +/* Helper Functions to handle data (2D) */ + +#include +#include +#include + +#include "../shared.h" + +/* fix file owner if run with sudo */ +void fixsudo(const char *filename) +{ + if(getenv("SUDO_UID") && getenv("SUDO_GID")) { + int uid = atoi(getenv("SUDO_UID")); + int gid = atoi(getenv("SUDO_GID")); + if(chown(filename, uid, gid)) { + perror("fixsudo/chown"); + return; + } + } +} + + +/* write a (semi-) human-readable dump of the lattice */ +void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter) +{ + FILE *file = fopen("populations.dat", "a"); + if(!file) { + perror("write_populations/fopen"); + return; + } + + for(int cy = 0; cy < CORES_Y; cy++) { + for(int y = 0; y < BLOCK_Y; y++) { + for(int cx = 0; cx < CORES_X; cx++) { + for(int x = 0; x < BLOCK_X; x++) { + fprintf(file, "%3d: [%3d,%3d]: ", + iter, + cx * BLOCK_X + x, + cy * BLOCK_Y + y + ); + for(int q = 0; q < 9; q++) { + fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]); + } + fprintf(file, "\n"); + } + } + } + } + fprintf(file, "\n"); + + /* close */ + fclose(file); + + return; +} + +/* write an 8-bit grayscale, binary PPM image of the lattice */ +void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter) +{ + char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter); + + /* open image file and write header */ + FILE *file = fopen(name, "wb"); + if(!file) { + perror("write_image/fopen"); + return; + } + fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255); + + /* calculate all densities and remember min/max */ + FLOAT min = 1.0, max = 0; + FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X]; + for(int cy = 0; cy < CORES_Y; cy++) { + for(int y = 0; y < BLOCK_Y; y++) { + for(int cx = 0; cx < CORES_X; cx++) { + for(int x = 0; x < BLOCK_X; x++) { + FLOAT rho = 0; + for(int q = 0; q < 9; q++) + rho += lattice[cy][cx][y][x][q]; + rhos[cy][y][cx][x] = rho; + + if(rho < min) min = rho; + if(rho > max) max = rho; + } + } + } + } + + /* scale values and write them to the image */ + for(int cy = 0; cy < CORES_Y; cy++) { + for(int y = 0; y < BLOCK_Y; y++) { + for(int cx = 0; cx < CORES_X; cx++) { + for(int x = 0; x < BLOCK_X; x++) { + unsigned char gray; + gray = (255. * (rhos[cy][y][cx][x]-min) / (max-min)); + fwrite(&gray, 1, 1, file); + } + } + } + } + + /* close the file and chown if run with sudo */ + fclose(file); + fixsudo(name); + + return; +} + +/* convert image files to animated gif ./tmp/anim.gif */ +void write_animation(void) +{ + int result; + + /* call imagemagick */ + result = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)result; + fixsudo("./tmp/anim.gif"); + + return; +} + +/* write timer values */ +void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter) +{ + FILE *file = fopen("timers.dat", "ab"); + if(!file) { + perror("write_timers/fopen"); + return; + } + + fprintf(file, "Timers: i=%d\n", iter); + for(int y = 0; y < CORES_Y; y++) { + for(int x = 0; x < CORES_X; x++) { + fprintf(file, "[%d,%d]: ", x, y); + for(int t = 0; t < TIMERS; t++) { + fprintf(file, "%8d ", timers[y][x][t]); + } + fprintf(file, "\n"); + } + } + + fclose(file); +} diff --git a/d2q9/hsrc/main.c b/d2q9/hsrc/main.c new file mode 100644 index 0000000..48ec00a --- /dev/null +++ b/d2q9/hsrc/main.c @@ -0,0 +1,113 @@ +/* Host Application */ + +#include +#include +#include +#include +#include + +#include +#include "../shared.h" + +#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); } +#define SHM_OFFSET 0x01000000 + +/* helper functions */ +void fixsudo(const char *filename); +void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter); +void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter); +void write_animation(void); +void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter); + +/* globals */ +static shm_t shm = { 0 }; /* local shm copy */ +static uint32_t pollflag; + +int main() +{ + char *filename = "bin/main.srec"; + + /* remove old results */ + int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat"); + (void)dummy; + + e_epiphany_t dev; + e_mem_t mem; + + e_set_host_verbosity(H_D0); + e_set_loader_verbosity(L_D0); + + /* initialize workgroup, allocate and clear shared memory */ + if(e_init(NULL) != E_OK) + FAIL("Can't init!\n"); + e_reset_system(); + if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK) + FAIL("Can't open!\n"); + if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK) + FAIL("Can't alloc!\n"); + if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) + FAIL("Can't clear shm!\n"); + + /* load programs */ + printf("Starting cores:\n"); + for(int y = 0; y < CORES_Y; y++) { + for(int x = 0; x < CORES_X; x++) { + printf("(%02d,%02d) ", x, y); + if(e_load(filename, &dev, x, y, E_TRUE) != E_OK) + FAIL("Can't load!\n"); + } + printf("\n"); + } + + /* ================================================================ */ + printf("Polling shared memory.\n"); + while(1) { + + while(1) { + /* read polling flag */ + if(e_read(&mem, 0, 0, (off_t)0, &pollflag, + sizeof(uint32_t)) == E_ERR) + FAIL("Can't read pollflag!\n"); + + /* wait for something */ + if(pollflag != POLL_BUSY) break; + } + + /* finish if done */ + if(pollflag == POLL_DONE) break; + + /* read full shared memory */ + if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) + FAIL("Can't read full shm!\n"); + + /* reset pollflag */ + pollflag = 0; + if(e_write(&mem, 0, 0, (off_t)0, &pollflag, + sizeof(uint32_t)) == E_ERR) { + FAIL("Can't reset pollflag!\n"); + } + + /* print iteration */ + printf("0x%08x\r", shm.iteration); fflush(stdout); + + /* write data */ + //write_populations(shm.lattice, shm.iteration); + write_image(shm.lattice, shm.iteration); + write_timers(shm.timers, shm.iteration); + } + /* ================================================================ */ + + if(e_free(&mem) != E_OK) FAIL("Can't free!\n"); + if(e_close(&dev) != E_OK) FAIL("Can't close!\n"); + if(e_finalize() != E_OK) FAIL("Can't finalize!\n"); + + fixsudo("populations.dat"); + fixsudo("timers.dat"); + + printf("\nProgram finished successfully.\n"); + printf("Convert ...\n"); + write_animation(); + + return(0); +} + diff --git a/d2q9/shared.h b/d2q9/shared.h new file mode 100644 index 0000000..607a3e1 --- /dev/null +++ b/d2q9/shared.h @@ -0,0 +1,47 @@ +/* shared data types and external memory layout */ +#ifndef _SHARED_H_ +#define _SHARED_H_ + +#include + +/* preprocessor magic */ +#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0); +#define UNUSED __attribute__((unused)) +#undef PACKED +#define PACKED __attribute__((packed)) +#undef ALIGN +#define ALIGN(X) __attribute__((aligned(X))) + +/* number of cores */ +#define CORES_X 4 +#define CORES_Y 4 +#define NUM_CORES (CORES_X * CORES_Y) + +/* size of per-core subgrid */ +#define BLOCK_X 26 +#define BLOCK_Y 26 + +#define TIMERS 12 + +/* pollflag values */ +#define POLL_BUSY 0x00 +#define POLL_READY 0x01 +#define POLL_DONE 0x02 + +/* floating point type */ +typedef float FLOAT; + +/* node and block type (D2Q9) */ +typedef FLOAT d2q9_node_t[9]; +typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X]; + +/* shared memory structure */ +typedef struct { + uint32_t pollflag; + uint32_t iteration; + uint32_t timers[CORES_Y][CORES_X][TIMERS]; + d2q9_block_t lattice[CORES_Y][CORES_X]; +} ALIGN(8) shm_t; + +#endif /* _SHARED_H_ */ + diff --git a/lb/Makefile b/lb/Makefile deleted file mode 100644 index c18c0d6..0000000 --- a/lb/Makefile +++ /dev/null @@ -1,105 +0,0 @@ -# Template Makefile for Epiphany - -# host toolchain -HCC = gcc -HCFLAGS = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall -HLFLAGS = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal -ECHO = /bin/echo -e - -# target toolchain -ECC = e-gcc -EOC = e-objcopy -ECFLAGS = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math -ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib -EOFLAGS = -R .shared_dram - -# host application -HAPP = $(DEST)/ep_main -HOBJS = $(HDEST)/main.o $(HDEST)/data.o - -# epiphany applications -EAPPS = $(DEST)/lb_2d.srec -ECOMMON = $(EDEST)/d2q9.o - -# folders -HSRC = hsrc -HDEST = hobj -ESRC = esrc -EDEST = eobj -DEST = bin - -# === Magic begins here =================================================== -EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON) -EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf) - -.SECONDARY: -.PHONY: all help host target folders run clean -.NOTPARALLEL: clean - -# === Phony Rules ========================================================= -help: - @$(ECHO) - @$(ECHO) "Epiphany Makefile - Help" - @$(ECHO) " help show this help" - @$(ECHO) " host build host application ($(HAPP))" - @$(ECHO) " target build epiphany applications ($(EAPPS))" - @$(ECHO) " all build all" - @$(ECHO) " run build all, then run host application" - @$(ECHO) " clean remove applications and intermediate files" - @$(ECHO) - -all: host target - -host: folders $(HAPP) - -target: folders $(EAPPS) - -folders: $(HDEST) $(EDEST) $(DEST) - -run: host target - @$(ECHO) "\tRUN" - @sudo LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) \ - EPIPHANY_HDF=$(EPIPHANY_HDF) \ - $(HAPP) - -clean: - @$(ECHO) "\tCLEAN" - @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS) - @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \ - 2>/dev/null - -$(HDEST): - @$(ECHO) "\t(HOST) MKDIR $(HDEST)" - @mkdir -p $(HDEST) - -$(EDEST): - @$(ECHO) "\t(HOST) MKDIR $(EDEST)" - @mkdir -p $(EDEST) - -$(DEST): - @$(ECHO) "\t(HOST) MKDIR $(DEST)" - @mkdir -p $(DEST) - -# === Host Rules ========================================================== -$(HAPP): $(HOBJS) - @$(ECHO) "\t(HOST) LINK\t$@" - @$(HCC) -o $@ $^ $(HLFLAGS) - -$(HDEST)/%.o: $(HSRC)/%.c - @$(ECHO) "\t(HOST) CC\t$@" - @$(HCC) $(HCFLAGS) -c -o $@ $^ - -# === Target Rules ======================================================== -$(DEST)/%.srec: $(EDEST)/%.elf - @$(ECHO) "\t(TARGET) OBJCOPY $@" - @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@ - -$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON) - @$(ECHO) "\t(TARGET) LINK\t$@" - @$(ECC) -o $@ $^ $(ELFLAGS) - -$(EDEST)/%.o: $(ESRC)/%.c - @$(ECHO) "\t(TARGET) CC\t$@" - @$(ECC) $(ECFLAGS) -c -o $@ $^ -# ========================================================================= - diff --git a/lb/esrc/d2q9.c b/lb/esrc/d2q9.c deleted file mode 100644 index 7a9d0b5..0000000 --- a/lb/esrc/d2q9.c +++ /dev/null @@ -1,143 +0,0 @@ -/* D2Q9 lattice boltzmann functions */ - -#include -#include "../shared.h" -#include "lb.h" - -/* velocities */ -static const int d2q9_v[9][2] = { { 0, 0}, - {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1}, - { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, -}; - -/* weights */ -static const FLOAT d2q9_w[9] = { 4./9., - 1./36., 1./9., 1./36., 1./9., - 1./36., 1./9., 1./36., 1./9., -}; - -void d2q9_init(d2q9_block_t block) -{ - /* all with rho = 0.1 */ - for(int y = 0; y < BLOCK_Y; y++) - for(int x = 0; x < BLOCK_X; x++) - for(int q = 0; q < 9; q++) - block[y][x][q] = 0.1 * d2q9_w[q]; - - if(core == 0) { - /* except here with 0.2 */ - for(int q = 0; q < 9; q++) - block[0][0][q] = 0.2 * d2q9_w[q]; - } - - return; -} - -void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega) -{ - /* macroscopic */ - FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] + - f[y][x][3] + f[y][x][4] + f[y][x][5] + - f[y][x][6] + f[y][x][7] + f[y][x][8]; - FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] - - f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho; - FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] - - f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; - FLOAT sqr = 1.5 * (ux*ux + uy*uy); - - /* update node */ - for(int q = 0; q < 9; q++) { - FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; - FLOAT eq = rho * d2q9_w[q] * - (1. + 3. * cu + 4.5 * cu*cu - sqr); - f[y][x][q] *= (1.0 - omega); - f[y][x][q] += omega * eq; - } - - /* swap */ - for(int q = 1; q <= 4; q++) { - FLOAT tmp = f[y][x][q]; - f[y][x][q] = f[y][x][q+4]; - f[y][x][q+4] = tmp; - } -} - -void d2q9_stream(d2q9_block_t f, int x, int y) -{ - for(int q = 1; q <= 4; q++) { - int next_row = row; - int next_col = col; - int next_x = x + d2q9_v[q][0]; - int next_y = y + d2q9_v[q][1]; - - /* inner borders (extend) */ - if(next_x < 0) { next_col--; next_x += BLOCK_X; } - else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; } - if(next_y < 0) { next_row--; next_y += BLOCK_Y; } - else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; } - -#if 0 - /* outer borders (wrap around) */ - if(next_col < 0) { next_col += CORES_X; } - else if(next_col >= CORES_X) { next_col -= CORES_X; } - if(next_row < 0) { next_row += CORES_Y; } - else if(next_row >= CORES_Y) { next_row -= CORES_Y; } -#else - /* full bounce-back on all sides */ - if(next_col < 0) { return; } - else if(next_col >= CORES_X) { return; } - if(next_row < 0) { return; } - else if(next_row >= CORES_Y) { return; } -#endif - - /* f: local block, g: local or remote block */ - d2q9_block_t *g = (void*)f; - if(next_row != row || next_col != col) { - g = e_get_global_address(next_col, next_row, (void*)f); - } - - /* stream/swap f and g */ - FLOAT tmp = f[y][x][q+4]; - f[y][x][q+4] = (*g)[next_y][next_x][q]; - (*g)[next_y][next_x][q] = tmp; - } -} - -void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega) -{ - /* don't touch the border nodes */ - for(int x = 1; x < BLOCK_X-1; x++) { - for(int y = 1; y < BLOCK_Y-1; y++) { - /* macroscopic */ - FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] + - f[y][x][3] + f[y][x][4] + f[y][x][5] + - f[y][x][6] + f[y][x][7] + f[y][x][8]; - FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] - - f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho; - FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] - - f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; - FLOAT sqr = 1.5 * (ux*ux + uy*uy); - - /* update node */ - for(int q = 0; q < 9; q++) { - FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; - FLOAT eq = rho * d2q9_w[q] * - (1. + 3. * cu + 4.5 * cu*cu - sqr); - f[y][x][q] *= (1.0 - omega); - f[y][x][q] += omega * eq; - } - - /* stream */ - for(int q = 0; q <= 4; q++) { - int next_x = x + d2q9_v[q][0]; - int next_y = y + d2q9_v[q][1]; - - FLOAT tmp = f[y][x][q]; - f[y][x][q] = f[y][x][q+4]; - f[y][x][q+4] = f[next_y][next_x][q]; - f[next_y][next_x][q] = tmp; - } - } - } -} - diff --git a/lb/esrc/lb.h b/lb/esrc/lb.h deleted file mode 100644 index a72b39b..0000000 --- a/lb/esrc/lb.h +++ /dev/null @@ -1,13 +0,0 @@ -/* lattice boltzmann functions */ - -#include "../shared.h" - -/* core index */ -extern unsigned int row, col, core; - -/* D2Q9 functions */ -void d2q9_init (d2q9_block_t); -void d2q9_collide (d2q9_block_t, int x, int y, FLOAT); -void d2q9_stream (d2q9_block_t, int x, int y); -void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT); - diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c deleted file mode 100644 index c254298..0000000 --- a/lb/esrc/lb_2d.c +++ /dev/null @@ -1,147 +0,0 @@ -/* D2Q9 lattice boltzmann implementation */ - -#include -#include "../shared.h" - -#include -#include - -#include "lb.h" - -/* shared memory overlay */ -volatile shm_t shm SECTION(".shared_dram"); - -/* statically allocate dummy memory and local block overlay - to prevent linker from putting stuff in banks 1..3 */ -static uint8_t dummy_bank1[8192] UNUSED SECTION(".data_bank1"); -static uint8_t dummy_bank2[8192] UNUSED SECTION(".data_bank2"); -static uint8_t dummy_bank3[8192] UNUSED SECTION(".data_bank3"); -static d2q9_block_t *block = (void*)0x2000; - -/* barrier structures */ -volatile e_barrier_t barriers[NUM_CORES]; - e_barrier_t *tgt_bars[NUM_CORES]; - -/* global index variables */ -unsigned int row, col, core; - -void init(void) -{ - /* compile-time checks */ - BUILD_BUG(BLOCK_X * BLOCK_Y * sizeof(d2q9_node_t) > 24*1024); - BUILD_BUG(BLOCK_X < 3 || BLOCK_Y < 3); - BUILD_BUG(CORES_X < 1 || CORES_Y < 1); - BUILD_BUG(CORES_X > 4 || CORES_Y > 4); - - /* core index */ - e_coords_from_coreid(e_get_coreid(), &col, &row); - core = row * CORES_X + col; - - /* barrier initialization */ - e_barrier_init(barriers, tgt_bars); -} - -#define READ_TIMER(X) \ - do { \ - clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \ - e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \ - e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \ - } while(0); - -int main() -{ - const FLOAT omega = 1.0; - unsigned clocks[TIMERS] = {0}; - - init(); - d2q9_init(*block); - - for(int i = 0; i < 500; i++) { - READ_TIMER(0); - -#if 1 - /* collide all nodes */ - for(int y = 0; y < BLOCK_Y; y++) - for(int x = 0; x < BLOCK_X; x++) - d2q9_collide(*block, x, y, omega); - - /* synchronize */ - READ_TIMER(1); - e_barrier(barriers, tgt_bars); - READ_TIMER(2); - - /* stream all nodes */ - for(int y = 0; y < BLOCK_Y; y++) - for(int x = 0; x < BLOCK_X; x++) - d2q9_stream(*block, x, y); - READ_TIMER(3); - -#else - /* collide boundaries: top, bottom */ - for(int x = 0; x < BLOCK_X; x++) { - d2q9_collide(*block, x, 0, omega); - d2q9_collide(*block, x, BLOCK_Y-1, omega); - } - READ_TIMER(1); - - /* collide boundaries: left, right */ - for(int y = 1; y < BLOCK_Y-1; y++) { - d2q9_collide(*block, 0, y, omega); - d2q9_collide(*block, BLOCK_X-1, y, omega); - } - - /* synchronize */ - READ_TIMER(2); - e_barrier(barriers, tgt_bars); - READ_TIMER(3); - - /* collide and stream the bulk */ - d2q9_collide_stream_bulk(*block, omega); - READ_TIMER(4); - - /* stream the boundaries: top, bottom */ - for(int x = 0; x < BLOCK_X; x++) { - d2q9_stream(*block, x, 0 ); - d2q9_stream(*block, x, BLOCK_Y-1); - } - READ_TIMER(5); - - /* stream the boundaries: left, right */ - for(int y = 1; y < BLOCK_Y-1; y++) { - d2q9_stream(*block, 0, y); - d2q9_stream(*block, BLOCK_X-1, y); - } - READ_TIMER(6); -#endif - - /* copy data to shm if necessary */ - if(!(i%100)) { - /* copy iteration, lattice and timers to shm */ - if(core == 0) - shm.iteration = i; - - memcpy(&shm.lattice[row][col], block, sizeof(d2q9_block_t)); - - for(int i = 0; i < TIMERS; i++) - shm.timers[row][col][i] = clocks[i]; - - /* synchronize */ - e_barrier(barriers, tgt_bars); - - /* flag host and wait */ - if(core == 0) { - shm.pollflag = POLL_READY; - while(shm.pollflag == POLL_READY); - } - } - - /* synchronize */ - e_barrier(barriers, tgt_bars); - READ_TIMER(TIMERS-1); - } - - /* last iteration done: flag host and stop */ - if(core == 0) shm.pollflag = POLL_DONE; - while(1) __asm__ volatile("idle"); -} - diff --git a/lb/hsrc/data.c b/lb/hsrc/data.c deleted file mode 100644 index 675fe50..0000000 --- a/lb/hsrc/data.c +++ /dev/null @@ -1,142 +0,0 @@ -/* Helper Functions to handle data (2D) */ - -#include -#include -#include - -#include "../shared.h" - -/* fix file owner if run with sudo */ -void fixsudo(const char *filename) -{ - if(getenv("SUDO_UID") && getenv("SUDO_GID")) { - int uid = atoi(getenv("SUDO_UID")); - int gid = atoi(getenv("SUDO_GID")); - if(chown(filename, uid, gid)) { - perror("fixsudo/chown"); - return; - } - } -} - - -/* write a (semi-) human-readable dump of the lattice */ -void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter) -{ - FILE *file = fopen("populations.dat", "a"); - if(!file) { - perror("write_populations/fopen"); - return; - } - - for(int cy = 0; cy < CORES_Y; cy++) { - for(int y = 0; y < BLOCK_Y; y++) { - for(int cx = 0; cx < CORES_X; cx++) { - for(int x = 0; x < BLOCK_X; x++) { - fprintf(file, "%3d: [%3d,%3d]: ", - iter, - cx * BLOCK_X + x, - cy * BLOCK_Y + y - ); - for(int q = 0; q < 9; q++) { - fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]); - } - fprintf(file, "\n"); - } - } - } - } - fprintf(file, "\n"); - - /* close */ - fclose(file); - - return; -} - -/* write an 8-bit grayscale, binary PPM image of the lattice */ -void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter) -{ - char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter); - - /* open image file and write header */ - FILE *file = fopen(name, "wb"); - if(!file) { - perror("write_image/fopen"); - return; - } - fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255); - - /* calculate all densities and remember min/max */ - FLOAT min = 1.0, max = 0; - FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X]; - for(int cy = 0; cy < CORES_Y; cy++) { - for(int y = 0; y < BLOCK_Y; y++) { - for(int cx = 0; cx < CORES_X; cx++) { - for(int x = 0; x < BLOCK_X; x++) { - FLOAT rho = 0; - for(int q = 0; q < 9; q++) - rho += lattice[cy][cx][y][x][q]; - rhos[cy][y][cx][x] = rho; - - if(rho < min) min = rho; - if(rho > max) max = rho; - } - } - } - } - - /* scale values and write them to the image */ - for(int cy = 0; cy < CORES_Y; cy++) { - for(int y = 0; y < BLOCK_Y; y++) { - for(int cx = 0; cx < CORES_X; cx++) { - for(int x = 0; x < BLOCK_X; x++) { - unsigned char gray; - gray = (255. * (rhos[cy][y][cx][x]-min) / (max-min)); - fwrite(&gray, 1, 1, file); - } - } - } - } - - /* close the file and chown if run with sudo */ - fclose(file); - fixsudo(name); - - return; -} - -/* convert image files to animated gif ./tmp/anim.gif */ -void write_animation(void) -{ - int result; - - /* call imagemagick */ - result = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)result; - fixsudo("./tmp/anim.gif"); - - return; -} - -/* write timer values */ -void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter) -{ - FILE *file = fopen("timers.dat", "ab"); - if(!file) { - perror("write_timers/fopen"); - return; - } - - fprintf(file, "Timers: i=%d\n", iter); - for(int y = 0; y < CORES_Y; y++) { - for(int x = 0; x < CORES_X; x++) { - fprintf(file, "[%d,%d]: ", x, y); - for(int t = 0; t < TIMERS; t++) { - fprintf(file, "%8d ", timers[y][x][t]); - } - fprintf(file, "\n"); - } - } - - fclose(file); -} diff --git a/lb/hsrc/main.c b/lb/hsrc/main.c deleted file mode 100644 index ca244a4..0000000 --- a/lb/hsrc/main.c +++ /dev/null @@ -1,113 +0,0 @@ -/* Host Application */ - -#include -#include -#include -#include -#include - -#include -#include "../shared.h" - -#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); } -#define SHM_OFFSET 0x01000000 - -/* helper functions */ -void fixsudo(const char *filename); -void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter); -void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter); -void write_animation(void); -void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter); - -/* globals */ -static shm_t shm = { 0 }; /* local shm copy */ -static uint32_t pollflag; - -int main() -{ - char *filename = "bin/lb_2d.srec"; - - /* remove old results */ - int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat"); - (void)dummy; - - e_epiphany_t dev; - e_mem_t mem; - - e_set_host_verbosity(H_D0); - e_set_loader_verbosity(L_D0); - - /* initialize workgroup, allocate and clear shared memory */ - if(e_init(NULL) != E_OK) - FAIL("Can't init!\n"); - e_reset_system(); - if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK) - FAIL("Can't open!\n"); - if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK) - FAIL("Can't alloc!\n"); - if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) - FAIL("Can't clear shm!\n"); - - /* load programs */ - printf("Starting cores:\n"); - for(int y = 0; y < CORES_Y; y++) { - for(int x = 0; x < CORES_X; x++) { - printf("(%02d,%02d) ", x, y); - if(e_load(filename, &dev, x, y, E_TRUE) != E_OK) - FAIL("Can't load!\n"); - } - printf("\n"); - } - - /* ================================================================ */ - printf("Polling shared memory.\n"); - while(1) { - - while(1) { - /* read polling flag */ - if(e_read(&mem, 0, 0, (off_t)0, &pollflag, - sizeof(uint32_t)) == E_ERR) - FAIL("Can't read pollflag!\n"); - - /* wait for something */ - if(pollflag != POLL_BUSY) break; - } - - /* finish if done */ - if(pollflag == POLL_DONE) break; - - /* read full shared memory */ - if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) - FAIL("Can't read full shm!\n"); - - /* reset pollflag */ - pollflag = 0; - if(e_write(&mem, 0, 0, (off_t)0, &pollflag, - sizeof(uint32_t)) == E_ERR) { - FAIL("Can't reset pollflag!\n"); - } - - /* print iteration */ - printf("0x%08x\r", shm.iteration); fflush(stdout); - - /* write data */ - //write_populations(shm.lattice, shm.iteration); - write_image(shm.lattice, shm.iteration); - write_timers(shm.timers, shm.iteration); - } - /* ================================================================ */ - - if(e_free(&mem) != E_OK) FAIL("Can't free!\n"); - if(e_close(&dev) != E_OK) FAIL("Can't close!\n"); - if(e_finalize() != E_OK) FAIL("Can't finalize!\n"); - - fixsudo("populations.dat"); - fixsudo("timers.dat"); - - printf("\nProgram finished successfully.\n"); - printf("Convert ...\n"); - write_animation(); - - return(0); -} - diff --git a/lb/shared.h b/lb/shared.h deleted file mode 100644 index 607a3e1..0000000 --- a/lb/shared.h +++ /dev/null @@ -1,47 +0,0 @@ -/* shared data types and external memory layout */ -#ifndef _SHARED_H_ -#define _SHARED_H_ - -#include - -/* preprocessor magic */ -#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0); -#define UNUSED __attribute__((unused)) -#undef PACKED -#define PACKED __attribute__((packed)) -#undef ALIGN -#define ALIGN(X) __attribute__((aligned(X))) - -/* number of cores */ -#define CORES_X 4 -#define CORES_Y 4 -#define NUM_CORES (CORES_X * CORES_Y) - -/* size of per-core subgrid */ -#define BLOCK_X 26 -#define BLOCK_Y 26 - -#define TIMERS 12 - -/* pollflag values */ -#define POLL_BUSY 0x00 -#define POLL_READY 0x01 -#define POLL_DONE 0x02 - -/* floating point type */ -typedef float FLOAT; - -/* node and block type (D2Q9) */ -typedef FLOAT d2q9_node_t[9]; -typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X]; - -/* shared memory structure */ -typedef struct { - uint32_t pollflag; - uint32_t iteration; - uint32_t timers[CORES_Y][CORES_X][TIMERS]; - d2q9_block_t lattice[CORES_Y][CORES_X]; -} ALIGN(8) shm_t; - -#endif /* _SHARED_H_ */ -