rename folder
author Sebastian <git@sraa.de>
Wed, 13 Aug 2014 14:15:43 +0000 (14:15 +0000)
committer Sebastian <git@sraa.de>
Wed, 13 Aug 2014 14:15:43 +0000 (14:15 +0000)
The main code and the underlying data structures are too different
to bother building a 2D and a 3D simulation from the same code base.

14 files changed:
d2q9/Makefile [new file with mode: 0644]
d2q9/esrc/d2q9.c [new file with mode: 0644]
d2q9/esrc/d2q9.h [new file with mode: 0644]
d2q9/esrc/main.c [new file with mode: 0644]
d2q9/hsrc/data.c [new file with mode: 0644]
d2q9/hsrc/main.c [new file with mode: 0644]
d2q9/shared.h [new file with mode: 0644]
lb/Makefile [deleted file]
lb/esrc/d2q9.c [deleted file]
lb/esrc/lb.h [deleted file]
lb/esrc/lb_2d.c [deleted file]
lb/hsrc/data.c [deleted file]
lb/hsrc/main.c [deleted file]
lb/shared.h [deleted file]

diff --git a/d2q9/Makefile b/d2q9/Makefile
new file mode 100644 (file)
index 0000000..e4a6bb8
--- /dev/null
@@ -0,0 +1,105 @@
+# Template Makefile for Epiphany: builds the host program with gcc and the Epiphany core image with e-gcc
+
+# host toolchain (headers and the e-hal library are taken from $(EPIPHANY_HOME)); ECHO prints the progress messages
+HCC    = gcc
+HCFLAGS        = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall
+HLFLAGS        = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal
+ECHO   = /bin/echo -e
+
+# target toolchain (links with the internal.ldf linker script and e-lib; objcopy removes .shared_dram from the SREC)
+ECC    = e-gcc
+EOC    = e-objcopy
+ECFLAGS        = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math
+ELFLAGS        = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib
+EOFLAGS        = -R .shared_dram
+
+# host application: the executable and the objects it is linked from
+HAPP   = $(DEST)/ep_main
+HOBJS  = $(HDEST)/main.o $(HDEST)/data.o
+
+# epiphany applications: one .srec per program; the ECOMMON objects are linked into every program
+EAPPS  = $(DEST)/main.srec
+ECOMMON        = $(EDEST)/d2q9.o
+
+# folders: sources (HSRC, ESRC), intermediate objects (HDEST, EDEST) and final outputs (DEST)
+HSRC   = hsrc
+HDEST  = hobj
+ESRC   = esrc
+EDEST  = eobj
+DEST   = bin
+
+# === Magic begins here: derive object/ELF names from EAPPS ($(DEST)/X.srec -> $(EDEST)/X.o, $(EDEST)/X.elf) ===
+EOBJS  = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
+EELFS  = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
+
+.SECONDARY:
+.PHONY: all help host target folders run clean
+.NOTPARALLEL: clean
+
+# === Phony Rules (user-facing targets; "help" is the first target and therefore the default goal) ===
+help:
+       @$(ECHO)
+       @$(ECHO) "Epiphany Makefile - Help"
+       @$(ECHO) "  help    show this help"
+       @$(ECHO) "  host    build host application      ($(HAPP))"
+       @$(ECHO) "  target  build epiphany applications ($(EAPPS))"
+       @$(ECHO) "  all     build all"
+       @$(ECHO) "  run     build all, then run host application"
+       @$(ECHO) "  clean   remove applications and intermediate files"
+       @$(ECHO)
+
+all: host target
+
+host: folders $(HAPP)
+
+target: folders $(EAPPS)
+
+folders: $(HDEST) $(EDEST) $(DEST)
+
+run: host target
+       @$(ECHO) "\tRUN"
+       @sudo LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) \
+             EPIPHANY_HDF=$(EPIPHANY_HDF) \
+             $(HAPP)
+
+clean:
+       @$(ECHO) "\tCLEAN"
+       @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS)
+       @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
+               2>/dev/null
+
+$(HDEST):
+       @$(ECHO) "\t(HOST)   MKDIR $(HDEST)"
+       @mkdir -p $(HDEST)
+
+$(EDEST):
+       @$(ECHO) "\t(HOST)   MKDIR $(EDEST)"
+       @mkdir -p $(EDEST)
+
+$(DEST):
+       @$(ECHO) "\t(HOST)   MKDIR $(DEST)"
+       @mkdir -p $(DEST)
+
+# === Host Rules (objects also depend on shared.h, so a change of the shared layout rebuilds them) ===
+$(HAPP): $(HOBJS)
+       @$(ECHO) "\t(HOST)   LINK\t$@"
+       @$(HCC) -o $@ $^ $(HLFLAGS)
+
+$(HDEST)/%.o: $(HSRC)/%.c shared.h
+       @$(ECHO) "\t(HOST)   CC\t$@"
+       @$(HCC) $(HCFLAGS) -c -o $@ $<
+
+# === Target Rules (.c -> .o -> .elf -> .srec; objects also depend on the headers they include) ===
+$(DEST)/%.srec: $(EDEST)/%.elf
+       @$(ECHO) "\t(TARGET) OBJCOPY $@"
+       @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@
+
+$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
+       @$(ECHO) "\t(TARGET) LINK\t$@"
+       @$(ECC) -o $@ $^ $(ELFLAGS)
+
+$(EDEST)/%.o: $(ESRC)/%.c shared.h $(ESRC)/d2q9.h
+       @$(ECHO) "\t(TARGET) CC\t$@"
+       @$(ECC) $(ECFLAGS) -c -o $@ $<
+# =========================================================================
+
diff --git a/d2q9/esrc/d2q9.c b/d2q9/esrc/d2q9.c
new file mode 100644 (file)
index 0000000..9b3b94a
--- /dev/null
@@ -0,0 +1,143 @@
+/* D2Q9 lattice boltzmann functions */
+
+#include <e-lib.h>
+#include "../shared.h"
+#include "d2q9.h"
+
+/* velocities: d2q9_v[q] = {dx, dy}; q = 0 is the rest direction, and q+4 is the opposite of q for q = 1..4 */
+static const int d2q9_v[9][2] = { { 0, 0},
+       {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
+       { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
+};
+
+/* weights, in the same order as d2q9_v: 4/9 for rest, 1/9 for axis-aligned and 1/36 for diagonal directions */
+static const FLOAT d2q9_w[9] = { 4./9.,
+       1./36., 1./9., 1./36., 1./9.,
+       1./36., 1./9., 1./36., 1./9.,
+};
+
+void d2q9_init(d2q9_block_t block) /* fill the given block with the initial populations */
+{
+       /* all nodes: populations are 0.1 * weight, i.e. density rho = 0.1 */
+       for(int y = 0; y < BLOCK_Y; y++)
+               for(int x = 0; x < BLOCK_X; x++)
+                       for(int q = 0; q < 9; q++)
+                               block[y][x][q] = 0.1 * d2q9_w[q];
+
+       if(core == 0) {
+               /* except node (0,0) of core 0, which gets rho = 0.2 */
+               for(int q = 0; q < 9; q++)
+                       block[0][0][q] = 0.2 * d2q9_w[q];
+       }
+
+       return;
+}
+
+void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega) /* collide node (x,y) of block f with relaxation rate omega */
+{
+       /* macroscopic density and velocity, computed from the nine populations of the node */
+       FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
+               f[y][x][3] + f[y][x][4] + f[y][x][5] +
+               f[y][x][6] + f[y][x][7] + f[y][x][8];
+       FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
+               f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
+       FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
+               f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; /* note: rho == 0 is not guarded against */
+       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+       /* update node: f = (1 - omega) * f + omega * eq, with eq the equilibrium for (rho, ux, uy) */
+       for(int q = 0; q < 9; q++) {
+               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+               FLOAT eq = rho * d2q9_w[q] *
+                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
+               f[y][x][q] *= (1.0 - omega);
+               f[y][x][q] += omega * eq;
+       }
+
+       /* swap every population with its opposite (q <-> q+4); d2q9_stream() expects this layout */
+       for(int q = 1; q <= 4; q++) {
+               FLOAT tmp    = f[y][x][q];
+               f[y][x][q]   = f[y][x][q+4];
+               f[y][x][q+4] = tmp;
+       }
+}
+
+void d2q9_stream(d2q9_block_t f, int x, int y)
+{
+       for(int q = 1; q <= 4; q++) {
+               int next_row = row;
+               int next_col = col;
+               int next_x   = x + d2q9_v[q][0];
+               int next_y   = y + d2q9_v[q][1];
+
+               /* inner borders (extend) */
+               if(next_x < 0)             { next_col--; next_x += BLOCK_X; }
+               else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; }
+               if(next_y < 0)             { next_row--; next_y += BLOCK_Y; }
+               else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; }
+
+#if 0
+               /* outer borders (wrap around) */
+               if(next_col < 0)             { next_col += CORES_X; }
+               else if(next_col >= CORES_X) { next_col -= CORES_X; }
+               if(next_row < 0)             { next_row += CORES_Y; }
+               else if(next_row >= CORES_Y) { next_row -= CORES_Y; }
+#else
+               /* full bounce-back on all sides */
+               if(next_col < 0)             { return; }
+               else if(next_col >= CORES_X) { return; }
+               if(next_row < 0)             { return; }
+               else if(next_row >= CORES_Y) { return; }
+#endif
+
+               /* f: local block, g: local or remote block */
+               d2q9_block_t *g = (void*)f;
+               if(next_row != row || next_col != col) {
+                       g = e_get_global_address(next_col, next_row, (void*)f);
+               }
+
+               /* stream/swap f and g */
+               FLOAT tmp    = f[y][x][q+4];
+               f[y][x][q+4] = (*g)[next_y][next_x][q];
+               (*g)[next_y][next_x][q] = tmp;
+       }
+}
+
+void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega) /* fused collide + stream for all interior nodes of block f */
+{
+       /* don't touch the border nodes: only x in 1..BLOCK_X-2 and y in 1..BLOCK_Y-2 are visited */
+       for(int x = 1; x < BLOCK_X-1; x++) {
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       /* macroscopic density and velocity (same formulas as d2q9_collide) */
+                       FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
+                               f[y][x][3] + f[y][x][4] + f[y][x][5] +
+                               f[y][x][6] + f[y][x][7] + f[y][x][8];
+                       FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
+                               f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
+                       FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
+                               f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
+                       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+                       /* update node: f = (1 - omega) * f + omega * eq */
+                       for(int q = 0; q < 9; q++) {
+                               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+                               FLOAT eq = rho * d2q9_w[q] *
+                                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
+                               f[y][x][q] *= (1.0 - omega);
+                               f[y][x][q] += omega * eq;
+                       }
+
+                       /* stream: local swap and neighbour exchange in one step; q = 0 (rest) has no neighbour and is skipped */
+                       for(int q = 1; q <= 4; q++) {
+                               int next_x = x + d2q9_v[q][0];
+                               int next_y = y + d2q9_v[q][1];
+
+                               FLOAT tmp    = f[y][x][q];
+                               f[y][x][q]   = f[y][x][q+4];
+                               f[y][x][q+4] = f[next_y][next_x][q];
+                               f[next_y][next_x][q] = tmp;
+                       }
+               }
+       }
+}
+
diff --git a/d2q9/esrc/d2q9.h b/d2q9/esrc/d2q9.h
new file mode 100644 (file)
index 0000000..a72b39b
--- /dev/null
@@ -0,0 +1,13 @@
+/* lattice boltzmann functions (D2Q9) for the Epiphany cores; implemented in d2q9.c */
+
+#include "../shared.h"
+
+/* core index: coordinates and linear index of the executing core; defined in main.c and set by init() */
+extern unsigned int row, col, core;
+
+/* D2Q9 functions (the FLOAT argument is the relaxation rate omega) */
+void d2q9_init               (d2q9_block_t);                      /* set the initial populations */
+void d2q9_collide            (d2q9_block_t, int x, int y,  FLOAT); /* collide one node */
+void d2q9_stream             (d2q9_block_t, int x, int y);        /* stream one node */
+void d2q9_collide_stream_bulk(d2q9_block_t, FLOAT);               /* collide and stream all interior nodes */
+
diff --git a/d2q9/esrc/main.c b/d2q9/esrc/main.c
new file mode 100644 (file)
index 0000000..fbd21cc
--- /dev/null
@@ -0,0 +1,147 @@
+/* D2Q9 lattice boltzmann implementation */
+
+#include <e-lib.h>
+#include "../shared.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "d2q9.h"
+
+/* shared memory overlay: lives in section .shared_dram; main() copies results into it for the host */
+volatile shm_t shm SECTION(".shared_dram");
+
+/* statically allocate dummy memory and local block overlay
+   to prevent linker from putting stuff in banks 1..3 (3 x 8192 bytes reserved) */
+static uint8_t      dummy_bank1[8192] UNUSED SECTION(".data_bank1");
+static uint8_t      dummy_bank2[8192] UNUSED SECTION(".data_bank2");
+static uint8_t      dummy_bank3[8192] UNUSED SECTION(".data_bank3");
+static d2q9_block_t *block = (void*)0x2000; /* lattice block at a fixed local address; presumably the start of bank 1 - confirm against the linker script */
+
+/* barrier structures: handed to e_barrier_init() and e_barrier() */
+volatile e_barrier_t  barriers[NUM_CORES];
+         e_barrier_t *tgt_bars[NUM_CORES];
+
+/* global index variables: set once in init(), declared extern in d2q9.h */
+unsigned int row, col, core;
+
+void init(void) /* one-time per-core setup: compile-time checks, core coordinates, barrier */
+{
+       /* compile-time checks: block fits into 24 KiB, is at least 3x3, and 1..4 cores are used per axis */
+       BUILD_BUG(BLOCK_X * BLOCK_Y * sizeof(d2q9_node_t) > 24*1024);
+       BUILD_BUG(BLOCK_X < 3 || BLOCK_Y < 3);
+       BUILD_BUG(CORES_X < 1 || CORES_Y < 1);
+       BUILD_BUG(CORES_X > 4 || CORES_Y > 4);
+
+       /* core index; NOTE(review): e-lib appears to document the out-parameters as (row, col) while (col, row) is passed here - confirm this is intentional */
+       e_coords_from_coreid(e_get_coreid(), &col, &row);
+       core = row * CORES_X + col;
+
+       /* barrier initialization */
+       e_barrier_init(barriers, tgt_bars);
+}
+
+#define READ_TIMER(X) \
+       do { \
+               clocks[X] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
+               e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
+               e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
+       } while(0);
+
+int main(void) /* per-core entry point: run 500 lattice iterations, publish results every 100th, never returns */
+{
+       const FLOAT omega = 1.0;
+       unsigned clocks[TIMERS] = {0};
+
+       init();
+       d2q9_init(*block);
+
+       for(int i = 0; i < 500; i++) {
+               READ_TIMER(0);
+
+#if 1 /* simple variant: collide everything, synchronize, stream everything */
+               /* collide all nodes */
+               for(int y = 0; y < BLOCK_Y; y++)
+                       for(int x = 0; x < BLOCK_X; x++)
+                               d2q9_collide(*block, x, y, omega);
+
+               /* synchronize: streaming touches neighbouring cores' blocks, so all cores must have collided */
+               READ_TIMER(1);
+               e_barrier(barriers, tgt_bars);
+               READ_TIMER(2);
+
+               /* stream all nodes */
+               for(int y = 0; y < BLOCK_Y; y++)
+                       for(int x = 0; x < BLOCK_X; x++)
+                               d2q9_stream(*block, x, y);
+               READ_TIMER(3);
+
+#else /* disabled variant: borders are collided/streamed node by node, the interior with the fused bulk function */
+               /* collide boundaries: top, bottom */
+               for(int x = 0; x < BLOCK_X; x++) {
+                       d2q9_collide(*block, x, 0,         omega);
+                       d2q9_collide(*block, x, BLOCK_Y-1, omega);
+               }
+               READ_TIMER(1);
+
+               /* collide boundaries: left, right */
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       d2q9_collide(*block, 0,         y, omega);
+                       d2q9_collide(*block, BLOCK_X-1, y, omega);
+               }
+
+               /* synchronize */
+               READ_TIMER(2);
+               e_barrier(barriers, tgt_bars);
+               READ_TIMER(3);
+
+               /* collide and stream the bulk */
+               d2q9_collide_stream_bulk(*block, omega);
+               READ_TIMER(4);
+
+               /* stream the boundaries: top, bottom */
+               for(int x = 0; x < BLOCK_X; x++) {
+                       d2q9_stream(*block, x, 0        );
+                       d2q9_stream(*block, x, BLOCK_Y-1);
+               }
+               READ_TIMER(5);
+
+               /* stream the boundaries: left, right */
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       d2q9_stream(*block, 0,         y);
+                       d2q9_stream(*block, BLOCK_X-1, y);
+               }
+               READ_TIMER(6);
+#endif
+
+               /* copy data to shm if necessary (every 100th iteration) */
+               if(!(i%100)) {
+                       /* copy iteration, lattice and timers to shm */
+                       if(core == 0)
+                               shm.iteration = i;
+
+                       memcpy((void*)&shm.lattice[row][col], block, sizeof(d2q9_block_t)); /* cast drops volatile explicitly */
+
+                       for(int t = 0; t < TIMERS; t++) /* separate index: must not shadow the iteration counter i */
+                               shm.timers[row][col][t] = clocks[t];
+
+                       /* synchronize: every core has written its part before the host is flagged */
+                       e_barrier(barriers, tgt_bars);
+
+                       /* flag host and wait until it has changed the flag again */
+                       if(core == 0) {
+                               shm.pollflag = POLL_READY;
+                               while(shm.pollflag == POLL_READY);
+                       }
+               }
+
+               /* synchronize */
+               e_barrier(barriers, tgt_bars);
+               READ_TIMER(TIMERS-1);
+       }
+
+       /* last iteration done: flag host and stop */
+       if(core == 0) shm.pollflag = POLL_DONE;
+       while(1) __asm__ volatile("idle");
+}
+
diff --git a/d2q9/hsrc/data.c b/d2q9/hsrc/data.c
new file mode 100644 (file)
index 0000000..675fe50
--- /dev/null
@@ -0,0 +1,142 @@
+/* Helper Functions to handle data (2D) */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../shared.h"
+
+/* fix file owner if run with sudo: chown filename to SUDO_UID/SUDO_GID when both are set; a failure is only reported */
+void fixsudo(const char *filename)
+{
+       if(getenv("SUDO_UID") && getenv("SUDO_GID")) {
+               int uid = atoi(getenv("SUDO_UID"));
+               int gid = atoi(getenv("SUDO_GID"));
+               if(chown(filename, uid, gid)) {
+                       perror("fixsudo/chown");
+                       return;
+               }
+       }
+}
+
+
+/* write a (semi-) human-readable dump of the lattice: appends one "iter: [x, y]: f0 .. f8" line per node to populations.dat */
+void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+{
+       FILE *file = fopen("populations.dat", "a");
+       if(!file) {
+               perror("write_populations/fopen");
+               return;
+       }
+
+       for(int cy = 0; cy < CORES_Y; cy++) {
+               for(int y = 0; y < BLOCK_Y; y++) {
+                       for(int cx = 0; cx < CORES_X; cx++) {
+                               for(int x = 0; x < BLOCK_X; x++) {
+                                       fprintf(file, "%3d: [%3d,%3d]: ",
+                                               iter,
+                                               cx * BLOCK_X + x, /* global x coordinate */
+                                               cy * BLOCK_Y + y  /* global y coordinate */
+                                       );
+                                       for(int q = 0; q < 9; q++) {
+                                               fprintf(file, "%.5f ", lattice[cy][cx][y][x][q]);
+                                       }
+                                       fprintf(file, "\n");
+                               }
+                       }
+               }
+       }
+       fprintf(file, "\n");
+
+       /* close */
+       fclose(file);
+
+       return;
+}
+
+/* write an 8-bit grayscale, binary image of the node densities ("P5" header, i.e. PGM data, stored under a .ppm name) */
+void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
+{
+       char name[32]; snprintf(name, 32, "./tmp/i%06d.ppm", iter);
+
+       /* open image file and write header (the ./tmp directory is not created here) */
+       FILE *file = fopen(name, "wb");
+       if(!file) {
+               perror("write_image/fopen");
+               return;
+       }
+       fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255);
+
+       /* calculate all densities and remember min/max (min starts at 1.0, max at 0) */
+       FLOAT min = 1.0, max = 0;
+       FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X];
+       for(int cy = 0; cy < CORES_Y; cy++) {
+               for(int y = 0; y < BLOCK_Y; y++) {
+                       for(int cx = 0; cx < CORES_X; cx++) {
+                               for(int x = 0; x < BLOCK_X; x++) {
+                                       FLOAT rho = 0;
+                                       for(int q = 0; q < 9; q++)
+                                               rho += lattice[cy][cx][y][x][q];
+                                       rhos[cy][y][cx][x] = rho;
+
+                                       if(rho < min) min = rho;
+                                       if(rho > max) max = rho;
+                               }
+                       }
+               }
+       }
+
+       /* scale values to 0..255 and write them row by row; a uniform field (max == min) is written as all zeros */
+       for(int cy = 0; cy < CORES_Y; cy++) {
+               for(int y = 0; y < BLOCK_Y; y++) {
+                       for(int cx = 0; cx < CORES_X; cx++) {
+                               for(int x = 0; x < BLOCK_X; x++) {
+                                       unsigned char gray;
+                                       gray = (max > min) ? (255. * (rhos[cy][y][cx][x]-min) / (max-min)) : 0;
+                                       fwrite(&gray, 1, 1, file);
+                               }
+                       }
+               }
+       }
+
+       /* close the file and chown if run with sudo */
+       fclose(file);
+       fixsudo(name);
+
+       return;
+}
+
+/* convert image files to animated gif ./tmp/anim.gif (runs the external "convert" command through the shell) */
+void write_animation(void)
+{
+       int result;
+
+       /* call imagemagick; the exit status is deliberately ignored */
+       result = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)result;
+       fixsudo("./tmp/anim.gif");
+
+       return;
+}
+
+/* write timer values: appends one "[x,y]: t0 .. tN" line per core for iteration iter to timers.dat */
+void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
+{
+       FILE *file = fopen("timers.dat", "ab");
+       if(!file) {
+               perror("write_timers/fopen");
+               return;
+       }
+
+       fprintf(file, "Timers: i=%lu\n", (unsigned long)iter); /* unsigned conversion: values are uint32_t */
+       for(int y = 0; y < CORES_Y; y++) {
+               for(int x = 0; x < CORES_X; x++) {
+                       fprintf(file, "[%d,%d]: ", x, y);
+                       for(int t = 0; t < TIMERS; t++) {
+                               fprintf(file, "%8lu ", (unsigned long)timers[y][x][t]);
+                       }
+                       fprintf(file, "\n");
+               }
+       }
+
+       fclose(file);
+}
diff --git a/d2q9/hsrc/main.c b/d2q9/hsrc/main.c
new file mode 100644 (file)
index 0000000..48ec00a
--- /dev/null
@@ -0,0 +1,113 @@
+/* Host Application */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <e-hal.h>
+#include "../shared.h"
+
+#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
+#define SHM_OFFSET 0x01000000
+
+/* helper functions (implemented in data.c) */
+void fixsudo(const char *filename);
+void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
+void write_animation(void);
+void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
+
+/* globals */
+static shm_t    shm = { 0 };   /* local shm copy */
+static uint32_t pollflag;      /* pollflag value last read from / written to the device */
+
+int main(void) /* host entry point: start the cores, poll shared memory for snapshots, write result files */
+{
+       char *filename = "bin/main.srec";
+
+       /* remove old results */
+       int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat");
+       (void)dummy;
+
+       e_epiphany_t dev;
+       e_mem_t      mem;
+
+       e_set_host_verbosity(H_D0);
+       e_set_loader_verbosity(L_D0);
+
+       /* initialize workgroup, allocate and clear shared memory (shm is zero-initialized) */
+       if(e_init(NULL) != E_OK)
+               FAIL("Can't init!\n");
+       e_reset_system();
+       if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK)
+               FAIL("Can't open!\n");
+       if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
+               FAIL("Can't alloc!\n");
+       if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+               FAIL("Can't clear shm!\n");
+
+       /* load programs: the same image is loaded onto every core and started immediately */
+       printf("Starting cores:\n");
+       for(int y = 0; y < CORES_Y; y++) {
+               for(int x = 0; x < CORES_X; x++) {
+                       printf("(%02d,%02d) ", x, y);
+                       if(e_load(filename, &dev, x, y, E_TRUE) != E_OK)
+                               FAIL("Can't load!\n");
+               }
+               printf("\n");
+       }
+
+       /* ================================================================ */
+       printf("Polling shared memory.\n");
+       while(1) {
+
+               while(1) {
+                       /* read polling flag (first word of shm_t) */
+                       if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
+                               sizeof(uint32_t)) == E_ERR)
+                                       FAIL("Can't read pollflag!\n");
+
+                       /* wait for something (busy polling, no sleep) */
+                       if(pollflag != POLL_BUSY) break;
+               }
+
+               /* finish if done */
+               if(pollflag == POLL_DONE) break;
+
+               /* read full shared memory */
+               if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+                       FAIL("Can't read full shm!\n");
+
+               /* reset pollflag so that the waiting core continues */
+               pollflag = POLL_BUSY;
+               if(e_write(&mem, 0, 0, (off_t)0, &pollflag,
+                       sizeof(uint32_t)) == E_ERR) {
+                               FAIL("Can't reset pollflag!\n");
+               }
+
+               /* print iteration */
+               printf("0x%08x\r", shm.iteration); fflush(stdout);
+
+               /* write data */
+               //write_populations(shm.lattice, shm.iteration);
+               write_image(shm.lattice, shm.iteration);
+               write_timers(shm.timers, shm.iteration);
+       }
+       /* ================================================================ */
+
+       if(e_free(&mem)  != E_OK) FAIL("Can't free!\n");
+       if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
+       if(e_finalize()  != E_OK) FAIL("Can't finalize!\n");
+
+       fixsudo("populations.dat"); /* only exists when write_populations() above is enabled */
+       fixsudo("timers.dat");
+
+       printf("\nProgram finished successfully.\n");
+       printf("Convert ...\n");
+       write_animation();
+
+       return(0);
+}
+
diff --git a/d2q9/shared.h b/d2q9/shared.h
new file mode 100644 (file)
index 0000000..607a3e1
--- /dev/null
@@ -0,0 +1,47 @@
+/* shared data types and external memory layout */
+#ifndef _SHARED_H_
+#define _SHARED_H_
+
+#include <stdint.h>
+
+/* preprocessor magic: BUILD_BUG(c) fails to compile when c is true (negative array size); the others wrap GCC attributes */
+#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0)
+#define UNUSED __attribute__((unused))
+#undef PACKED
+#define PACKED __attribute__((packed))
+#undef ALIGN
+#define ALIGN(X) __attribute__((aligned(X)))
+
+/* number of cores per axis and in total */
+#define CORES_X 4
+#define CORES_Y 4
+#define NUM_CORES (CORES_X * CORES_Y)
+
+/* size of per-core subgrid, in nodes */
+#define BLOCK_X 26
+#define BLOCK_Y 26
+
+#define TIMERS 12 /* number of timer slots recorded per core */
+
+/* pollflag values (device <-> host handshake) */
+#define POLL_BUSY  0x00 /* nothing to fetch; the host writes this back after reading a snapshot */
+#define POLL_READY 0x01 /* set by core 0 when a snapshot has been copied to shared memory */
+#define POLL_DONE  0x02 /* set by core 0 after the last iteration */
+
+/* floating point type used for all populations */
+typedef float FLOAT;
+
+/* node and block type (D2Q9): a node holds 9 populations, a block is indexed [y][x] */
+typedef FLOAT       d2q9_node_t[9];
+typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
+
+/* shared memory structure, used with the same layout by the host and the cores */
+typedef struct {
+       uint32_t     pollflag;                         /* POLL_* value; the host reads/writes it as the first 4 bytes */
+       uint32_t     iteration;                        /* iteration number of the snapshot, written by core 0 */
+       uint32_t     timers[CORES_Y][CORES_X][TIMERS]; /* per-core timer values, indexed [row][col][timer] */
+       d2q9_block_t lattice[CORES_Y][CORES_X];        /* per-core copy of the lattice block, indexed [row][col] */
+} ALIGN(8) shm_t;
+
+#endif /* _SHARED_H_ */
+
diff --git a/lb/Makefile b/lb/Makefile
deleted file mode 100644 (file)
index c18c0d6..0000000
+++ /dev/null
@@ -1,105 +0,0 @@
-# Template Makefile for Epiphany
-
-# host toolchain
-HCC    = gcc
-HCFLAGS        = -O2 -std=c99 -I$(EPIPHANY_HOME)/tools/host/include -Wall
-HLFLAGS        = -L$(EPIPHANY_HOME)/tools/host/lib -le-hal
-ECHO   = /bin/echo -e
-
-# target toolchain
-ECC    = e-gcc
-EOC    = e-objcopy
-ECFLAGS        = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall -fsingle-precision-constant -ffast-math
-ELFLAGS        = -T$(EPIPHANY_HOME)/bsps/current/internal.ldf -le-lib
-EOFLAGS        = -R .shared_dram
-
-# host application
-HAPP   = $(DEST)/ep_main
-HOBJS  = $(HDEST)/main.o $(HDEST)/data.o
-
-# epiphany applications
-EAPPS  = $(DEST)/lb_2d.srec
-ECOMMON        = $(EDEST)/d2q9.o
-
-# folders
-HSRC   = hsrc
-HDEST  = hobj
-ESRC   = esrc
-EDEST  = eobj
-DEST   = bin
-
-# === Magic begins here ===================================================
-EOBJS  = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
-EELFS  = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
-
-.SECONDARY:
-.PHONY: all help host target folders run clean
-.NOTPARALLEL: clean
-
-# === Phony Rules =========================================================
-help:
-       @$(ECHO)
-       @$(ECHO) "Epiphany Makefile - Help"
-       @$(ECHO) "  help    show this help"
-       @$(ECHO) "  host    build host application      ($(HAPP))"
-       @$(ECHO) "  target  build epiphany applications ($(EAPPS))"
-       @$(ECHO) "  all     build all"
-       @$(ECHO) "  run     build all, then run host application"
-       @$(ECHO) "  clean   remove applications and intermediate files"
-       @$(ECHO)
-
-all: host target
-
-host: folders $(HAPP)
-
-target: folders $(EAPPS)
-
-folders: $(HDEST) $(EDEST) $(DEST)
-
-run: host target
-       @$(ECHO) "\tRUN"
-       @sudo LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) \
-             EPIPHANY_HDF=$(EPIPHANY_HDF) \
-             $(HAPP)
-
-clean:
-       @$(ECHO) "\tCLEAN"
-       @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS)
-       @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
-               2>/dev/null
-
-$(HDEST):
-       @$(ECHO) "\t(HOST)   MKDIR $(HDEST)"
-       @mkdir -p $(HDEST)
-
-$(EDEST):
-       @$(ECHO) "\t(HOST)   MKDIR $(EDEST)"
-       @mkdir -p $(EDEST)
-
-$(DEST):
-       @$(ECHO) "\t(HOST)   MKDIR $(DEST)"
-       @mkdir -p $(DEST)
-
-# === Host Rules ==========================================================
-$(HAPP): $(HOBJS)
-       @$(ECHO) "\t(HOST)   LINK\t$@"
-       @$(HCC) -o $@ $^ $(HLFLAGS)
-
-$(HDEST)/%.o: $(HSRC)/%.c
-       @$(ECHO) "\t(HOST)   CC\t$@"
-       @$(HCC) $(HCFLAGS) -c -o $@ $^
-
-# === Target Rules ========================================================
-$(DEST)/%.srec: $(EDEST)/%.elf
-       @$(ECHO) "\t(TARGET) OBJCOPY $@"
-       @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@
-
-$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
-       @$(ECHO) "\t(TARGET) LINK\t$@"
-       @$(ECC) -o $@ $^ $(ELFLAGS)
-
-$(EDEST)/%.o: $(ESRC)/%.c
-       @$(ECHO) "\t(TARGET) CC\t$@"
-       @$(ECC) $(ECFLAGS) -c -o $@ $^
-# =========================================================================
-
diff --git a/lb/esrc/d2q9.c b/lb/esrc/d2q9.c
deleted file mode 100644 (file)
index 7a9d0b5..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-/* D2Q9 lattice boltzmann functions */
-
-#include <e-lib.h>
-#include "../shared.h"
-#include "lb.h"
-
-/* velocities */
-static const int d2q9_v[9][2] = { { 0, 0},
-       {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
-       { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
-};
-
-/* weights */
-static const FLOAT d2q9_w[9] = { 4./9.,
-       1./36., 1./9., 1./36., 1./9.,
-       1./36., 1./9., 1./36., 1./9.,
-};
-
-void d2q9_init(d2q9_block_t block)
-{
-       /* all with rho = 0.1 */
-       for(int y = 0; y < BLOCK_Y; y++)
-               for(int x = 0; x < BLOCK_X; x++)
-                       for(int q = 0; q < 9; q++)
-                               block[y][x][q] = 0.1 * d2q9_w[q];
-
-       if(core == 0) {
-               /* except here with 0.2 */
-               for(int q = 0; q < 9; q++)
-                       block[0][0][q] = 0.2 * d2q9_w[q];
-       }
-
-       return;
-}
-
-void d2q9_collide(d2q9_block_t f, int x, int y, FLOAT omega)
-{
-       /* macroscopic */
-       FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
-               f[y][x][3] + f[y][x][4] + f[y][x][5] +
-               f[y][x][6] + f[y][x][7] + f[y][x][8];
-       FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
-               f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
-       FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
-               f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
-       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
-
-       /* update node */
-       for(int q = 0; q < 9; q++) {
-               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
-               FLOAT eq = rho * d2q9_w[q] *
-                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
-               f[y][x][q] *= (1.0 - omega);
-               f[y][x][q] += omega * eq;
-       }
-
-       /* swap */
-       for(int q = 1; q <= 4; q++) {
-               FLOAT tmp    = f[y][x][q];
-               f[y][x][q]   = f[y][x][q+4];
-               f[y][x][q+4] = tmp;
-       }
-}
-
-void d2q9_stream(d2q9_block_t f, int x, int y)
-{
-       for(int q = 1; q <= 4; q++) {
-               int next_row = row;
-               int next_col = col;
-               int next_x   = x + d2q9_v[q][0];
-               int next_y   = y + d2q9_v[q][1];
-
-               /* inner borders (extend) */
-               if(next_x < 0)             { next_col--; next_x += BLOCK_X; }
-               else if(next_x >= BLOCK_X) { next_col++; next_x -= BLOCK_X; }
-               if(next_y < 0)             { next_row--; next_y += BLOCK_Y; }
-               else if(next_y >= BLOCK_Y) { next_row++; next_y -= BLOCK_Y; }
-
-#if 0
-               /* outer borders (wrap around) */
-               if(next_col < 0)             { next_col += CORES_X; }
-               else if(next_col >= CORES_X) { next_col -= CORES_X; }
-               if(next_row < 0)             { next_row += CORES_Y; }
-               else if(next_row >= CORES_Y) { next_row -= CORES_Y; }
-#else
-               /* full bounce-back on all sides */
-               if(next_col < 0)             { return; }
-               else if(next_col >= CORES_X) { return; }
-               if(next_row < 0)             { return; }
-               else if(next_row >= CORES_Y) { return; }
-#endif
-
-               /* f: local block, g: local or remote block */
-               d2q9_block_t *g = (void*)f;
-               if(next_row != row || next_col != col) {
-                       g = e_get_global_address(next_col, next_row, (void*)f);
-               }
-
-               /* stream/swap f and g */
-               FLOAT tmp    = f[y][x][q+4];
-               f[y][x][q+4] = (*g)[next_y][next_x][q];
-               (*g)[next_y][next_x][q] = tmp;
-       }
-}
-
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
-{
-       /* don't touch the border nodes */
-       for(int x = 1; x < BLOCK_X-1; x++) {
-               for(int y = 1; y < BLOCK_Y-1; y++) {
-                       /* macroscopic */
-                       FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
-                               f[y][x][3] + f[y][x][4] + f[y][x][5] +
-                               f[y][x][6] + f[y][x][7] + f[y][x][8];
-                       FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
-                               f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
-                       FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
-                               f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
-                       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
-
-                       /* update node */
-                       for(int q = 0; q < 9; q++) {
-                               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
-                               FLOAT eq = rho * d2q9_w[q] *
-                                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
-                               f[y][x][q] *= (1.0 - omega);
-                               f[y][x][q] += omega * eq;
-                       }
-
-                       /* stream */
-                       for(int q = 0; q <= 4; q++) {
-                               int next_x = x + d2q9_v[q][0];
-                               int next_y = y + d2q9_v[q][1];
-
-                               FLOAT tmp    = f[y][x][q];
-                               f[y][x][q]   = f[y][x][q+4];
-                               f[y][x][q+4] = f[next_y][next_x][q];
-                               f[next_y][next_x][q] = tmp;
-                       }
-               }
-       }
-}
-
diff --git a/lb/esrc/lb.h b/lb/esrc/lb.h
deleted file mode 100644 (file)
index a72b39b..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * lattice boltzmann functions
- *
- * Interface between the per-core main program and the D2Q9 kernels.
- */
-#ifndef LB_H
-#define LB_H
-
-#include "../shared.h"
-
-/* core index: coordinates of this core and its linear index,
-   filled in once during start-up by the main program */
-extern unsigned int row, col, core;
-
-/* D2Q9 functions; every function works in place on the given block */
-
-/* set up the initial state of a block */
-void d2q9_init               (d2q9_block_t f);
-/* collision step for the single node (x, y), relaxation factor omega */
-void d2q9_collide            (d2q9_block_t f, int x, int y,  FLOAT omega);
-/* streaming step for the single node (x, y) */
-void d2q9_stream             (d2q9_block_t f, int x, int y);
-/* fused collision and streaming for all non-border nodes */
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega);
-
-#endif /* LB_H */
-
diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c
deleted file mode 100644 (file)
index c254298..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-/* D2Q9 lattice boltzmann implementation */
-
-#include <e-lib.h>
-#include "../shared.h"
-
-#include <stdint.h>
-#include <string.h>
-
-#include "lb.h"
-
-/* shared memory overlay: placed in the ".shared_dram" section and declared
-   volatile, so every access really goes to memory */
-volatile shm_t shm SECTION(".shared_dram");
-
-/* statically allocate dummy memory and local block overlay
-   to prevent linker from putting stuff in banks 1..3 */
-/* three 8192-byte placeholders, one per section .data_bank1/2/3; they are
-   never accessed by name (hence UNUSED) */
-static uint8_t      dummy_bank1[8192] UNUSED SECTION(".data_bank1");
-static uint8_t      dummy_bank2[8192] UNUSED SECTION(".data_bank2");
-static uint8_t      dummy_bank3[8192] UNUSED SECTION(".data_bank3");
-/* the lattice block is accessed through a fixed address instead;
-   NOTE(review): 0x2000 is presumably the start of bank 1 - confirm
-   against the linker script */
-static d2q9_block_t *block = (void*)0x2000;
-
-/* barrier structures, handed to e_barrier_init() and e_barrier() */
-volatile e_barrier_t  barriers[NUM_CORES];
-         e_barrier_t *tgt_bars[NUM_CORES];
-
-/* global index variables: set once in init(), declared extern in lb.h */
-unsigned int row, col, core;
-
-/*
- * One-time per-core setup: verify the build configuration, determine this
- * core's coordinates and linear index, and initialise the barrier data.
- */
-void init(void)
-{
-       /* compile-time checks: the block has to fit into 24 KiB, must be at
-          least 3x3 nodes, and the core grid must be between 1x1 and 4x4 */
-       BUILD_BUG(sizeof(d2q9_node_t) * BLOCK_X * BLOCK_Y > 24*1024);
-       BUILD_BUG(BLOCK_Y < 3 || BLOCK_X < 3);
-       BUILD_BUG(CORES_Y < 1 || CORES_X < 1);
-       BUILD_BUG(CORES_Y > 4 || CORES_X > 4);
-
-       /* core index */
-       /* NOTE(review): the SDK documents the two outputs of
-          e_coords_from_coreid() as (row, col) while they are stored as
-          (col, row) here.  The host program passes (x, y) where the SDK
-          expects (row, col), so this looks like a consistent convention -
-          confirm before "fixing" the order. */
-       e_coords_from_coreid(e_get_coreid(), &col, &row);
-       core = col + CORES_X * row;
-
-       /* barrier initialization */
-       e_barrier_init(barriers, tgt_bars);
-}
-
-/*
- * Stop timer 0, store E_CTIMER_MAX minus the value returned by
- * e_ctimer_stop() in clocks[X], then reload the timer with E_CTIMER_MAX
- * and start it again.
- *
- * Requires an array named `clocks` to be in scope at the point of use.
- * The expansion deliberately has no trailing semicolon, so that
- * `READ_TIMER(n);` is exactly one statement and is safe inside if/else.
- */
-#define READ_TIMER(X) \
-       do { \
-               clocks[(X)] = E_CTIMER_MAX - e_ctimer_stop(E_CTIMER_0); \
-               e_ctimer_set(E_CTIMER_0, E_CTIMER_MAX); \
-               e_ctimer_start(E_CTIMER_0, E_CTIMER_CLK); \
-       } while(0)
-
-/*
- * Per-core entry point.
- *
- * Initialises the core and its lattice block, then runs a fixed number of
- * iterations.  Each iteration collides and streams the block with barriers
- * in between, and records timer readings in clocks[].  At regular
- * intervals the block and the timers are copied to shared memory and
- * core 0 hands over to the host through shm.pollflag.  After the last
- * iteration core 0 reports POLL_DONE and every core idles forever.
- */
-int main(void)
-{
-       /* run length and distance between two snapshots for the host */
-       enum { ITERATIONS = 500, DUMP_INTERVAL = 100 };
-
-       const FLOAT omega = 1.0;
-       unsigned clocks[TIMERS] = {0};
-
-       init();
-       d2q9_init(*block);
-
-       for(int i = 0; i < ITERATIONS; i++) {
-               READ_TIMER(0);
-
-#if 1
-               /* collide all nodes */
-               for(int y = 0; y < BLOCK_Y; y++)
-                       for(int x = 0; x < BLOCK_X; x++)
-                               d2q9_collide(*block, x, y, omega);
-
-               /* synchronize */
-               READ_TIMER(1);
-               e_barrier(barriers, tgt_bars);
-               READ_TIMER(2);
-
-               /* stream all nodes */
-               for(int y = 0; y < BLOCK_Y; y++)
-                       for(int x = 0; x < BLOCK_X; x++)
-                               d2q9_stream(*block, x, y);
-               READ_TIMER(3);
-
-#else
-               /* alternative schedule (currently disabled): handle the
-                  border nodes separately and fuse collide/stream for
-                  the bulk */
-
-               /* collide boundaries: top, bottom */
-               for(int x = 0; x < BLOCK_X; x++) {
-                       d2q9_collide(*block, x, 0,         omega);
-                       d2q9_collide(*block, x, BLOCK_Y-1, omega);
-               }
-               READ_TIMER(1);
-
-               /* collide boundaries: left, right */
-               for(int y = 1; y < BLOCK_Y-1; y++) {
-                       d2q9_collide(*block, 0,         y, omega);
-                       d2q9_collide(*block, BLOCK_X-1, y, omega);
-               }
-
-               /* synchronize */
-               READ_TIMER(2);
-               e_barrier(barriers, tgt_bars);
-               READ_TIMER(3);
-
-               /* collide and stream the bulk */
-               d2q9_collide_stream_bulk(*block, omega);
-               READ_TIMER(4);
-
-               /* stream the boundaries: top, bottom */
-               for(int x = 0; x < BLOCK_X; x++) {
-                       d2q9_stream(*block, x, 0        );
-                       d2q9_stream(*block, x, BLOCK_Y-1);
-               }
-               READ_TIMER(5);
-
-               /* stream the boundaries: left, right */
-               for(int y = 1; y < BLOCK_Y-1; y++) {
-                       d2q9_stream(*block, 0,         y);
-                       d2q9_stream(*block, BLOCK_X-1, y);
-               }
-               READ_TIMER(6);
-#endif
-
-               /* copy data to shm if necessary */
-               if(i % DUMP_INTERVAL == 0) {
-                       /* copy iteration, lattice and timers to shm */
-                       if(core == 0)
-                               shm.iteration = i;
-
-                       /* shm is volatile; memcpy() takes a plain pointer,
-                          so the qualifier is dropped explicitly here */
-                       memcpy((void *)&shm.lattice[row][col], block,
-                               sizeof(d2q9_block_t));
-
-                       for(int t = 0; t < TIMERS; t++)
-                               shm.timers[row][col][t] = clocks[t];
-
-                       /* synchronize: every core has written its part */
-                       e_barrier(barriers, tgt_bars);
-
-                       /* flag host and wait until it changes the flag */
-                       if(core == 0) {
-                               shm.pollflag = POLL_READY;
-                               while(shm.pollflag == POLL_READY);
-                       }
-               }
-
-               /* synchronize */
-               e_barrier(barriers, tgt_bars);
-               READ_TIMER(TIMERS-1);
-       }
-
-       /* last iteration done: flag host and stop */
-       if(core == 0) shm.pollflag = POLL_DONE;
-       while(1) __asm__ volatile("idle");
-}
-
diff --git a/lb/hsrc/data.c b/lb/hsrc/data.c
deleted file mode 100644 (file)
index 675fe50..0000000
+++ /dev/null
@@ -1,142 +0,0 @@
-/* Helper Functions to handle data (2D) */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../shared.h"
-
-/*
- * fix file owner if run with sudo
- *
- * When both SUDO_UID and SUDO_GID are set in the environment, the file is
- * chown()ed to that user and group.  Without these variables the function
- * does nothing.  Values that are not plain non-negative decimal numbers
- * are rejected with a message instead of being treated as id 0.
- * A failing chown() is reported with perror(); no error is returned.
- */
-void fixsudo(const char *filename)
-{
-       const char *uid_str = getenv("SUDO_UID");
-       const char *gid_str = getenv("SUDO_GID");
-
-       /* not started through sudo: nothing to fix */
-       if(!uid_str || !gid_str)
-               return;
-
-       /* strtol() instead of atoi(): malformed input can be detected */
-       char *uid_end, *gid_end;
-       long uid = strtol(uid_str, &uid_end, 10);
-       long gid = strtol(gid_str, &gid_end, 10);
-       if(uid_end == uid_str || *uid_end != '\0' || uid < 0 ||
-          gid_end == gid_str || *gid_end != '\0' || gid < 0) {
-               fprintf(stderr, "fixsudo: invalid SUDO_UID/SUDO_GID\n");
-               return;
-       }
-
-       if(chown(filename, (uid_t)uid, (gid_t)gid))
-               perror("fixsudo/chown");
-}
-
-
-/*
- * Append a (semi-) human-readable dump of the lattice to "populations.dat".
- *
- * One line is written per node, in row-major order of the global lattice:
- * the iteration number, the global [x,y] position and the nine
- * populations.  A blank line terminates the dump.  If the file cannot be
- * opened, the error is reported with perror() and nothing is written.
- */
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
-{
-       FILE *out = fopen("populations.dat", "a");
-       if(out == NULL) {
-               perror("write_populations/fopen");
-               return;
-       }
-
-       /* walk global coordinates; the owning core and the position
-          inside its block follow from division and remainder */
-       for(int gy = 0; gy < CORES_Y * BLOCK_Y; gy++) {
-               for(int gx = 0; gx < CORES_X * BLOCK_X; gx++) {
-                       const FLOAT *node = lattice[gy / BLOCK_Y][gx / BLOCK_X]
-                                                  [gy % BLOCK_Y][gx % BLOCK_X];
-
-                       fprintf(out, "%3d: [%3d,%3d]: ", iter, gx, gy);
-                       for(int q = 0; q < 9; q++)
-                               fprintf(out, "%.5f ", node[q]);
-                       fputc('\n', out);
-               }
-       }
-       fputc('\n', out);
-
-       fclose(out);
-}
-
-/*
- * Write the density of every lattice node as an 8-bit grayscale binary
- * image to ./tmp/iNNNNNN.ppm (NNNNNN = zero-padded iteration number).
- * The header written is "P5", i.e. the data is in PGM format although the
- * file name ends in .ppm.
- *
- * The density of a node is the sum of its nine populations.  Densities
- * are scaled linearly so that the smallest value found maps to 0 and the
- * largest to 255.  If all densities are equal (or not comparable), every
- * pixel is written as 0 instead of dividing by zero.
- *
- * Errors from fopen/fwrite/fclose are reported with perror(); the function
- * returns nothing.  The file owner is fixed up via fixsudo() afterwards.
- */
-void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter)
-{
-       char name[32];
-       snprintf(name, sizeof name, "./tmp/i%06d.ppm", iter);
-
-       /* open image file and write header */
-       FILE *file = fopen(name, "wb");
-       if(!file) {
-               perror("write_image/fopen");
-               return;
-       }
-       fprintf(file, "P5\n%d %d\n%d\n", CORES_X*BLOCK_X, CORES_Y*BLOCK_Y, 255);
-
-       /* calculate all densities and remember min/max; the range is
-          seeded from the first node so that it reflects the data only */
-       FLOAT min = 0, max = 0;
-       int   seeded = 0;
-       FLOAT rhos[CORES_Y][BLOCK_Y][CORES_X][BLOCK_X];
-       for(int cy = 0; cy < CORES_Y; cy++) {
-               for(int y = 0; y < BLOCK_Y; y++) {
-                       for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCK_X; x++) {
-                                       FLOAT rho = 0;
-                                       for(int q = 0; q < 9; q++)
-                                               rho += lattice[cy][cx][y][x][q];
-                                       rhos[cy][y][cx][x] = rho;
-
-                                       if(!seeded) {
-                                               min = max = rho;
-                                               seeded = 1;
-                                       }
-                                       if(rho < min) min = rho;
-                                       if(rho > max) max = rho;
-                               }
-                       }
-               }
-       }
-
-       /* scale values and write them to the image, one row at a time */
-       const FLOAT range = max - min;
-       int failed = 0;
-       for(int cy = 0; cy < CORES_Y && !failed; cy++) {
-               for(int y = 0; y < BLOCK_Y && !failed; y++) {
-                       unsigned char line[CORES_X * BLOCK_X];
-
-                       for(int cx = 0; cx < CORES_X; cx++) {
-                               for(int x = 0; x < BLOCK_X; x++) {
-                                       unsigned char gray = 0;
-                                       if(range > 0) {
-                                               double v = 255. * (rhos[cy][y][cx][x]-min) / range;
-                                               /* also filters NaN: both comparisons fail */
-                                               if(v >= 0 && v <= 255)
-                                                       gray = (unsigned char)v;
-                                       }
-                                       line[cx * BLOCK_X + x] = gray;
-                               }
-                       }
-
-                       if(fwrite(line, 1, sizeof line, file) != sizeof line) {
-                               perror("write_image/fwrite");
-                               failed = 1;
-                       }
-               }
-       }
-
-       /* close the file and chown if run with sudo */
-       if(fclose(file))
-               perror("write_image/fclose");
-       fixsudo(name);
-}
-
-/*
- * Combine the images ./tmp/i*.ppm into the animated gif ./tmp/anim.gif by
- * running the external "convert" command, then fix the owner of the
- * result.  The exit status of the command is deliberately ignored.
- */
-void write_animation(void)
-{
-       static const char convert_cmd[] = "convert ./tmp/i*.ppm ./tmp/anim.gif";
-
-       /* call imagemagick; the status is stored only to mark it as
-          intentionally unused */
-       const int status = system(convert_cmd);
-       (void)status;
-
-       fixsudo("./tmp/anim.gif");
-}
-
-/*
- * Append the timer values of all cores to "timers.dat".
- *
- * Writes a header line with the iteration number, followed by one line
- * per core: "[x,y]: " and the TIMERS values of that core.  If the file
- * cannot be opened, the error is reported with perror() and nothing is
- * written.
- *
- * timers - timer values, indexed [y][x][timer]
- * iter   - iteration number printed in the header line
- */
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter)
-{
-       FILE *file = fopen("timers.dat", "ab");
-       if(!file) {
-               perror("write_timers/fopen");
-               return;
-       }
-
-       /* the values are unsigned 32-bit: print them through unsigned long,
-          which is guaranteed to be wide enough, instead of %d */
-       fprintf(file, "Timers: i=%lu\n", (unsigned long)iter);
-       for(int y = 0; y < CORES_Y; y++) {
-               for(int x = 0; x < CORES_X; x++) {
-                       fprintf(file, "[%d,%d]: ", x, y);
-                       for(int t = 0; t < TIMERS; t++) {
-                               fprintf(file, "%8lu ", (unsigned long)timers[y][x][t]);
-                       }
-                       fprintf(file, "\n");
-               }
-       }
-
-       fclose(file);
-}
diff --git a/lb/hsrc/main.c b/lb/hsrc/main.c
deleted file mode 100644 (file)
index ca244a4..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-/* Host Application */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-
-#include <e-hal.h>
-#include "../shared.h"
-
-/* print a printf-style message to stderr and terminate with status 1;
-   wrapped in do/while(0) so that `FAIL(...);` is a single statement and
-   stays safe in front of an `else` */
-#define FAIL(...) do { fprintf(stderr, __VA_ARGS__); exit(1); } while(0)
-/* offset handed to e_alloc() for the shared memory region */
-#define SHM_OFFSET 0x01000000
-
-/* helper functions (implemented in data.c) */
-void fixsudo(const char *filename);
-void write_populations(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_image(d2q9_block_t lattice[CORES_Y][CORES_X], int iter);
-void write_animation(void);
-void write_timers(uint32_t timers[CORES_Y][CORES_X][TIMERS], uint32_t iter);
-
-/* globals */
-static shm_t    shm = { 0 };   /* local shm copy */
-static uint32_t pollflag;      /* last pollflag value read from the device */
-
-/*
- * Host entry point.
- *
- * Removes old result files, sets up the Epiphany workgroup and the shared
- * memory region, loads and starts the device program on every core and
- * then polls the shared pollflag.  Whenever the device reports new data,
- * the complete shared memory is copied, the flag is reset to POLL_BUSY
- * and an image plus the timer values are written.  When the device
- * reports POLL_DONE, all resources are released and the images are
- * converted into an animation.
- *
- * Any failing e-hal call terminates the program through FAIL().
- * Returns 0 on success.
- */
-int main(void)
-{
-       char *filename = "bin/lb_2d.srec";
-
-       /* remove old results */
-       int dummy = system("rm -f ./tmp/i*.ppm ./tmp/anim.gif populations.dat timers.dat");
-       (void)dummy;
-
-       e_epiphany_t dev;
-       e_mem_t      mem;
-
-       e_set_host_verbosity(H_D0);
-       e_set_loader_verbosity(L_D0);
-
-       /* initialize workgroup, allocate and clear shared memory */
-       if(e_init(NULL) != E_OK)
-               FAIL("Can't init!\n");
-       e_reset_system();
-       if(e_open(&dev, 0, 0, CORES_X, CORES_Y) != E_OK)
-               FAIL("Can't open!\n");
-       if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
-               FAIL("Can't alloc!\n");
-       if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
-               FAIL("Can't clear shm!\n");
-
-       /* load programs */
-       printf("Starting cores:\n");
-       for(int y = 0; y < CORES_Y; y++) {
-               for(int x = 0; x < CORES_X; x++) {
-                       printf("(%02d,%02d) ", x, y);
-                       if(e_load(filename, &dev, x, y, E_TRUE) != E_OK)
-                               FAIL("Can't load!\n");
-               }
-               printf("\n");
-       }
-
-       /* ================================================================ */
-       printf("Polling shared memory.\n");
-       while(1) {
-
-               /* busy-wait until the device leaves the POLL_BUSY state;
-                  only the flag at offset 0 is read here */
-               while(1) {
-                       /* read polling flag */
-                       if(e_read(&mem, 0, 0, (off_t)0, &pollflag,
-                               sizeof(uint32_t)) == E_ERR)
-                                       FAIL("Can't read pollflag!\n");
-
-                       /* wait for something */
-                       if(pollflag != POLL_BUSY) break;
-               }
-
-               /* finish if done */
-               if(pollflag == POLL_DONE) break;
-
-               /* read full shared memory */
-               if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
-                       FAIL("Can't read full shm!\n");
-
-               /* reset pollflag so the device continues */
-               pollflag = POLL_BUSY;
-               if(e_write(&mem, 0, 0, (off_t)0, &pollflag,
-                       sizeof(uint32_t)) == E_ERR) {
-                               FAIL("Can't reset pollflag!\n");
-               }
-
-               /* print iteration */
-               printf("0x%08x\r", (unsigned)shm.iteration); fflush(stdout);
-
-               /* write data; write_populations(shm.lattice, shm.iteration)
-                  is available for a textual dump but not called */
-               write_image(shm.lattice, shm.iteration);
-               write_timers(shm.timers, shm.iteration);
-       }
-       /* ================================================================ */
-
-       if(e_free(&mem)  != E_OK) FAIL("Can't free!\n");
-       if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
-       if(e_finalize()  != E_OK) FAIL("Can't finalize!\n");
-
-       /* NOTE(review): populations.dat is removed above and not written
-          by this function, so this chown presumably fails when run with
-          sudo - confirm */
-       fixsudo("populations.dat");
-       fixsudo("timers.dat");
-
-       printf("\nProgram finished successfully.\n");
-       printf("Convert ...\n");
-       write_animation();
-
-       return 0;
-}
-
diff --git a/lb/shared.h b/lb/shared.h
deleted file mode 100644 (file)
index 607a3e1..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/* shared data types and external memory layout;
-   included by both the host and the device program */
-#ifndef _SHARED_H_
-#define _SHARED_H_
-
-#include <stdint.h>
-
-/* preprocessor magic */
-/* BUILD_BUG(c): fails to compile when the constant expression c is true,
-   because the array type then has a negative size.  No trailing semicolon
-   in the expansion, so `BUILD_BUG(c);` is exactly one statement. */
-#define BUILD_BUG(c) do { ((void)sizeof(char[1 - 2*!!(c)])); } while(0)
-#define UNUSED __attribute__((unused))
-#undef PACKED
-#define PACKED __attribute__((packed))
-#undef ALIGN
-#define ALIGN(X) __attribute__((aligned(X)))
-
-/* number of cores */
-#define CORES_X 4
-#define CORES_Y 4
-#define NUM_CORES (CORES_X * CORES_Y)
-
-/* size of per-core subgrid */
-#define BLOCK_X 26
-#define BLOCK_Y 26
-
-/* number of timer values stored per core */
-#define TIMERS 12
-
-/* pollflag values */
-#define POLL_BUSY  0x00
-#define POLL_READY 0x01
-#define POLL_DONE  0x02
-
-/* floating point type */
-typedef float FLOAT;
-
-/* node and block type (D2Q9): nine populations per node,
-   a block is indexed [y][x] */
-typedef FLOAT       d2q9_node_t[9];
-typedef d2q9_node_t d2q9_block_t[BLOCK_Y][BLOCK_X];
-
-/* shared memory structure; pollflag is the first member, so it can be
-   accessed on its own at offset 0 */
-typedef struct {
-       uint32_t     pollflag;
-       uint32_t     iteration;
-       uint32_t     timers[CORES_Y][CORES_X][TIMERS];
-       d2q9_block_t lattice[CORES_Y][CORES_X];
-} ALIGN(8) shm_t;
-
-#endif /* _SHARED_H_ */
-