-# Makefile
+# Template Makefile for Epiphany
# host toolchain
HCC = gcc
ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/fast.ldf -le-lib
EOFLAGS = -R .shared_dram
+# host application
+HAPP = $(DEST)/ep_main
+HOBJS = $(HDEST)/main.o
+
+# epiphany applications
+EAPPS = $(DEST)/lb_2d.srec
+ECOMMON = $(EDEST)/d2q9.o
+
# folders
HSRC = hsrc
HDEST = hobj
EDEST = eobj
DEST = bin
-# applications (host binary and epiphany SREC files) to build
-HAPP = $(DEST)/ep_main
-EAPPS = # $(DEST)/test.srec
-
-# object files to build
-HOBJS = $(HDEST)/main.o
-EOBJS = # $(EDEST)/test.o
+# === Magic begins here ===================================================
+EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
+EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
-# === Rules ===============================================================
.SECONDARY:
-.PHONY: all host target folders run clean
-
-all: run
+.PHONY: all help host target folders run clean
+
+# === Phony Rules =========================================================
+help:
+ @echo $(EOBJS)
+ @$(ECHO)
+ @$(ECHO) "Epiphany Makefile - Help"
+ @$(ECHO) " help show this help"
+ @$(ECHO) " host build host application ($(HAPP))"
+ @$(ECHO) " target build epiphany applications ($(EAPPS))"
+ @$(ECHO) " all build all"
+ @$(ECHO) " run build all, then run host application"
+ @$(ECHO) " clean remove applications and intermediate files"
+ @$(ECHO)
+
+all: host target
host: folders $(HAPP)
clean:
@$(ECHO) "\tCLEAN"
- @rm -v -f $(HOBJS) $(EOBJS) $(EAPPS) $(HAPP)
- @rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST)
+ @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS)
+ @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
+ 2>/dev/null
$(HDEST):
@$(ECHO) "\tMKDIR $(HDEST)"
@$(ECHO) "\tMKDIR $(DEST)"
@mkdir -p $(DEST)
-# === Host Toolchain ======================================================
+# === Host Rules ==========================================================
$(HAPP): $(HOBJS)
@$(ECHO) "\t(HOST) LINK\t$@"
@$(HCC) -o $@ $^ $(HLFLAGS)
@$(ECHO) "\t(HOST) CC\t$@"
@$(HCC) $(HCFLAGS) -c -o $@ $^
-# === Target Toolchain ====================================================
+# === Target Rules ========================================================
$(DEST)/%.srec: $(EDEST)/%.elf
@$(ECHO) "\t(TARGET) OBJCOPY $@"
@$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@
-$(EDEST)/%.elf: $(EDEST)/%.o
+$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
@$(ECHO) "\t(TARGET) LINK\t$@"
- @$(ECC) -o $@ $^ $(ECFLAGS)
+ @$(ECC) -o $@ $^ $(ELFLAGS)
$(EDEST)/%.o: $(ESRC)/%.c
@$(ECHO) "\t(TARGET) CC\t$@"
--- /dev/null
+/* D2Q9 lattice boltzmann functions */
+
+#include "../shared.h"
+
+/* velocities */
+static const int d2q9_v[9][2] = { { 0, 0},
+ {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
+ { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
+};
+
+/* weights */
+static const FLOAT d2q9_w[9] = { 4./9.,
+ 1./36., 1./9., 1./36., 1./9.,
+ 1./36., 1./9., 1./36., 1./9.,
+};
+
+void init_block(d2q9_block_t block)
+{
+ /* all with rho = 0.1 */
+ for(int x = 0; x < BLOCK_X; x++)
+ for(int y = 0; y < BLOCK_Y; y++)
+ for(int q = 0; q < 9; q++)
+ block[x][y][q] = 0.1 * d2q9_w[q];
+
+ /* except here with 0.2 */
+ for(int q = 0; q < 9; q++)
+ block[0][0][q] = 0.2 * d2q9_w[q];
+
+ return;
+}
+
+void collide_and_swap(d2q9_block_t f, int x, int y, FLOAT omega)
+{
+ /* macroscopic */
+ FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] +
+ f[x][y][3] + f[x][y][4] + f[x][y][5] +
+ f[x][y][6] + f[x][y][7] + f[x][y][8];
+ FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] -
+ f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho;
+ FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] -
+ f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho;
+ FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+ /* update node */
+ for(int q = 0; q < 9; q++) {
+ FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+ FLOAT eq = rho * d2q9_w[q] *
+ (1. + 3. * cu + 4.5 * cu*cu - sqr);
+ f[x][y][q] *= (1.0 - omega);
+ f[x][y][q] += omega * eq;
+ }
+
+ /* swap */
+ for(int q = 1; q <= 4; q++) {
+ FLOAT tmp = f[x][y][q];
+ f[x][y][q] = f[x][y][q+4];
+ f[x][y][q+4] = tmp;
+ }
+}
+
+void stream_node(d2q9_block_t f, int x, int y)
+{
+ for(int q = 1; q <= 4; q++) {
+ int next_x = x + d2q9_v[q][0];
+ int next_y = y + d2q9_v[q][1];
+
+ /* wrap around */
+ if(next_x < 0) next_x += BLOCK_X;
+ else if(next_x >= BLOCK_X) next_x -= BLOCK_X;
+ if(next_y < 0) next_y += BLOCK_Y;
+ else if(next_y >= BLOCK_Y) next_y -= BLOCK_Y;
+
+ FLOAT tmp = f[x][y][q+4];
+ f[x][y][q+4] = f[next_x][next_y][q];
+ f[next_x][next_y][q] = tmp;
+ }
+}
+
+void collide_and_stream_bulk(d2q9_block_t f, FLOAT omega)
+{
+ /* don't touch the border nodes */
+ for(int x = 1; x < BLOCK_X-1; x++) {
+ for(int y = 1; y < BLOCK_Y-1; y++) {
+ /* macroscopic */
+ FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] +
+ f[x][y][3] + f[x][y][4] + f[x][y][5] +
+ f[x][y][6] + f[x][y][7] + f[x][y][8];
+ FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] -
+ f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho;
+ FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] -
+ f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho;
+ FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+ /* update node */
+ for(int q = 0; q < 9; q++) {
+ FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+ FLOAT eq = rho * d2q9_w[q] *
+ (1. + 3. * cu + 4.5 * cu*cu - sqr);
+ f[x][y][q] *= (1.0 - omega);
+ f[x][y][q] += omega * eq;
+ }
+
+ /* stream */
+ for(int q = 0; q <= 4; q++) {
+ int next_x = x + d2q9_v[q][0];
+ int next_y = y + d2q9_v[q][1];
+
+ FLOAT tmp = f[x][y][q];
+ f[x][y][q] = f[x][y][q+4];
+ f[x][y][q+4] = f[next_x][next_y][q];
+ f[next_x][next_y][q] = tmp;
+ }
+ }
+ }
+}
+
--- /dev/null
+/* D2Q9 lattice boltzmann functions */
+
+#include "../shared.h"
+
+void init_block (d2q9_block_t);
+
+void collide_and_swap (d2q9_block_t, int x, int y, FLOAT);
+void stream_node (d2q9_block_t, int x, int y);
+
+void collide_and_stream_bulk(d2q9_block_t, FLOAT);
+
+
+
+
--- /dev/null
+/* D2Q9 lattice boltzmann implementation */
+
+#include <e-lib.h>
+#include "../shared.h"
+
+#include <string.h>
+
+#include "lb.h"
+
+/* shared memory overlay */
+volatile shm_t shm SECTION(".shared_dram");
+
+/* local block */
+d2q9_block_t block SECTION(".data_bank2");
+
+void delay(void)
+{
+ for(volatile int i = 0; i < 1000000; i++)
+ for(volatile int j = 0; j < 10; j++)
+ ;
+}
+
+int main()
+{
+ const FLOAT omega = 1.0;
+
+ init_block(block);
+
+ while(1) {
+#if 0
+ /* collide all nodes */
+ for(int x = 0; x < BLOCK_X; x++)
+ for(int y = 0; y < BLOCK_Y; y++)
+ collide_and_swap(block, x, y, omega);
+
+ /* XXX synchronize */
+
+ /* stream all nodes */
+ for(int x = 0; x < BLOCK_X; x++)
+ for(int y = 0; y < BLOCK_Y; y++)
+ stream_node(block, x, y);
+
+ /* XXX synchronize */
+
+#else
+ /* collide boundaries: top, bottom */
+ for(int x = 0; x < BLOCK_X; x++) {
+ collide_and_swap(block, x, 0, omega);
+ collide_and_swap(block, x, BLOCK_Y-1, omega);
+ }
+
+ /* collide boundaries: left, right */
+ for(int y = 1; y < BLOCK_Y-1; y++) {
+ collide_and_swap(block, 0, y, omega);
+ collide_and_swap(block, BLOCK_X-1, y, omega);
+ }
+
+ /* XXX synchronize */
+
+ /* collide and stream the bulk */
+ collide_and_stream_bulk(block, omega);
+
+ /* stream the boundaries: left, right */
+ for(int x = 0; x < BLOCK_X; x++) {
+ stream_node(block, x, 0 );
+ stream_node(block, x, BLOCK_Y-1);
+ }
+
+ /* stream the boundaries: left, right */
+ for(int y = 1; y < BLOCK_Y-1; y++) {
+ stream_node(block, 0, y);
+ stream_node(block, BLOCK_X-1, y);
+ }
+
+ /* XXX synchronize */
+#endif
+
+
+ /* copy grid to shm */
+ memcpy(&shm.lattice[0], &block, sizeof(d2q9_block_t));
+
+ /* flag host */
+ shm.states[0]++;
+ }
+
+ while(1);
+}
+
+/* Host Application */
+
#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <e-hal.h>
+#include "../shared.h"
+
+#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
+#define SHM_OFFSET 0x01000000
+
+static states_t laststates, states; /* old state value */
+static shm_t shm = {{ 0 }}; /* local shm copy */
+
+void write_populations(FILE *file, int core, int iter)
+{
+ for(int x = 0; x < BLOCK_X; x++) {
+ for(int y = 0; y < BLOCK_Y; y++) {
+ fprintf(file, "%03d: [%02d,%02d]: ", iter, x, y);
+ for(int q = 0; q < 9; q++) {
+ fprintf(file, "%.5f\t",
+ shm.lattice[core][x][y][q]);
+ }
+ fprintf(file, "\n");
+ }
+ }
+ fprintf(file, "\n");
+
+ return;
+}
+
+void write_image(int iter)
+{
+ FILE *file; char name[32];
+
+ FLOAT rhos[4][4][BLOCK_X][BLOCK_Y];
+ FLOAT min = 1, max = 0;
+ uint8_t gray;
+
+ snprintf(name, 32, "./tmp/i%06d.ppm", iter);
+ file = fopen(name, "wb");
+ if(!file) exit(-1);
+
+ fprintf(file, "P5\n%d %d\n%d\n", BLOCK_X, BLOCK_Y, 255);
+
+ /* calculate all densities and remember min/max */
+ int cx = 0, cy = 0;
+// for(int cy = 0; cy < 1; cy++) {
+// for(int cx = 0; cx < 1; cx++) {
+ for(int x = 0; x < BLOCK_X; x++) {
+ for(int y = 0; y < BLOCK_Y; y++) {
+ rhos[cy][cx][x][y] = 0;
+ for(int q = 0; q < 9; q++) {
+ rhos[cy][cx][x][y] +=
+ shm.lattice[cy*4+cx][x][y][q];
+ }
+
+ if(rhos[cy][cx][x][y] < min)
+ min = rhos[cy][cx][x][y];
+ if(rhos[cy][cx][x][y] > max)
+ max = rhos[cy][cx][x][y];
+ }
+ }
+// }
+// }
+
+ /* now scale values and write to image file */
+// for(int cy = 0; cy < 4; cy++) {
+// for(int cx = 0; cx < 4; cx++) {
+ for(int x = 0; x < BLOCK_X; x++) {
+ for(int y = 0; y < BLOCK_Y; y++) {
+ gray = (int)(255.*(rhos[cy][cx][x][y]-min) / (max-min));
+ fwrite(&gray, 1, 1, file);
+ }
+ }
+// }
+// }
+
+ fclose(file);
+ if(chown(name, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+ perror("chown");
+ }
+
+ return;
+}
int main()
{
- printf("Hello, World!\n");
+ char *filename = "bin/lb_2d.srec";
+ FILE *datfile; char *datname = "populations.dat";
+ int dummy, old0 = 0;
+
+ e_epiphany_t dev;
+ e_mem_t mem;
+
+ e_set_host_verbosity(H_D0);
+ e_set_loader_verbosity(L_D0);
+
+ dummy = system("rm ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy;
+
+ /* overwrite results file */
+ datfile = fopen(datname, "w");
+ if(!datfile)
+ FAIL("Can't write result file: %s\n", strerror(errno));
+
+ /* initialize workgroup, allocate and clear shared memory */
+ if(e_init(NULL) != E_OK)
+ FAIL("Can't init!\n");
+ e_reset_system();
+ if(e_open(&dev, 0, 0, 4, 4) != E_OK)
+ FAIL("Can't open!\n");
+ if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
+ FAIL("Can't alloc!\n");
+ if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+ FAIL("Can't clear shm!\n");
+
+ /* load programs */
+ if(e_load(filename, &dev, 0, 0, E_TRUE) != E_OK)
+ FAIL("Can't load!\n");
+
+ /* ================================================================ */
+ while(1) {
+ printf("Polling shared memory.\n");
+
+ while(1) {
+ /* read states */
+ if(e_read(&mem, 0, 0, (off_t)0, &states,
+ sizeof(states_t)) == E_ERR)
+ FAIL("Can't poll!\n");
+
+ /* compare with old values */
+ if(memcmp(&laststates, &states, sizeof(states_t)))
+ break;
+ }
+
+ /* read full shared memory */
+ if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+ FAIL("Can't read shm!\n");
+
+ /* save (updated) states */
+ memcpy(&states, &shm, sizeof(states_t));
+ memcpy(&laststates, &shm, sizeof(states_t));
+
+ /* print states */
+ printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+ states[0], states[1], states[2], states[3]);
+ printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+ states[4], states[5], states[6], states[7]);
+ printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+ states[8], states[9], states[10], states[11]);
+ printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+ states[12], states[13], states[14], states[15]);
+
+ /* write populations */
+ if(states[0] != old0) {
+ write_populations(datfile, 0, states[0]);
+ write_image(states[0]);
+ old0 = states[0];
+ }
+
+ if(states[0] >= 21) break;
+ }
+ /* ================================================================ */
+
+ if(e_free(&mem) != E_OK) FAIL("Can't free!\n");
+ if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
+ if(e_finalize() != E_OK) FAIL("Can't finalize!\n");
+
+ fclose(datfile);
+ if(chown(datname, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+ FAIL("Can't chown populations!\n");
+ }
+
+ printf("\nProgram finished successfully.\n");
+ printf("Convert ...\n");
+ dummy = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy;
+ if(chown("./tmp/anim.gif", atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+ FAIL("Can't chown anim!\n");
+ }
+
return(0);
}
+
--- /dev/null
+/* shared data types and external memory layout */
+#ifndef _SHARED_H_
+#define _SHARED_H_
+
+#include <stdint.h>
+#ifndef PACKED
+#define PACKED __attribute__((packed))
+#endif /* PACKED */
+
+/* number of cores */
+#define NUM_CORES 16
+
+/* size of per-core subgrid */
+#define BLOCK_X 15
+#define BLOCK_Y 15
+
+/* floating point type */
+typedef float FLOAT;
+
+/* state type */
+typedef uint32_t states_t[NUM_CORES];
+
+/* node and block type (D2Q9) */
+typedef FLOAT d2q9_node_t[9];
+typedef d2q9_node_t d2q9_block_t[BLOCK_X][BLOCK_Y];
+
+/* shared memory structure */
+typedef struct {
+ states_t states;
+ d2q9_block_t lattice[NUM_CORES];
+} PACKED shm_t;
+
+#endif /* _SHARED_H_ */
+