lb: D2Q9 working, single bank, single core
authorSebastian <git@sraa.de>
Wed, 25 Jun 2014 22:05:27 +0000 (22:05 +0000)
committerSebastian <git@sraa.de>
Wed, 25 Jun 2014 22:05:27 +0000 (22:05 +0000)
limited to 10x10 (double precision) or 15x15 (single precision)

lb/Makefile
lb/esrc/d2q9.c [new file with mode: 0644]
lb/esrc/lb.h [new file with mode: 0644]
lb/esrc/lb_2d.c [new file with mode: 0644]
lb/hsrc/main.c
lb/shared.h [new file with mode: 0644]

index badbdd8cf368982be3176cceae3c06fe6c2f3d6a..949e1a6b933316acfd249a68d821e299098c5c68 100644 (file)
@@ -1,4 +1,4 @@
-# Makefile
+# Template Makefile for Epiphany
 
 # host toolchain
 HCC    = gcc
@@ -13,6 +13,14 @@ ECFLAGS      = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall
 ELFLAGS        = -T$(EPIPHANY_HOME)/bsps/current/fast.ldf -le-lib
 EOFLAGS        = -R .shared_dram
 
+# host application
+HAPP   = $(DEST)/ep_main
+HOBJS  = $(HDEST)/main.o
+
+# epiphany applications
+EAPPS  = $(DEST)/lb_2d.srec
+ECOMMON        = $(EDEST)/d2q9.o
+
 # folders
 HSRC   = hsrc
 HDEST  = hobj
@@ -20,19 +28,27 @@ ESRC        = esrc
 EDEST  = eobj
 DEST   = bin
 
-# applications (host binary and epiphany SREC files) to build
-HAPP   = $(DEST)/ep_main
-EAPPS  = # $(DEST)/test.srec
-
-# object files to build
-HOBJS  = $(HDEST)/main.o
-EOBJS  = # $(EDEST)/test.o
+# === Magic begins here ===================================================
+EOBJS  = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON)
+EELFS  = $(EAPPS:$(DEST)%srec=$(EDEST)%elf)
 
-# === Rules ===============================================================
 .SECONDARY:
-.PHONY: all host target folders run clean
-
-all: run
+.PHONY: all help host target folders run clean
+
+# === Phony Rules =========================================================
+help:
+       @echo $(EOBJS)
+       @$(ECHO)
+       @$(ECHO) "Epiphany Makefile - Help"
+       @$(ECHO) "  help    show this help"
+       @$(ECHO) "  host    build host application      ($(HAPP))"
+       @$(ECHO) "  target  build epiphany applications ($(EAPPS))"
+       @$(ECHO) "  all     build all"
+       @$(ECHO) "  run     build all, then run host application"
+       @$(ECHO) "  clean   remove applications and intermediate files"
+       @$(ECHO)
+
+all: host target
 
 host: folders $(HAPP)
 
@@ -48,8 +64,9 @@ run: host target
 
 clean:
        @$(ECHO) "\tCLEAN"
-       @rm -v -f $(HOBJS) $(EOBJS) $(EAPPS) $(HAPP)
-       @rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST)
+       @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS)
+       @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \
+               2>/dev/null
 
 $(HDEST):
        @$(ECHO) "\tMKDIR $(HDEST)"
@@ -63,7 +80,7 @@ $(DEST):
        @$(ECHO) "\tMKDIR $(DEST)"
        @mkdir -p $(DEST)
 
-# === Host Toolchain ======================================================
+# === Host Rules ==========================================================
 $(HAPP): $(HOBJS)
        @$(ECHO) "\t(HOST)   LINK\t$@"
        @$(HCC) -o $@ $^ $(HLFLAGS)
@@ -72,14 +89,14 @@ $(HDEST)/%.o: $(HSRC)/%.c
        @$(ECHO) "\t(HOST)   CC\t$@"
        @$(HCC) $(HCFLAGS) -c -o $@ $^
 
-# === Target Toolchain ====================================================
+# === Target Rules ========================================================
 $(DEST)/%.srec: $(EDEST)/%.elf
        @$(ECHO) "\t(TARGET) OBJCOPY $@"
        @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@
 
-$(EDEST)/%.elf: $(EDEST)/%.o
+$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON)
        @$(ECHO) "\t(TARGET) LINK\t$@"
-       @$(ECC) -o $@ $^ $(ECFLAGS)
+       @$(ECC) -o $@ $^ $(ELFLAGS)
 
 $(EDEST)/%.o: $(ESRC)/%.c
        @$(ECHO) "\t(TARGET) CC\t$@"
diff --git a/lb/esrc/d2q9.c b/lb/esrc/d2q9.c
new file mode 100644 (file)
index 0000000..22f5f5f
--- /dev/null
@@ -0,0 +1,116 @@
+/* D2Q9 lattice boltzmann functions */
+
+#include "../shared.h"
+
+/* velocities */
+static const int d2q9_v[9][2] = { { 0, 0},
+       {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1},
+       { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1},
+};
+
+/* weights */
+static const FLOAT d2q9_w[9] = { 4./9.,
+       1./36., 1./9., 1./36., 1./9.,
+       1./36., 1./9., 1./36., 1./9.,
+};
+
+void init_block(d2q9_block_t block)
+{
+       /* all with rho = 0.1 */
+       for(int x = 0; x < BLOCK_X; x++)
+               for(int y = 0; y < BLOCK_Y; y++)
+                       for(int q = 0; q < 9; q++)
+                               block[x][y][q] = 0.1 * d2q9_w[q];
+
+       /* except here with 0.2 */
+       for(int q = 0; q < 9; q++)
+               block[0][0][q] = 0.2 * d2q9_w[q];
+
+       return;
+}
+
+void collide_and_swap(d2q9_block_t f, int x, int y, FLOAT omega)
+{
+       /* macroscopic */
+       FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] +
+               f[x][y][3] + f[x][y][4] + f[x][y][5] +
+               f[x][y][6] + f[x][y][7] + f[x][y][8];
+       FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] -
+               f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho;
+       FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] -
+               f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho;
+       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+       /* update node */
+       for(int q = 0; q < 9; q++) {
+               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+               FLOAT eq = rho * d2q9_w[q] *
+                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
+               f[x][y][q] *= (1.0 - omega);
+               f[x][y][q] += omega * eq;
+       }
+
+       /* swap */
+       for(int q = 1; q <= 4; q++) {
+               FLOAT tmp    = f[x][y][q];
+               f[x][y][q]   = f[x][y][q+4];
+               f[x][y][q+4] = tmp;
+       }
+}
+
+void stream_node(d2q9_block_t f, int x, int y)
+{
+       for(int q = 1; q <= 4; q++) {
+               int next_x = x + d2q9_v[q][0];
+               int next_y = y + d2q9_v[q][1];
+
+               /* wrap around */
+               if(next_x < 0)             next_x += BLOCK_X;
+               else if(next_x >= BLOCK_X) next_x -= BLOCK_X;
+               if(next_y < 0)             next_y += BLOCK_Y;
+               else if(next_y >= BLOCK_Y) next_y -= BLOCK_Y;
+
+               FLOAT tmp    = f[x][y][q+4];
+               f[x][y][q+4] = f[next_x][next_y][q];
+               f[next_x][next_y][q] = tmp;
+       }
+}
+
+void collide_and_stream_bulk(d2q9_block_t f, FLOAT omega)
+{
+       /* don't touch the border nodes */
+       for(int x = 1; x < BLOCK_X-1; x++) {
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       /* macroscopic */
+                       FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] +
+                               f[x][y][3] + f[x][y][4] + f[x][y][5] +
+                               f[x][y][6] + f[x][y][7] + f[x][y][8];
+                       FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] -
+                               f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho;
+                       FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] -
+                               f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho;
+                       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
+
+                       /* update node */
+                       for(int q = 0; q < 9; q++) {
+                               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
+                               FLOAT eq = rho * d2q9_w[q] *
+                                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
+                               f[x][y][q] *= (1.0 - omega);
+                               f[x][y][q] += omega * eq;
+                       }
+
+                       /* stream */
+                       for(int q = 0; q <= 4; q++) {
+                               int next_x = x + d2q9_v[q][0];
+                               int next_y = y + d2q9_v[q][1];
+
+                               FLOAT tmp     = f[x][y][q];
+                               f[x][y][q]   = f[x][y][q+4];
+                               f[x][y][q+4] = f[next_x][next_y][q];
+                               f[next_x][next_y][q] = tmp;
+                       }
+               }
+       }
+}
+
diff --git a/lb/esrc/lb.h b/lb/esrc/lb.h
new file mode 100644 (file)
index 0000000..9c099bf
--- /dev/null
@@ -0,0 +1,14 @@
+/* D2Q9 lattice boltzmann functions */
+
+#include "../shared.h"
+
+void init_block             (d2q9_block_t);
+
+void collide_and_swap       (d2q9_block_t, int x, int y,  FLOAT);
+void stream_node            (d2q9_block_t, int x, int y);
+
+void collide_and_stream_bulk(d2q9_block_t, FLOAT);
+
+
+
+
diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c
new file mode 100644 (file)
index 0000000..99f6550
--- /dev/null
@@ -0,0 +1,88 @@
+/* D2Q9 lattice boltzmann implementation */
+
+#include <e-lib.h>
+#include "../shared.h"
+
+#include <string.h>
+
+#include "lb.h"
+
+/* shared memory overlay */
+volatile shm_t shm SECTION(".shared_dram");
+
+/* local block */
+d2q9_block_t block SECTION(".data_bank2");
+
+void delay(void)
+{
+       for(volatile int i = 0; i < 1000000; i++)
+               for(volatile int j = 0; j < 10; j++)
+                       ;
+}
+
+int main()
+{
+       const FLOAT omega = 1.0;
+
+       init_block(block);
+
+       while(1) {
+#if 0
+               /* collide all nodes */
+               for(int x = 0; x < BLOCK_X; x++)
+                       for(int y = 0; y < BLOCK_Y; y++)
+                               collide_and_swap(block, x, y, omega);
+
+               /* XXX synchronize */
+
+               /* stream all nodes */
+               for(int x = 0; x < BLOCK_X; x++)
+                       for(int y = 0; y < BLOCK_Y; y++)
+                               stream_node(block, x, y);
+
+               /* XXX synchronize */
+
+#else
+               /* collide boundaries: top, bottom */
+               for(int x = 0; x < BLOCK_X; x++) {
+                       collide_and_swap(block, x, 0,         omega);
+                       collide_and_swap(block, x, BLOCK_Y-1, omega);
+               }
+
+               /* collide boundaries: left, right */
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       collide_and_swap(block, 0,         y, omega);
+                       collide_and_swap(block, BLOCK_X-1, y, omega);
+               }
+
+               /* XXX synchronize */
+
+               /* collide and stream the bulk */
+               collide_and_stream_bulk(block, omega);
+
+               /* stream the boundaries: left, right */
+               for(int x = 0; x < BLOCK_X; x++) {
+                       stream_node(block, x, 0        );
+                       stream_node(block, x, BLOCK_Y-1);
+               }
+
+               /* stream the boundaries: left, right */
+               for(int y = 1; y < BLOCK_Y-1; y++) {
+                       stream_node(block, 0,         y);
+                       stream_node(block, BLOCK_X-1, y);
+               }
+
+               /* XXX synchronize */
+#endif
+
+
+               /* copy grid to shm */
+               memcpy(&shm.lattice[0], &block, sizeof(d2q9_block_t));
+
+               /* flag host */
+               shm.states[0]++;
+       }
+
+       while(1);
+}
+
index 3394467e708ca573e0714ea5534bfb6962fc4605..971425a9c12fe74f419a630057dae8c19adff35b 100644 (file)
@@ -1,7 +1,187 @@
+/* Host Application */
+
 #include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <e-hal.h>
+#include "../shared.h"
+
+#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); }
+#define SHM_OFFSET 0x01000000
+
+static states_t laststates, states;    /* old state value */
+static shm_t    shm = {{ 0 }};         /* local shm copy */
+
+void write_populations(FILE *file, int core, int iter)
+{
+       for(int x = 0; x < BLOCK_X; x++) {
+               for(int y = 0; y < BLOCK_Y; y++) {
+                       fprintf(file, "%03d: [%02d,%02d]: ", iter, x, y);
+                       for(int q = 0; q < 9; q++) {
+                               fprintf(file, "%.5f\t",
+                                       shm.lattice[core][x][y][q]);
+                       }
+                       fprintf(file, "\n");
+               }
+       }
+       fprintf(file, "\n");
+
+       return;
+}
+
+void write_image(int iter)
+{
+       FILE *file; char name[32];
+
+       FLOAT rhos[4][4][BLOCK_X][BLOCK_Y];
+       FLOAT min = 1, max = 0;
+       uint8_t gray;
+
+       snprintf(name, 32, "./tmp/i%06d.ppm", iter);
+       file = fopen(name, "wb");
+       if(!file) exit(-1);
+
+       fprintf(file, "P5\n%d %d\n%d\n", BLOCK_X, BLOCK_Y, 255);
+
+       /* calculate all densities and remember min/max */
+       int cx = 0, cy = 0;
+//     for(int cy = 0; cy < 1; cy++) {
+//             for(int cx = 0; cx < 1; cx++) {
+                       for(int x = 0; x < BLOCK_X; x++) {
+                               for(int y = 0; y < BLOCK_Y; y++) {
+                                       rhos[cy][cx][x][y] = 0;
+                                       for(int q = 0; q < 9; q++) {
+                                               rhos[cy][cx][x][y] +=
+                                               shm.lattice[cy*4+cx][x][y][q];
+                                       }
+
+                                       if(rhos[cy][cx][x][y] < min)
+                                               min = rhos[cy][cx][x][y];
+                                       if(rhos[cy][cx][x][y] > max)
+                                               max = rhos[cy][cx][x][y];
+                               }
+                       }
+//             }
+//     }
+
+       /* now scale values and write to image file */
+//     for(int cy = 0; cy < 4; cy++) {
+//             for(int cx = 0; cx < 4; cx++) {
+                       for(int x = 0; x < BLOCK_X; x++) {
+                               for(int y = 0; y < BLOCK_Y; y++) {
+                                       gray = (int)(255.*(rhos[cy][cx][x][y]-min) / (max-min));
+                                       fwrite(&gray, 1, 1, file);
+                               }
+                       }
+//             }
+//     }
+
+       fclose(file);
+       if(chown(name, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+               perror("chown");
+       }
+
+       return;
+}
 
 int main()
 {
-       printf("Hello, World!\n");
+       char *filename = "bin/lb_2d.srec";
+       FILE *datfile; char *datname = "populations.dat";
+       int dummy, old0 = 0;
+
+       e_epiphany_t dev;
+       e_mem_t      mem;
+
+       e_set_host_verbosity(H_D0);
+       e_set_loader_verbosity(L_D0);
+
+       dummy = system("rm ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy;
+
+       /* overwrite results file */
+       datfile = fopen(datname, "w");
+       if(!datfile)
+               FAIL("Can't write result file: %s\n", strerror(errno));
+
+       /* initialize workgroup, allocate and clear shared memory */
+       if(e_init(NULL) != E_OK)
+               FAIL("Can't init!\n");
+       e_reset_system();
+       if(e_open(&dev, 0, 0, 4, 4) != E_OK)
+               FAIL("Can't open!\n");
+       if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK)
+               FAIL("Can't alloc!\n");
+       if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+               FAIL("Can't clear shm!\n");
+
+       /* load programs */
+       if(e_load(filename, &dev, 0, 0, E_TRUE) != E_OK)
+               FAIL("Can't load!\n");
+
+       /* ================================================================ */
+       while(1) {
+               printf("Polling shared memory.\n");
+
+               while(1) {
+                       /* read states */
+                       if(e_read(&mem, 0, 0, (off_t)0, &states,
+                               sizeof(states_t)) == E_ERR)
+                                       FAIL("Can't poll!\n");
+
+                       /* compare with old values */
+                       if(memcmp(&laststates, &states, sizeof(states_t)))
+                               break;
+               }
+
+               /* read full shared memory */
+               if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR)
+                       FAIL("Can't read shm!\n");
+
+               /* save (updated) states */
+               memcpy(&states,     &shm, sizeof(states_t));
+               memcpy(&laststates, &shm, sizeof(states_t));
+
+               /* print states */
+               printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+                       states[0],  states[1],  states[2],  states[3]);
+               printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+                       states[4],  states[5],  states[6],  states[7]);
+               printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+                       states[8],  states[9],  states[10], states[11]);
+               printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n",
+                       states[12], states[13], states[14], states[15]);
+
+               /* write populations */
+               if(states[0] != old0) {
+                       write_populations(datfile, 0, states[0]);
+                       write_image(states[0]);
+                       old0 = states[0];
+               }
+
+               if(states[0] >= 21) break;
+       }
+       /* ================================================================ */
+
+       if(e_free(&mem)  != E_OK) FAIL("Can't free!\n");
+       if(e_close(&dev) != E_OK) FAIL("Can't close!\n");
+       if(e_finalize()  != E_OK) FAIL("Can't finalize!\n");
+
+       fclose(datfile);
+       if(chown(datname, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+                       FAIL("Can't chown populations!\n");
+       }
+
+       printf("\nProgram finished successfully.\n");
+       printf("Convert ...\n");
+       dummy = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy;
+       if(chown("./tmp/anim.gif", atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) {
+               FAIL("Can't chown anim!\n");
+       }
+
        return(0);
 }
+
diff --git a/lb/shared.h b/lb/shared.h
new file mode 100644 (file)
index 0000000..44515d9
--- /dev/null
@@ -0,0 +1,34 @@
+/* shared data types and external memory layout */
+#ifndef _SHARED_H_
+#define _SHARED_H_
+
+#include <stdint.h>
+#ifndef PACKED
+#define PACKED __attribute__((packed))
+#endif /* PACKED */
+
+/* number of cores */
+#define NUM_CORES 16
+
+/* size of per-core subgrid */
+#define BLOCK_X 15
+#define BLOCK_Y 15
+
+/* floating point type */
+typedef float FLOAT;
+
+/* state type */
+typedef uint32_t states_t[NUM_CORES];
+
+/* node and block type (D2Q9) */
+typedef FLOAT       d2q9_node_t[9];
+typedef d2q9_node_t d2q9_block_t[BLOCK_X][BLOCK_Y];
+
+/* shared memory structure */
+typedef struct {
+       states_t     states;
+       d2q9_block_t lattice[NUM_CORES];
+} PACKED shm_t;
+
+#endif /* _SHARED_H_ */
+