From 9204792e8833860ba1d44e1d97082b44f2407736 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Wed, 25 Jun 2014 22:05:27 +0000 Subject: [PATCH] lb: D2Q9 working, single bank, single core limited to 10x10 (double precision) or 15x15 (single precision) --- lb/Makefile | 53 +++++++++----- lb/esrc/d2q9.c | 116 ++++++++++++++++++++++++++++++ lb/esrc/lb.h | 14 ++++ lb/esrc/lb_2d.c | 88 +++++++++++++++++++++++ lb/hsrc/main.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++- lb/shared.h | 34 +++++++++ 6 files changed, 468 insertions(+), 19 deletions(-) create mode 100644 lb/esrc/d2q9.c create mode 100644 lb/esrc/lb.h create mode 100644 lb/esrc/lb_2d.c create mode 100644 lb/shared.h diff --git a/lb/Makefile b/lb/Makefile index badbdd8..949e1a6 100644 --- a/lb/Makefile +++ b/lb/Makefile @@ -1,4 +1,4 @@ -# Makefile +# Template Makefile for Epiphany # host toolchain HCC = gcc @@ -13,6 +13,14 @@ ECFLAGS = -Os -std=c99 -falign-loops=8 -falign-functions=8 -Wall ELFLAGS = -T$(EPIPHANY_HOME)/bsps/current/fast.ldf -le-lib EOFLAGS = -R .shared_dram +# host application +HAPP = $(DEST)/ep_main +HOBJS = $(HDEST)/main.o + +# epiphany applications +EAPPS = $(DEST)/lb_2d.srec +ECOMMON = $(EDEST)/d2q9.o + # folders HSRC = hsrc HDEST = hobj @@ -20,19 +28,27 @@ ESRC = esrc EDEST = eobj DEST = bin -# applications (host binary and epiphany SREC files) to build -HAPP = $(DEST)/ep_main -EAPPS = # $(DEST)/test.srec - -# object files to build -HOBJS = $(HDEST)/main.o -EOBJS = # $(EDEST)/test.o +# === Magic begins here =================================================== +EOBJS = $(EAPPS:$(DEST)%srec=$(EDEST)%o) $(ECOMMON) +EELFS = $(EAPPS:$(DEST)%srec=$(EDEST)%elf) -# === Rules =============================================================== .SECONDARY: -.PHONY: all host target folders run clean - -all: run +.PHONY: all help host target folders run clean + +# === Phony Rules ========================================================= +help: + @echo $(EOBJS) + @$(ECHO) + @$(ECHO) "Epiphany Makefile - Help" + @$(ECHO) " help show this help" + @$(ECHO) " host build host application ($(HAPP))" + @$(ECHO) " target build epiphany applications ($(EAPPS))" + @$(ECHO) " all build all" + @$(ECHO) " run build all, then run host application" + @$(ECHO) " clean remove applications and intermediate files" + @$(ECHO) + +all: host target host: folders $(HAPP) @@ -48,8 +64,9 @@ run: host target clean: @$(ECHO) "\tCLEAN" - @rm -v -f $(HOBJS) $(EOBJS) $(EAPPS) $(HAPP) - @rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) + @rm -v -f $(HAPP) $(HOBJS) $(EAPPS) $(EELFS) $(EOBJS) + @-rmdir -v --ignore-fail-on-non-empty $(HDEST) $(EDEST) $(DEST) \ + 2>/dev/null $(HDEST): @$(ECHO) "\tMKDIR $(HDEST)" @@ -63,7 +80,7 @@ $(DEST): @$(ECHO) "\tMKDIR $(DEST)" @mkdir -p $(DEST) -# === Host Toolchain ====================================================== +# === Host Rules ========================================================== $(HAPP): $(HOBJS) @$(ECHO) "\t(HOST) LINK\t$@" @$(HCC) -o $@ $^ $(HLFLAGS) @@ -72,14 +89,14 @@ $(HDEST)/%.o: $(HSRC)/%.c @$(ECHO) "\t(HOST) CC\t$@" @$(HCC) $(HCFLAGS) -c -o $@ $^ -# === Target Toolchain ==================================================== +# === Target Rules ======================================================== $(DEST)/%.srec: $(EDEST)/%.elf @$(ECHO) "\t(TARGET) OBJCOPY $@" @$(EOC) $(EOFLAGS) --output-target srec --srec-forceS3 $^ $@ -$(EDEST)/%.elf: $(EDEST)/%.o +$(EDEST)/%.elf: $(EDEST)/%.o $(ECOMMON) @$(ECHO) "\t(TARGET) LINK\t$@" - @$(ECC) -o $@ $^ $(ECFLAGS) + @$(ECC) -o $@ $^ $(ELFLAGS) $(EDEST)/%.o: $(ESRC)/%.c @$(ECHO) "\t(TARGET) CC\t$@" diff --git a/lb/esrc/d2q9.c b/lb/esrc/d2q9.c new file mode 100644 index 0000000..22f5f5f --- /dev/null +++ b/lb/esrc/d2q9.c @@ -0,0 +1,116 @@ +/* D2Q9 lattice boltzmann functions */ + +#include "../shared.h" + +/* velocities */ +static const int d2q9_v[9][2] = { { 0, 0}, + {-1, 1}, {-1, 0}, {-1,-1}, { 0,-1}, + { 1,-1}, { 1, 0}, { 1, 1}, { 0, 1}, +}; + +/* weights */ +static const FLOAT d2q9_w[9] = { 4./9., + 1./36., 1./9., 1./36., 1./9., + 1./36., 1./9., 1./36., 1./9., +}; + +void init_block(d2q9_block_t block) +{ + /* all with rho = 0.1 */ + for(int x = 0; x < BLOCK_X; x++) + for(int y = 0; y < BLOCK_Y; y++) + for(int q = 0; q < 9; q++) + block[x][y][q] = 0.1 * d2q9_w[q]; + + /* except here with 0.2 */ + for(int q = 0; q < 9; q++) + block[0][0][q] = 0.2 * d2q9_w[q]; + + return; +} + +void collide_and_swap(d2q9_block_t f, int x, int y, FLOAT omega) +{ + /* macroscopic */ + FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] + + f[x][y][3] + f[x][y][4] + f[x][y][5] + + f[x][y][6] + f[x][y][7] + f[x][y][8]; + FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] - + f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho; + FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] - + f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho; + FLOAT sqr = 1.5 * (ux*ux + uy*uy); + + /* update node */ + for(int q = 0; q < 9; q++) { + FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; + FLOAT eq = rho * d2q9_w[q] * + (1. + 3. * cu + 4.5 * cu*cu - sqr); + f[x][y][q] *= (1.0 - omega); + f[x][y][q] += omega * eq; + } + + /* swap */ + for(int q = 1; q <= 4; q++) { + FLOAT tmp = f[x][y][q]; + f[x][y][q] = f[x][y][q+4]; + f[x][y][q+4] = tmp; + } +} + +void stream_node(d2q9_block_t f, int x, int y) +{ + for(int q = 1; q <= 4; q++) { + int next_x = x + d2q9_v[q][0]; + int next_y = y + d2q9_v[q][1]; + + /* wrap around */ + if(next_x < 0) next_x += BLOCK_X; + else if(next_x >= BLOCK_X) next_x -= BLOCK_X; + if(next_y < 0) next_y += BLOCK_Y; + else if(next_y >= BLOCK_Y) next_y -= BLOCK_Y; + + FLOAT tmp = f[x][y][q+4]; + f[x][y][q+4] = f[next_x][next_y][q]; + f[next_x][next_y][q] = tmp; + } +} + +void collide_and_stream_bulk(d2q9_block_t f, FLOAT omega) +{ + /* don't touch the border nodes */ + for(int x = 1; x < BLOCK_X-1; x++) { + for(int y = 1; y < BLOCK_Y-1; y++) { + /* macroscopic */ + FLOAT rho = f[x][y][0] + f[x][y][1] + f[x][y][2] + + f[x][y][3] + f[x][y][4] + f[x][y][5] + + f[x][y][6] + f[x][y][7] + f[x][y][8]; + FLOAT ux = (f[x][y][7] + f[x][y][6] + f[x][y][5] - + f[x][y][1] - f[x][y][2] - f[x][y][3]) / rho; + FLOAT uy = (f[x][y][1] + f[x][y][8] + f[x][y][7] - + f[x][y][3] - f[x][y][4] - f[x][y][5]) / rho; + FLOAT sqr = 1.5 * (ux*ux + uy*uy); + + /* update node */ + for(int q = 0; q < 9; q++) { + FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; + FLOAT eq = rho * d2q9_w[q] * + (1. + 3. * cu + 4.5 * cu*cu - sqr); + f[x][y][q] *= (1.0 - omega); + f[x][y][q] += omega * eq; + } + + /* stream */ + for(int q = 0; q <= 4; q++) { + int next_x = x + d2q9_v[q][0]; + int next_y = y + d2q9_v[q][1]; + + FLOAT tmp = f[x][y][q]; + f[x][y][q] = f[x][y][q+4]; + f[x][y][q+4] = f[next_x][next_y][q]; + f[next_x][next_y][q] = tmp; + } + } + } +} + diff --git a/lb/esrc/lb.h b/lb/esrc/lb.h new file mode 100644 index 0000000..9c099bf --- /dev/null +++ b/lb/esrc/lb.h @@ -0,0 +1,14 @@ +/* D2Q9 lattice boltzmann functions */ + +#include "../shared.h" + +void init_block (d2q9_block_t); + +void collide_and_swap (d2q9_block_t, int x, int y, FLOAT); +void stream_node (d2q9_block_t, int x, int y); + +void collide_and_stream_bulk(d2q9_block_t, FLOAT); + + + + diff --git a/lb/esrc/lb_2d.c b/lb/esrc/lb_2d.c new file mode 100644 index 0000000..99f6550 --- /dev/null +++ b/lb/esrc/lb_2d.c @@ -0,0 +1,88 @@ +/* D2Q9 lattice boltzmann implementation */ + +#include +#include "../shared.h" + +#include + +#include "lb.h" + +/* shared memory overlay */ +volatile shm_t shm SECTION(".shared_dram"); + +/* local block */ +d2q9_block_t block SECTION(".data_bank2"); + +void delay(void) +{ + for(volatile int i = 0; i < 1000000; i++) + for(volatile int j = 0; j < 10; j++) + ; +} + +int main() +{ + const FLOAT omega = 1.0; + + init_block(block); + + while(1) { +#if 0 + /* collide all nodes */ + for(int x = 0; x < BLOCK_X; x++) + for(int y = 0; y < BLOCK_Y; y++) + collide_and_swap(block, x, y, omega); + + /* XXX synchronize */ + + /* stream all nodes */ + for(int x = 0; x < BLOCK_X; x++) + for(int y = 0; y < BLOCK_Y; y++) + stream_node(block, x, y); + + /* XXX synchronize */ + +#else + /* collide boundaries: top, bottom */ + for(int x = 0; x < BLOCK_X; x++) { + collide_and_swap(block, x, 0, omega); + collide_and_swap(block, x, BLOCK_Y-1, omega); + } + + /* collide boundaries: left, right */ + for(int y = 1; y < BLOCK_Y-1; y++) { + collide_and_swap(block, 0, y, omega); + collide_and_swap(block, BLOCK_X-1, y, omega); + } + + /* XXX synchronize */ + + /* collide and stream the bulk */ + collide_and_stream_bulk(block, omega); + + /* stream the boundaries: left, right */ + for(int x = 0; x < BLOCK_X; x++) { + stream_node(block, x, 0 ); + stream_node(block, x, BLOCK_Y-1); + } + + /* stream the boundaries: left, right */ + for(int y = 1; y < BLOCK_Y-1; y++) { + stream_node(block, 0, y); + stream_node(block, BLOCK_X-1, y); + } + + /* XXX synchronize */ +#endif + + + /* copy grid to shm */ + memcpy(&shm.lattice[0], &block, sizeof(d2q9_block_t)); + + /* flag host */ + shm.states[0]++; + } + + while(1); +} + diff --git a/lb/hsrc/main.c b/lb/hsrc/main.c index 3394467..971425a 100644 --- a/lb/hsrc/main.c +++ b/lb/hsrc/main.c @@ -1,7 +1,187 @@ +/* Host Application */ + #include +#include +#include +#include +#include +#include + +#include +#include "../shared.h" + +#define FAIL(...) { fprintf(stderr, __VA_ARGS__); exit(1); } +#define SHM_OFFSET 0x01000000 + +static states_t laststates, states; /* old state value */ +static shm_t shm = {{ 0 }}; /* local shm copy */ + +void write_populations(FILE *file, int core, int iter) +{ + for(int x = 0; x < BLOCK_X; x++) { + for(int y = 0; y < BLOCK_Y; y++) { + fprintf(file, "%03d: [%02d,%02d]: ", iter, x, y); + for(int q = 0; q < 9; q++) { + fprintf(file, "%.5f\t", + shm.lattice[core][x][y][q]); + } + fprintf(file, "\n"); + } + } + fprintf(file, "\n"); + + return; +} + +void write_image(int iter) +{ + FILE *file; char name[32]; + + FLOAT rhos[4][4][BLOCK_X][BLOCK_Y]; + FLOAT min = 1, max = 0; + uint8_t gray; + + snprintf(name, 32, "./tmp/i%06d.ppm", iter); + file = fopen(name, "wb"); + if(!file) exit(-1); + + fprintf(file, "P5\n%d %d\n%d\n", BLOCK_X, BLOCK_Y, 255); + + /* calculate all densities and remember min/max */ + int cx = 0, cy = 0; +// for(int cy = 0; cy < 1; cy++) { +// for(int cx = 0; cx < 1; cx++) { + for(int x = 0; x < BLOCK_X; x++) { + for(int y = 0; y < BLOCK_Y; y++) { + rhos[cy][cx][x][y] = 0; + for(int q = 0; q < 9; q++) { + rhos[cy][cx][x][y] += + shm.lattice[cy*4+cx][x][y][q]; + } + + if(rhos[cy][cx][x][y] < min) + min = rhos[cy][cx][x][y]; + if(rhos[cy][cx][x][y] > max) + max = rhos[cy][cx][x][y]; + } + } +// } +// } + + /* now scale values and write to image file */ +// for(int cy = 0; cy < 4; cy++) { +// for(int cx = 0; cx < 4; cx++) { + for(int x = 0; x < BLOCK_X; x++) { + for(int y = 0; y < BLOCK_Y; y++) { + gray = (int)(255.*(rhos[cy][cx][x][y]-min) / (max-min)); + fwrite(&gray, 1, 1, file); + } + } +// } +// } + + fclose(file); + if(chown(name, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) { + perror("chown"); + } + + return; +} int main() { - printf("Hello, World!\n"); + char *filename = "bin/lb_2d.srec"; + FILE *datfile; char *datname = "populations.dat"; + int dummy, old0 = 0; + + e_epiphany_t dev; + e_mem_t mem; + + e_set_host_verbosity(H_D0); + e_set_loader_verbosity(L_D0); + + dummy = system("rm ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy; + + /* overwrite results file */ + datfile = fopen(datname, "w"); + if(!datfile) + FAIL("Can't write result file: %s\n", strerror(errno)); + + /* initialize workgroup, allocate and clear shared memory */ + if(e_init(NULL) != E_OK) + FAIL("Can't init!\n"); + e_reset_system(); + if(e_open(&dev, 0, 0, 4, 4) != E_OK) + FAIL("Can't open!\n"); + if(e_alloc(&mem, SHM_OFFSET, sizeof(shm_t)) != E_OK) + FAIL("Can't alloc!\n"); + if(e_write(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) + FAIL("Can't clear shm!\n"); + + /* load programs */ + if(e_load(filename, &dev, 0, 0, E_TRUE) != E_OK) + FAIL("Can't load!\n"); + + /* ================================================================ */ + while(1) { + printf("Polling shared memory.\n"); + + while(1) { + /* read states */ + if(e_read(&mem, 0, 0, (off_t)0, &states, + sizeof(states_t)) == E_ERR) + FAIL("Can't poll!\n"); + + /* compare with old values */ + if(memcmp(&laststates, &states, sizeof(states_t))) + break; + } + + /* read full shared memory */ + if(e_read(&mem, 0, 0, (off_t)0, &shm, sizeof(shm_t)) == E_ERR) + FAIL("Can't read shm!\n"); + + /* save (updated) states */ + memcpy(&states, &shm, sizeof(states_t)); + memcpy(&laststates, &shm, sizeof(states_t)); + + /* print states */ + printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n", + states[0], states[1], states[2], states[3]); + printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n", + states[4], states[5], states[6], states[7]); + printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n", + states[8], states[9], states[10], states[11]); + printf("\t0x%08x 0x%08x 0x%08x 0x%08x\n", + states[12], states[13], states[14], states[15]); + + /* write populations */ + if(states[0] != old0) { + write_populations(datfile, 0, states[0]); + write_image(states[0]); + old0 = states[0]; + } + + if(states[0] >= 21) break; + } + /* ================================================================ */ + + if(e_free(&mem) != E_OK) FAIL("Can't free!\n"); + if(e_close(&dev) != E_OK) FAIL("Can't close!\n"); + if(e_finalize() != E_OK) FAIL("Can't finalize!\n"); + + fclose(datfile); + if(chown(datname, atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) { + FAIL("Can't chown populations!\n"); + } + + printf("\nProgram finished successfully.\n"); + printf("Convert ...\n"); + dummy = system("convert ./tmp/i*.ppm ./tmp/anim.gif"); (void)dummy; + if(chown("./tmp/anim.gif", atoi(getenv("SUDO_UID")), atoi(getenv("SUDO_GID")))) { + FAIL("Can't chown anim!\n"); + } + return(0); } + diff --git a/lb/shared.h b/lb/shared.h new file mode 100644 index 0000000..44515d9 --- /dev/null +++ b/lb/shared.h @@ -0,0 +1,34 @@ +/* shared data types and external memory layout */ +#ifndef _SHARED_H_ +#define _SHARED_H_ + +#include +#ifndef PACKED +#define PACKED __attribute__((packed)) +#endif /* PACKED */ + +/* number of cores */ +#define NUM_CORES 16 + +/* size of per-core subgrid */ +#define BLOCK_X 15 +#define BLOCK_Y 15 + +/* floating point type */ +typedef float FLOAT; + +/* state type */ +typedef uint32_t states_t[NUM_CORES]; + +/* node and block type (D2Q9) */ +typedef FLOAT d2q9_node_t[9]; +typedef d2q9_node_t d2q9_block_t[BLOCK_X][BLOCK_Y]; + +/* shared memory structure */ +typedef struct { + states_t states; + d2q9_block_t lattice[NUM_CORES]; +} PACKED shm_t; + +#endif /* _SHARED_H_ */ + -- 2.30.2