From: Sebastian Date: Tue, 2 Sep 2014 21:53:29 +0000 (+0000) Subject: d3q19: implement optimized bulk collision/stream X-Git-Url: http://sraa.de/git/?a=commitdiff_plain;h=fdc0eabcc440897f32fe561f99d2aee68d680256;p=lattice-boltzmann-epiphany.git d3q19: implement optimized bulk collision/stream --- diff --git a/d3q19/esrc/d3q19.c b/d3q19/esrc/d3q19.c index 4156ad5..2f05354 100644 --- a/d3q19/esrc/d3q19.c +++ b/d3q19/esrc/d3q19.c @@ -45,8 +45,8 @@ void collide(block_t f, int x, int y, int z, FLOAT omega) #if 1 /* Zou/He boundary conditions */ if(row == 0 && z == 0) { /* top */ - const FLOAT Ux = 0.0; /* wall speed */ - const FLOAT Uy = 0.2; + const FLOAT Ux = 0.6; /* wall speed */ + const FLOAT Uy = 0.0; const FLOAT Uz = 0.0; FLOAT rho = ( f[z][y][x][0] + f[z][y][x][1] + f[z][y][x][2] + @@ -221,42 +221,66 @@ void stream(block_t f, int x, int y, int z) /* ================================================================== */ -#if 0 -void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega) +void bulk(block_t f, FLOAT omega) { - /* don't touch the border nodes */ - for(int x = 1; x < BLOCKS_X-1; x++) { - for(int y = 1; y < BLOCKS_Y-1; y++) { - /* macroscopic */ - FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] + - f[y][x][3] + f[y][x][4] + f[y][x][5] + - f[y][x][6] + f[y][x][7] + f[y][x][8]; - FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] - - f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho; - FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] - - f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho; - FLOAT sqr = 1.5 * (ux*ux + uy*uy); - - /* update node */ - for(int q = 0; q < 9; q++) { - FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1]; - FLOAT eq = rho * d2q9_w[q] * - (1. + 3. * cu + 4.5 * cu*cu - sqr); - f[y][x][q] *= (1.0 - omega); - f[y][x][q] += omega * eq; - } - - /* stream */ - for(int q = 0; q <= 4; q++) { - int next_x = x + d2q9_v[q][0]; - int next_y = y + d2q9_v[q][1]; - - FLOAT tmp = f[y][x][q]; - f[y][x][q] = f[y][x][q+4]; - f[y][x][q+4] = f[next_y][next_x][q]; - f[next_y][next_x][q] = tmp; - } + /* don't touch border nodes */ + for(int z = 1; z < NODES_Z-1; z++) + for(int y = 1; y < NODES_Y-1; y++) + for(int x = 1; x < NODES_X-1; x++) + { + /* macroscopic */ + FLOAT rho = + f[z][y][x][0] + f[z][y][x][1] + f[z][y][x][2] + + f[z][y][x][3] + f[z][y][x][4] + f[z][y][x][5] + + f[z][y][x][6] + f[z][y][x][7] + f[z][y][x][8] + + f[z][y][x][9] + f[z][y][x][10] + f[z][y][x][11] + + f[z][y][x][12] + f[z][y][x][13] + f[z][y][x][14] + + f[z][y][x][15] + f[z][y][x][16] + f[z][y][x][17] + + f[z][y][x][18]; + + FLOAT ux = ( + f[z][y][x][10] + f[z][y][x][13] + f[z][y][x][14] + + f[z][y][x][15] + f[z][y][x][16] - f[z][y][x][1] - + f[z][y][x][4] - f[z][y][x][5] - f[z][y][x][6] - + f[z][y][x][7] ) / rho; + + FLOAT uy = ( + f[z][y][x][5] + f[z][y][x][11] + f[z][y][x][13] + + f[z][y][x][17] + f[z][y][x][18] - f[z][y][x][2] - + f[z][y][x][4] - f[z][y][x][8] - f[z][y][x][9] - + f[z][y][x][14] ) / rho; + + FLOAT uz = ( + f[z][y][x][7] + f[z][y][x][9] + f[z][y][x][12] + + f[z][y][x][15] + f[z][y][x][17] - f[z][y][x][3] - + f[z][y][x][6] - f[z][y][x][8] - f[z][y][x][16] - + f[z][y][x][18] ) / rho; + + FLOAT sqr = 1.5 * (ux*ux + uy*uy + uz*uz); + + /* update node */ + for(int q = 0; q < 19; q++) { + FLOAT cu = + ux*d3q19_v[q][0] + + uy*d3q19_v[q][1] + + uz*d3q19_v[q][2]; + FLOAT eq = rho * d3q19_w[q] * + (1. + 3. * cu + 4.5 * cu*cu - sqr); + f[z][y][x][q] *= (1.0 - omega); + f[z][y][x][q] += omega * eq; + } + + /* stream */ + for(int q = 0; q < 9; q++) { + int next_x = x + d3q19_v[q][0]; + int next_y = y + d3q19_v[q][1]; + int next_z = z + d3q19_v[q][2]; + + FLOAT tmp = f[z][y][x][q]; + f[z][y][x][q] = f[z][y][x][q+9]; + f[z][y][x][q+9] = f[next_z][next_y][next_x][q]; + f[next_z][next_y][next_x][q] = tmp; } } } -#endif + diff --git a/d3q19/esrc/d3q19.h b/d3q19/esrc/d3q19.h index 3b8f57a..7994620 100644 --- a/d3q19/esrc/d3q19.h +++ b/d3q19/esrc/d3q19.h @@ -2,8 +2,8 @@ #include "../shared.h" -void init (block_t); -void collide (block_t, int x, int y, int z, FLOAT); -void stream (block_t, int x, int y, int z); -void collide_stream_bulk(block_t, FLOAT); +void init (block_t); +void collide(block_t, int x, int y, int z, FLOAT); +void stream (block_t, int x, int y, int z); +void bulk (block_t, FLOAT); diff --git a/d3q19/esrc/main.c b/d3q19/esrc/main.c index 625fb67..d54823b 100644 --- a/d3q19/esrc/main.c +++ b/d3q19/esrc/main.c @@ -55,7 +55,7 @@ int main() /* main loop */ for(int iter = 0; iter < 3000; iter++) { -#if 1 +#if 0 /* collide all nodes */ for(int z = 0; z < NODES_Z; z++) for(int y = 0; y < NODES_Y; y++) @@ -72,34 +72,58 @@ int main() stream(*block, x, y, z); #else - /* collide boundaries: top, bottom */ - for(int x = 0; x < BLOCKS_X; x++) { - d2q9_collide(*block, x, 0, omega); - d2q9_collide(*block, x, BLOCKS_Y-1, omega); + /* collide boundaries: top/bottom (Z) */ + for(int y = 0; y < NODES_Y; y++) + for(int x = 0; x < NODES_X; x++) + { + collide(*block, x, y, 0, omega); + collide(*block, x, y, NODES_Z-1, omega); } - /* collide boundaries: left, right */ - for(int y = 1; y < BLOCKS_Y-1; y++) { - d2q9_collide(*block, 0, y, omega); - d2q9_collide(*block, BLOCKS_X-1, y, omega); + /* collide boundaries: front/back (Y) */ + for(int z = 1; z < NODES_Z-1; z++) + for(int x = 0; x < NODES_X; x++) + { + collide(*block, x, 0 , z, omega); + collide(*block, x, NODES_Y-1, z, omega); + } + + /* collide boundaries: left/right (X) */ + for(int z = 1; z < NODES_Z-1; z++) + for(int y = 1; y < NODES_Y-1; y++) + { + collide(*block, 0, y, z, omega); + collide(*block, NODES_X-1, y, z, omega); } /* synchronize */ e_barrier(barriers, tgt_bars); /* collide and stream the bulk */ - d2q9_collide_stream_bulk(*block, omega); + bulk(*block, omega); + + /* stream boundaries: top/bottom (Z) */ + for(int y = 0; y < NODES_Y; y++) + for(int x = 0; x < NODES_X; x++) + { + stream(*block, x, y, 0 ); + stream(*block, x, y, NODES_Z-1); + } - /* stream the boundaries: top, bottom */ - for(int x = 0; x < BLOCKS_X; x++) { - d2q9_stream(*block, x, 0 ); - d2q9_stream(*block, x, BLOCKS_Y-1); + /* stream boundaries: front/back (Y) */ + for(int z = 1; z < NODES_Z-1; z++) + for(int x = 0; x < NODES_X; x++) + { + stream(*block, x, 0, z); + stream(*block, x, NODES_Y-1, z); } - /* stream the boundaries: left, right */ - for(int y = 1; y < BLOCKS_Y-1; y++) { - d2q9_stream(*block, 0, y); - d2q9_stream(*block, BLOCKS_X-1, y); + /* stream boundaries: left/right (X) */ + for(int z = 1; z < NODES_Z-1; z++) + for(int y = 1; y < NODES_Y-1; y++) + { + stream(*block, 0, y, z); + stream(*block, NODES_X-1, y, z); } #endif