d3q19: implement optimized bulk collision/stream
authorSebastian <git@sraa.de>
Tue, 2 Sep 2014 21:53:29 +0000 (21:53 +0000)
committerSebastian <git@sraa.de>
Tue, 2 Sep 2014 21:53:29 +0000 (21:53 +0000)
d3q19/esrc/d3q19.c
d3q19/esrc/d3q19.h
d3q19/esrc/main.c

index 4156ad5d6c766de7353aa456893722322656e9fe..2f053540889ca73efb0ca40100aa553a5fc797b1 100644 (file)
@@ -45,8 +45,8 @@ void collide(block_t f, int x, int y, int z, FLOAT omega)
 #if 1
        /* Zou/He boundary conditions */
        if(row == 0 && z == 0) {                                /* top */
-               const FLOAT Ux = 0.0;   /* wall speed */
-               const FLOAT Uy = 0.2;
+               const FLOAT Ux = 0.6;   /* wall speed */
+               const FLOAT Uy = 0.0;
                const FLOAT Uz = 0.0;
 
                FLOAT rho = ( f[z][y][x][0]  + f[z][y][x][1]  + f[z][y][x][2]  +
@@ -221,42 +221,66 @@ void stream(block_t f, int x, int y, int z)
 
 /* ================================================================== */
 
-#if 0
-void d2q9_collide_stream_bulk(d2q9_block_t f, FLOAT omega)
+void bulk(block_t f, FLOAT omega)
 {
-       /* don't touch the border nodes */
-       for(int x = 1; x < BLOCKS_X-1; x++) {
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
-                       /* macroscopic */
-                       FLOAT rho = f[y][x][0] + f[y][x][1] + f[y][x][2] +
-                               f[y][x][3] + f[y][x][4] + f[y][x][5] +
-                               f[y][x][6] + f[y][x][7] + f[y][x][8];
-                       FLOAT ux = (f[y][x][7] + f[y][x][6] + f[y][x][5] -
-                               f[y][x][1] - f[y][x][2] - f[y][x][3]) / rho;
-                       FLOAT uy = (f[y][x][1] + f[y][x][8] + f[y][x][7] -
-                               f[y][x][3] - f[y][x][4] - f[y][x][5]) / rho;
-                       FLOAT sqr = 1.5 * (ux*ux + uy*uy);
-
-                       /* update node */
-                       for(int q = 0; q < 9; q++) {
-                               FLOAT cu = ux*d2q9_v[q][0] + uy*d2q9_v[q][1];
-                               FLOAT eq = rho * d2q9_w[q] *
-                                       (1. + 3. * cu + 4.5 * cu*cu - sqr);
-                               f[y][x][q] *= (1.0 - omega);
-                               f[y][x][q] += omega * eq;
-                       }
-
-                       /* stream */
-                       for(int q = 0; q <= 4; q++) {
-                               int next_x = x + d2q9_v[q][0];
-                               int next_y = y + d2q9_v[q][1];
-
-                               FLOAT tmp    = f[y][x][q];
-                               f[y][x][q]   = f[y][x][q+4];
-                               f[y][x][q+4] = f[next_y][next_x][q];
-                               f[next_y][next_x][q] = tmp;
-                       }
+       /* don't touch border nodes */
+       for(int z = 1; z < NODES_Z-1; z++)
+               for(int y = 1; y < NODES_Y-1; y++)
+                       for(int x = 1; x < NODES_X-1; x++)
+       {
+               /* macroscopic */
+               FLOAT rho =
+                       f[z][y][x][0] + f[z][y][x][1] + f[z][y][x][2] +
+                       f[z][y][x][3] + f[z][y][x][4] + f[z][y][x][5] +
+                       f[z][y][x][6] + f[z][y][x][7] + f[z][y][x][8] +
+                       f[z][y][x][9] + f[z][y][x][10] + f[z][y][x][11] +
+                       f[z][y][x][12] + f[z][y][x][13] + f[z][y][x][14] +
+                       f[z][y][x][15] + f[z][y][x][16] + f[z][y][x][17] +
+                       f[z][y][x][18];
+
+               FLOAT ux = (
+                       f[z][y][x][10] + f[z][y][x][13] + f[z][y][x][14] +
+                       f[z][y][x][15] + f[z][y][x][16] - f[z][y][x][1] -
+                       f[z][y][x][4]  - f[z][y][x][5]  - f[z][y][x][6] -
+                       f[z][y][x][7] ) / rho;
+
+               FLOAT uy = (
+                       f[z][y][x][5]  + f[z][y][x][11] + f[z][y][x][13] +
+                       f[z][y][x][17] + f[z][y][x][18] - f[z][y][x][2]  -
+                       f[z][y][x][4]  - f[z][y][x][8]  - f[z][y][x][9]  -
+                       f[z][y][x][14] ) / rho;
+
+               FLOAT uz = (
+                       f[z][y][x][7]  + f[z][y][x][9]  + f[z][y][x][12] +
+                       f[z][y][x][15] + f[z][y][x][17] - f[z][y][x][3]  -
+                       f[z][y][x][6]  - f[z][y][x][8]  - f[z][y][x][16] -
+                       f[z][y][x][18] ) / rho;
+
+               FLOAT sqr = 1.5 * (ux*ux + uy*uy + uz*uz);
+
+               /* update node */
+               for(int q = 0; q < 19; q++) {
+                       FLOAT cu =
+                               ux*d3q19_v[q][0] +
+                               uy*d3q19_v[q][1] +
+                               uz*d3q19_v[q][2];
+                       FLOAT eq = rho * d3q19_w[q] *
+                               (1. + 3. * cu + 4.5 * cu*cu - sqr);
+                       f[z][y][x][q] *= (1.0 - omega);
+                       f[z][y][x][q] += omega * eq;
+               }
+
+               /* stream */
+               for(int q = 0; q < 9; q++) {
+                       int next_x = x + d3q19_v[q][0];
+                       int next_y = y + d3q19_v[q][1];
+                       int next_z = z + d3q19_v[q][2];
+
+                       FLOAT tmp       = f[z][y][x][q];
+                       f[z][y][x][q]   = f[z][y][x][q+9];
+                       f[z][y][x][q+9] = f[next_z][next_y][next_x][q];
+                       f[next_z][next_y][next_x][q] = tmp;
                }
        }
 }
-#endif
+
index 3b8f57a7b93019f92f6af618df74c79f2102551e..79946209817f33b681d5cf6ec2f5cc1d838c75ee 100644 (file)
@@ -2,8 +2,8 @@
 
 #include "../shared.h"
 
-void init               (block_t);
-void collide            (block_t, int x, int y, int z,  FLOAT);
-void stream             (block_t, int x, int y, int z);
-void collide_stream_bulk(block_t, FLOAT);
+void init   (block_t);
+void collide(block_t, int x, int y, int z,  FLOAT);
+void stream (block_t, int x, int y, int z);
+void bulk   (block_t, FLOAT);
 
index 625fb6740c8044feee7a38e43c838dba1141510d..d54823be5a7e07f5215c67b54fcc43d848f826cb 100644 (file)
@@ -55,7 +55,7 @@ int main()
 
        /* main loop */
        for(int iter = 0; iter < 3000; iter++) {
-#if 1
+#if 0
                /* collide all nodes */
                for(int z = 0; z < NODES_Z; z++)
                        for(int y = 0; y < NODES_Y; y++)
@@ -72,34 +72,58 @@ int main()
                                        stream(*block, x, y, z);
 
 #else
-               /* collide boundaries: top, bottom */
-               for(int x = 0; x < BLOCKS_X; x++) {
-                       d2q9_collide(*block, x, 0,          omega);
-                       d2q9_collide(*block, x, BLOCKS_Y-1, omega);
+               /* collide boundaries: top/bottom (Z) */
+               for(int y = 0; y < NODES_Y; y++)
+                       for(int x = 0; x < NODES_X; x++)
+               {
+                       collide(*block, x, y, 0,         omega);
+                       collide(*block, x, y, NODES_Z-1, omega);
                }
 
-               /* collide boundaries: left, right */
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
-                       d2q9_collide(*block, 0,         y, omega);
-                       d2q9_collide(*block, BLOCKS_X-1, y, omega);
+               /* collide boundaries: front/back (Y) */
+               for(int z = 1; z < NODES_Z-1; z++)
+                       for(int x = 0; x < NODES_X; x++)
+               {
+                       collide(*block, x, 0      ,   z, omega);
+                       collide(*block, x, NODES_Y-1, z, omega);
+               }
+
+               /* collide boundaries: left/right (X) */
+               for(int z = 1; z < NODES_Z-1; z++)
+                       for(int y = 1; y < NODES_Y-1; y++)
+               {
+                       collide(*block, 0,         y, z, omega);
+                       collide(*block, NODES_X-1, y, z, omega);
                }
 
                /* synchronize */
                e_barrier(barriers, tgt_bars);
 
                /* collide and stream the bulk */
-               d2q9_collide_stream_bulk(*block, omega);
+               bulk(*block, omega);
+
+               /* stream boundaries: top/bottom (Z) */
+               for(int y = 0; y < NODES_Y; y++)
+                       for(int x = 0; x < NODES_X; x++)
+               {
+                       stream(*block, x, y, 0        );
+                       stream(*block, x, y, NODES_Z-1);
+               }
 
-               /* stream the boundaries: top, bottom */
-               for(int x = 0; x < BLOCKS_X; x++) {
-                       d2q9_stream(*block, x, 0         );
-                       d2q9_stream(*block, x, BLOCKS_Y-1);
+               /* stream boundaries: front/back (Y) */
+               for(int z = 1; z < NODES_Z-1; z++)
+                       for(int x = 0; x < NODES_X; x++)
+               {
+                       stream(*block, x, 0,         z);
+                       stream(*block, x, NODES_Y-1, z);
                }
 
-               /* stream the boundaries: left, right */
-               for(int y = 1; y < BLOCKS_Y-1; y++) {
-                       d2q9_stream(*block, 0,          y);
-                       d2q9_stream(*block, BLOCKS_X-1, y);
+               /* stream boundaries: left/right (X) */
+               for(int z = 1; z < NODES_Z-1; z++)
+                       for(int y = 1; y < NODES_Y-1; y++)
+               {
+                       stream(*block, 0,         y, z);
+                       stream(*block, NODES_X-1, y, z);
                }
 #endif