Optimize and cleanup examples/charm++/load_balancing/stencil3d 18/4818/5
author Nitin Bhat <nbhat4@illinois.edu>
Wed, 21 Nov 2018 17:49:12 +0000 (11:49 -0600)
committer Nitin Bhat <nbhat4@illinois.edu>
Mon, 26 Nov 2018 22:07:20 +0000 (16:07 -0600)
Previously, the ghost arrays were allocated and freed on every
iteration. With this commit, they are allocated once in the
constructor (and again in pup() when unpacking after migration) and
reused across iterations. This significantly improves the execution
time of the application.

Other changes include replacing stray tabs with spaces, removing
trailing whitespace, and re-indenting the code.

Change-Id: I18d7b7fc8d2f0c97e13c8b2e18534e0d1f089802

examples/charm++/load_balancing/stencil3d/stencil3d.C
examples/charm++/load_balancing/stencil3d/stencil3d.ci

index 55be00aeada4952c466a2e144060dac90f00218c..a43fb20599018f0f19c5934b17fe7ee0767a833b 100644 (file)
@@ -8,18 +8,18 @@
  *
  *
  *
- *           *****************
- *        *               *  *
- *   ^ *****************     *
- *   | *               *     *
- *   | *               *     *
- *   | *               *     *
- *   Y *               *     *
- *   | *               *     *
- *   | *               *     *
- *   | *               *  * 
- *   ~ *****************    Z
- *     <------ X ------> 
+ *        *****************
+ *        *       *       *
+ *   ^    *****************     *
+ *   |    *       *       *
+ *   |    *       *       *
+ *   |    *       *       *
+ *   Y    *       *       *
+ *   |    *       *       *
+ *   |    *       *       *
+ *   |    *       *       *
+ *   ~    *****************    Z
+ *        <------ X ------>
  *
  *   X: left, right --> wrap_x
  *   Y: top, bottom --> wrap_y
@@ -49,24 +49,24 @@ int myrand(int numpes) {
   return((unsigned)(next/65536) % numpes);
 }
 
-// We want to wrap entries around, and because mod operator % 
+// We want to wrap entries around, and because mod operator %
 // sometimes misbehaves on negative values. -1 maps to the highest value.
-#define wrap_x(a)      (((a)+num_chare_x)%num_chare_x)
-#define wrap_y(a)      (((a)+num_chare_y)%num_chare_y)
-#define wrap_z(a)      (((a)+num_chare_z)%num_chare_z)
-
-#define index(a,b,c)   ((a)+(b)*(blockDimX+2)+(c)*(blockDimX+2)*(blockDimY+2))
-
-#define MAX_ITER       100
-#define LBPERIOD_ITER  5     // LB is called every LBPERIOD_ITER number of program iterations
-#define CHANGELOAD     30
-#define LEFT           1
-#define RIGHT          2
-#define TOP            3
-#define BOTTOM         4
-#define FRONT          5
-#define BACK           6
-#define DIVIDEBY7              0.14285714285714285714
+#define wrap_x(a)    (((a)+num_chare_x)%num_chare_x)
+#define wrap_y(a)    (((a)+num_chare_y)%num_chare_y)
+#define wrap_z(a)    (((a)+num_chare_z)%num_chare_z)
+
+#define index(a,b,c)    ((a)+(b)*(blockDimX+2)+(c)*(blockDimX+2)*(blockDimY+2))
+
+#define MAX_ITER         100
+#define LBPERIOD_ITER    5     // LB is called every LBPERIOD_ITER number of program iterations
+#define CHANGELOAD       30
+#define LEFT             1
+#define RIGHT            2
+#define TOP              3
+#define BOTTOM           4
+#define FRONT            5
+#define BACK             6
+#define DIVIDEBY7        0.14285714285714285714
 
 /** \class Main
  *
@@ -84,18 +84,18 @@ class Main : public CBase_Main {
 
       // store the main proxy
       mainProxy = thisProxy;
-       
+
       if(m->argc == 3) {
-       arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
-        blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]); 
+        arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
+        blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
       }
       else if (m->argc == 7) {
         arrayDimX = atoi(m->argv[1]);
-       arrayDimY = atoi(m->argv[2]);
-       arrayDimZ = atoi(m->argv[3]);
-        blockDimX = atoi(m->argv[4]); 
-       blockDimY = atoi(m->argv[5]); 
-       blockDimZ = atoi(m->argv[6]);
+        arrayDimY = atoi(m->argv[2]);
+        arrayDimZ = atoi(m->argv[3]);
+        blockDimX = atoi(m->argv[4]);
+        blockDimY = atoi(m->argv[5]);
+        blockDimZ = atoi(m->argv[6]);
       }
 
       if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
@@ -144,6 +144,14 @@ class Stencil: public CBase_Stencil {
     double *temperature;
     double *new_temperature;
 
+    // ghost arrays
+    double *leftGhost;
+    double *rightGhost;
+    double *topGhost;
+    double *bottomGhost;
+    double *frontGhost;
+    double *backGhost;
+
     // Constructor, initialize values
     Stencil() {
       usesAtSync = true;
@@ -154,9 +162,9 @@ class Stencil: public CBase_Stencil {
       new_temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
 
       for(k=0; k<blockDimZ+2; ++k)
-       for(j=0; j<blockDimY+2; ++j)
-         for(i=0; i<blockDimX+2; ++i)
-           temperature[index(i, j, k)] = 0.0;
+        for(j=0; j<blockDimY+2; ++j)
+          for(i=0; i<blockDimX+2; ++i)
+            temperature[index(i, j, k)] = 0.0;
 
       iterations = 0;
       imsg = 0;
@@ -165,6 +173,14 @@ class Stencil: public CBase_Stencil {
       if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
         startTime = CkWallTimer();
 
+      // Allocate ghost arrays
+      leftGhost   = new double[blockDimY*blockDimZ];
+      rightGhost  = new double[blockDimY*blockDimZ];
+      topGhost    = new double[blockDimX*blockDimZ];
+      bottomGhost = new double[blockDimX*blockDimZ];
+      frontGhost  = new double[blockDimX*blockDimY];
+      backGhost   = new double[blockDimX*blockDimY];
+
 #if CMK_LBDB_ON
       // set period arbitrarily small so that LB occurs when AtSync is called
       // this is in case the default LBPERIOD is larger than the time to complete LBPERIOD_ITER
@@ -181,8 +197,14 @@ class Stencil: public CBase_Stencil {
 
       size_t size = (blockDimX+2) * (blockDimY+2) * (blockDimZ+2);
       if (p.isUnpacking()) {
-       temperature = new double[size];
-       new_temperature = new double[size];
+        temperature     = new double[size];
+        new_temperature = new double[size];
+        leftGhost       = new double[blockDimY*blockDimZ];
+        rightGhost      = new double[blockDimY*blockDimZ];
+        topGhost        = new double[blockDimX*blockDimZ];
+        bottomGhost     = new double[blockDimX*blockDimZ];
+        frontGhost      = new double[blockDimX*blockDimY];
+        backGhost       = new double[blockDimX*blockDimY];
       }
       p(temperature, size);
       p(new_temperature, size);
@@ -190,107 +212,98 @@ class Stencil: public CBase_Stencil {
 
     Stencil(CkMigrateMessage* m) { }
 
-    ~Stencil() { 
-      delete [] temperature; 
-      delete [] new_temperature; 
+    ~Stencil() {
+      delete [] temperature;
+      delete [] new_temperature;
+      delete [] leftGhost;
+      delete [] rightGhost;
+      delete [] topGhost;
+      delete [] bottomGhost;
+      delete [] frontGhost;
+      delete [] backGhost;
     }
 
     // Send ghost faces to the six neighbors
     void begin_iteration(void) {
       iterations++;
 
-      // Copy different faces into messages
-      double *leftGhost =  new double[blockDimY*blockDimZ];
-      double *rightGhost =  new double[blockDimY*blockDimZ];
-      double *topGhost =  new double[blockDimX*blockDimZ];
-      double *bottomGhost =  new double[blockDimX*blockDimZ];
-      double *frontGhost =  new double[blockDimX*blockDimY];
-      double *backGhost =  new double[blockDimX*blockDimY];
-
       for(int k=0; k<blockDimZ; ++k)
-       for(int j=0; j<blockDimY; ++j) {
-         leftGhost[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
-         rightGhost[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];
-       }
+        for(int j=0; j<blockDimY; ++j) {
+          leftGhost[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
+          rightGhost[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];
+        }
 
       for(int k=0; k<blockDimZ; ++k)
-       for(int i=0; i<blockDimX; ++i) {
-         topGhost[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
-         bottomGhost[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
-       }
+        for(int i=0; i<blockDimX; ++i) {
+          topGhost[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
+          bottomGhost[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
+        }
 
       for(int j=0; j<blockDimY; ++j)
-       for(int i=0; i<blockDimX; ++i) {
-         frontGhost[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
-         backGhost[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
-       }
+        for(int i=0; i<blockDimX; ++i) {
+          frontGhost[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
+          backGhost[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
+        }
 
       // Send my left face
       thisProxy(wrap_x(thisIndex.x-1), thisIndex.y, thisIndex.z)
-         .receiveGhosts(iterations, RIGHT, blockDimY, blockDimZ, leftGhost);
+        .receiveGhosts(iterations, RIGHT, blockDimY, blockDimZ, leftGhost);
       // Send my right face
       thisProxy(wrap_x(thisIndex.x+1), thisIndex.y, thisIndex.z)
-         .receiveGhosts(iterations, LEFT, blockDimY, blockDimZ, rightGhost);
+        .receiveGhosts(iterations, LEFT, blockDimY, blockDimZ, rightGhost);
       // Send my bottom face
       thisProxy(thisIndex.x, wrap_y(thisIndex.y-1), thisIndex.z)
-         .receiveGhosts(iterations, TOP, blockDimX, blockDimZ, bottomGhost);
+        .receiveGhosts(iterations, TOP, blockDimX, blockDimZ, bottomGhost);
       // Send my top face
       thisProxy(thisIndex.x, wrap_y(thisIndex.y+1), thisIndex.z)
-         .receiveGhosts(iterations, BOTTOM, blockDimX, blockDimZ, topGhost);
+        .receiveGhosts(iterations, BOTTOM, blockDimX, blockDimZ, topGhost);
       // Send my front face
       thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z-1))
-         .receiveGhosts(iterations, BACK, blockDimX, blockDimY, frontGhost);
+        .receiveGhosts(iterations, BACK, blockDimX, blockDimY, frontGhost);
       // Send my back face
       thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z+1))
-         .receiveGhosts(iterations, FRONT, blockDimX, blockDimY, backGhost);
-
-      delete[] leftGhost;
-      delete[] rightGhost;
-      delete[] bottomGhost;
-      delete[] topGhost;
-      delete[] frontGhost;
-      delete[] backGhost;
+        .receiveGhosts(iterations, FRONT, blockDimX, blockDimY, backGhost);
     }
 
     void processGhosts(int dir, int height, int width, double gh[]) {
       switch(dir) {
-       case LEFT:
-         for(int k=0; k<width; ++k)
-           for(int j=0; j<height; ++j) {
-             temperature[index(0, j+1, k+1)] = gh[k*height+j];
-           }
-         break;
-       case RIGHT:
-         for(int k=0; k<width; ++k)
-           for(int j=0; j<height; ++j) {
-             temperature[index(blockDimX+1, j+1, k+1)] = gh[k*height+j];
-           }
-         break;
-       case BOTTOM:
-         for(int k=0; k<width; ++k)
-           for(int i=0; i<height; ++i) {
-             temperature[index(i+1, 0, k+1)] = gh[k*height+i];
-           }
-         break;
-       case TOP:
-         for(int k=0; k<width; ++k)
-           for(int i=0; i<height; ++i) {
-             temperature[index(i+1, blockDimY+1, k+1)] = gh[k*height+i];
-           }
-         break;
-       case FRONT:
-         for(int j=0; j<width; ++j)
-           for(int i=0; i<height; ++i) {
-             temperature[index(i+1, j+1, 0)] = gh[j*height+i];
-           }
-         break;
-       case BACK:
-         for(int j=0; j<width; ++j)
-           for(int i=0; i<height; ++i) {
-             temperature[index(i+1, j+1, blockDimZ+1)] = gh[j*height+i];
-           }
-         break;
-       default:
+        case LEFT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(0, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        case RIGHT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(blockDimX+1, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        case BOTTOM:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, 0, k+1)] = gh[k*height+i];
+            }
+          break;
+        case TOP:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, blockDimY+1, k+1)] = gh[k*height+i];
+            }
+          break;
+        case FRONT:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, 0)] = gh[j*height+i];
+            }
+          break;
+        case BACK:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, blockDimZ+1)] = gh[j*height+i];
+            }
+          break;
+        default:
           CkAbort("ERROR\n");
       }
     }
@@ -301,7 +314,6 @@ class Stencil: public CBase_Stencil {
 
       // calculate error
       // not being done right now since we are doing a fixed no. of iterations
-
       double *tmp;
       tmp = temperature;
       temperature = new_temperature;
@@ -310,21 +322,21 @@ class Stencil: public CBase_Stencil {
       constrainBC();
 
       if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0) {
-       double endTime = CkWallTimer();
-       CkPrintf("[%d] Time per iteration: %f %f\n", iterations, (endTime - startTime), endTime);
+        double endTime = CkWallTimer();
+        CkPrintf("[%d] Time per iteration: %f %f\n", iterations, (endTime - startTime), endTime);
       }
 
       if(iterations == MAX_ITER)
-       contribute(CkCallback(CkReductionTarget(Main, report), mainProxy));
+        contribute(CkCallback(CkReductionTarget(Main, report), mainProxy));
       else {
-       if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
-         startTime = CkWallTimer();
-       if(iterations % LBPERIOD_ITER == 0)
-         {
-           AtSync();
-         }
-       else
-         contribute(CkCallback(CkReductionTarget(Stencil, doStep), thisProxy));
+        if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
+          startTime = CkWallTimer();
+        if(iterations % LBPERIOD_ITER == 0)
+        {
+          AtSync();
+        }
+        else
+          contribute(CkCallback(CkReductionTarget(Stencil, doStep), thisProxy));
       }
     }
 
@@ -337,26 +349,26 @@ class Stencil: public CBase_Stencil {
       double work = 100.0;
 
       if(index >= numChares*0.2 && index <=numChares*0.8) {
-       work = work * ((double)index/(double)numChares) + (double)itno;
-       // CkPrintf("[%d][%d][%d] %d %d %f\n", thisIndex.x, thisIndex.y, thisIndex.z, index, itno, work);
+        work = work * ((double)index/(double)numChares) + (double)itno;
+        // CkPrintf("[%d][%d][%d] %d %d %f\n", thisIndex.x, thisIndex.y, thisIndex.z, index, itno, work);
       } else
-       work = 10.0;
+        work = 10.0;
 
 #pragma unroll
       for(int w=0; w<work; w++) {
-       for(int k=1; k<blockDimZ+1; ++k)
-         for(int j=1; j<blockDimY+1; ++j)
-           for(int i=1; i<blockDimX+1; ++i) {
-             // update my value based on the surrounding values
-             new_temperature[index(i, j, k)] = (temperature[index(i-1, j, k)]
-                                             +  temperature[index(i+1, j, k)]
-                                             +  temperature[index(i, j-1, k)]
-                                             +  temperature[index(i, j+1, k)]
-                                             +  temperature[index(i, j, k-1)]
-                                             +  temperature[index(i, j, k+1)]
-                                             +  temperature[index(i, j, k)] )
-                                             *  DIVIDEBY7;
-           } // end for
+        for(int k=1; k<blockDimZ+1; ++k)
+          for(int j=1; j<blockDimY+1; ++j)
+            for(int i=1; i<blockDimX+1; ++i) {
+              // update my value based on the surrounding values
+              new_temperature[index(i, j, k)] = (temperature[index(i-1, j, k)]
+                  +  temperature[index(i+1, j, k)]
+                  +  temperature[index(i, j-1, k)]
+                  +  temperature[index(i, j+1, k)]
+                  +  temperature[index(i, j, k-1)]
+                  +  temperature[index(i, j, k+1)]
+                  +  temperature[index(i, j, k)] )
+                *  DIVIDEBY7;
+            } // end for
       }
     }
 
@@ -364,14 +376,14 @@ class Stencil: public CBase_Stencil {
     void constrainBC() {
       // Heat left, top and front faces of each chare's block
       for(int k=1; k<blockDimZ+1; ++k)
-       for(int i=1; i<blockDimX+1; ++i)
-         temperature[index(i, 1, k)] = 255.0;
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, 1, k)] = 255.0;
       for(int k=1; k<blockDimZ+1; ++k)
-       for(int j=1; j<blockDimY+1; ++j)
-         temperature[index(1, j, k)] = 255.0;
+        for(int j=1; j<blockDimY+1; ++j)
+          temperature[index(1, j, k)] = 255.0;
       for(int j=1; j<blockDimY+1; ++j)
-       for(int i=1; i<blockDimX+1; ++i)
-         temperature[index(i, j, 1)] = 255.0;
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, j, 1)] = 255.0;
     }
 
     void ResumeFromSync() {
index 874e96c05ac3f4e0e28e1baaa7db955384e1c84c..1670ca6147379d0a4118af336f99546169998e5b 100644 (file)
@@ -21,24 +21,23 @@ mainmodule stencil3d {
     entry Stencil(void);
     entry void begin_iteration(void);
     entry void receiveGhosts(int iter, int dir, int height, int width,
-                             double ghosts[height*width]);
+        double ghosts[height*width]);
 
     entry [reductiontarget] void doStep() {
       serial "begin_iteration" {
-       begin_iteration();
+        begin_iteration();
       }
       for(imsg = 0; imsg < 6; imsg++) {
-       // "iterations" keeps track of messages across steps
-       when receiveGhosts[iterations] (int iter, int dir, int height, 
-                                     int width, double ghosts[height*width])
-         serial "process_ghosts" {
+        // "iterations" keeps track of messages across steps
+        when receiveGhosts[iterations] (int iter, int dir, int height,
+            int width, double ghosts[height*width])
+          serial "process_ghosts" {
             processGhosts(dir, height, width, ghosts);
           }
       }
       serial "do_work" {
-       check_and_compute();
+        check_and_compute();
       }
     };
   };
-
 };