add Jacobi3d example without ft and with correct boundary constrain so that parallel...
author Yanhua Sun <sun51@illinois.edu>
Fri, 13 Sep 2013 16:01:13 +0000 (11:01 -0500)
committer Yanhua Sun <sun51@illinois.edu>
Fri, 13 Sep 2013 16:01:13 +0000 (11:01 -0500)
examples/charm++/jacobi3d-sdag-constrain/Makefile [new file with mode: 0644]
examples/charm++/jacobi3d-sdag-constrain/jacobi3d.C [new file with mode: 0644]
examples/charm++/jacobi3d-sdag-constrain/jacobi3d.ci [new file with mode: 0644]
src/ck-tune/trace-autoPerf.C

diff --git a/examples/charm++/jacobi3d-sdag-constrain/Makefile b/examples/charm++/jacobi3d-sdag-constrain/Makefile
new file mode 100644 (file)
index 0000000..34373f5
--- /dev/null
@@ -0,0 +1,30 @@
+# Build rules for the jacobi3d-sdag-constrain example.
+# OPTS is appended to every charmc invocation (compile and link).
+OPTS   = -O3
+CHARMC = ../../../bin/charmc $(OPTS)
+
+OBJS = jacobi3d.o
+
+# These targets never produce a file carrying their own name.
+.PHONY: all projections summary autoPerf clean test
+
+all: jacobi3d
+
+jacobi3d: $(OBJS)
+       $(CHARMC) -language charm++ -o jacobi3d $(OBJS)
+
+# Variants of the binary linked with a tracing module.
+projections: $(OBJS)
+       $(CHARMC) -language charm++ -tracemode projections -lz -o jacobi3d.prj $(OBJS)
+
+summary: $(OBJS)
+       $(CHARMC) -language charm++ -tracemode summary -lz -o jacobi3d.sum $(OBJS)
+
+autoPerf: $(OBJS)
+       $(CHARMC) -language charm++ -tracemode autoPerf -lz -o jacobi3d.ap $(OBJS)
+
+# The declaration header is generated from the interface file.
+jacobi3d.decl.h: jacobi3d.ci
+       $(CHARMC)  jacobi3d.ci
+
+# Remove generated headers, objects and every binary variant built above
+# (including the summary and autoPerf builds).
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o jacobi3d jacobi3d.prj jacobi3d.sum jacobi3d.ap charmrun *~
+
+jacobi3d.o: jacobi3d.C jacobi3d.decl.h
+       $(CHARMC) -c jacobi3d.C
+
+test: jacobi3d
+       ./charmrun +p2 ./jacobi3d 20 10 +balancer Refine $(TESTOPTS)
diff --git a/examples/charm++/jacobi3d-sdag-constrain/jacobi3d.C b/examples/charm++/jacobi3d-sdag-constrain/jacobi3d.C
new file mode 100644 (file)
index 0000000..935832e
--- /dev/null
@@ -0,0 +1,344 @@
+#define MAX_ITER       200     // default iteration limit; Main copies it into max_iter unless a limit is given on the command line
+#define LEFT                   1   // LEFT..BACK: direction tags carried by updateGhosts and switched on in updateBoundary
+#define RIGHT                  2
+#define TOP                    3
+#define BOTTOM                 4
+#define FRONT                  5
+#define BACK                   6
+#define DIVIDEBY7              0.14285714285714285714  // 1/7: the kernel averages a cell with its six face neighbours
+#define DELTA                  0.01  // convergence threshold compared against the reduced maximum error in jacobi3d.ci
+// Declarations generated from jacobi3d.ci (see the Makefile rule for jacobi3d.decl.h)
+#include "jacobi3d.decl.h"
+// Globals declared readonly in jacobi3d.ci; all of them are assigned in Main's constructor
+/*readonly*/ CProxy_Main mainProxy;
+/*readonly*/ int arrayDimX;
+/*readonly*/ int arrayDimY;
+/*readonly*/ int arrayDimZ;
+/*readonly*/ int blockDimX;
+/*readonly*/ int blockDimY;
+/*readonly*/ int blockDimZ;
+
+// number of worker chares in each dimension (arrayDim / blockDim), plus the iteration limit
+/*readonly*/ int num_chare_x;
+/*readonly*/ int num_chare_y;
+/*readonly*/ int num_chare_z;
+/*readonly*/ int max_iter;
+// Map a chare index into [0, num_chare) with wrap-around; valid for a >= -num_chare
+#define wrapX(a)       (((a)+num_chare_x)%num_chare_x)
+#define wrapY(a)       (((a)+num_chare_y)%num_chare_y)
+#define wrapZ(a)       (((a)+num_chare_z)%num_chare_z)
+// Linear offset of cell (a,b,c) in a block padded by one ghost layer per side; a varies fastest
+#define index(a,b,c)   ((a)+(b)*(blockDimX+2)+(c)*(blockDimX+2)*(blockDimY+2))
+// Wall-clock timestamps written by Main (constructor and commonExit); not declared readonly
+double startTime;
+double endTime;
+
+/** \class Main
+ *
+ * Main chare. Parses the command line, fills in the readonly problem
+ * description (array/block sizes, chare counts, iteration limit), creates
+ * the 3D array of Jacobi chares and starts their run() method. The two
+ * reduction targets doneConverge/doneIter report the outcome and exit.
+ */
+class Main : public CBase_Main {
+public:
+  CProxy_Jacobi array;
+  int iterations;
+
+  /**
+   * Accepted command lines (program name excluded):
+   *   array_size block_size [max_iter]
+   *   array_size_X array_size_Y array_size_Z block_size_X block_size_Y block_size_Z [max_iter]
+   * Aborts if the argument count is wrong or if any block size is not a
+   * positive divisor of the matching array size.
+   */
+  Main(CkArgMsg* m) {
+    if ( (m->argc != 3) && (m->argc != 7) && (m->argc != 4) && (m->argc != 8)) {
+      CkPrintf("%s [array_size] [block_size] [max_iter]\n", m->argv[0]);
+      CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z] [max_iter]\n", m->argv[0]);
+      CkAbort("Abort");
+    }
+
+    // set iteration counter to zero
+    iterations = 0;
+    max_iter = MAX_ITER;
+    // store the main proxy
+    mainProxy = thisProxy;
+
+    if(m->argc <5 ) {
+      // cubic form: one size for all three dimensions
+      arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
+      blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
+      if(m->argc == 4)
+          max_iter =  atoi(m->argv[3]);
+    }
+    else {
+      // argc is 7 or 8 here: explicit per-dimension sizes
+      arrayDimX = atoi(m->argv[1]);
+      arrayDimY = atoi(m->argv[2]);
+      arrayDimZ = atoi(m->argv[3]);
+      blockDimX = atoi(m->argv[4]);
+      blockDimY = atoi(m->argv[5]);
+      blockDimZ = atoi(m->argv[6]);
+      if(m->argc == 8)
+          max_iter =  atoi(m->argv[7]);
+    }
+
+    // The arguments are fully consumed; the main chare owns the message.
+    delete m;
+
+    // Reject non-positive block sizes first: atoi() yields 0 for non-numeric
+    // input and the modulo below must never see a zero divisor.
+    if (blockDimX <= 0 || arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
+      CkAbort("array_size_X must be a positive multiple of block_size_X!");
+    if (blockDimY <= 0 || arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
+      CkAbort("array_size_Y must be a positive multiple of block_size_Y!");
+    if (blockDimZ <= 0 || arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
+      CkAbort("array_size_Z must be a positive multiple of block_size_Z!");
+
+    num_chare_x = arrayDimX / blockDimX;
+    num_chare_y = arrayDimY / blockDimY;
+    num_chare_z = arrayDimZ / blockDimZ;
+
+    // print info
+    CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
+    CkPrintf("Running Jacobi on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z);
+    CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
+    CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);
+
+    // Create new array of worker chares
+    array = CProxy_Jacobi::ckNew(num_chare_x, num_chare_y, num_chare_z);
+
+    //Start the computation
+    array.run();
+    startTime = CkWallTimer();
+  }
+
+  /// Print the timing summary for `iter` iterations and terminate the program.
+  void commonExit(int iter)
+  {
+    endTime = CkWallTimer();
+    const double elapsed = endTime - startTime;
+    // With an iteration limit of 0 nothing ran; avoid dividing by zero.
+    const double perIteration = (iter > 0) ? elapsed / iter : 0.0;
+    CkPrintf("Time elapsed per iteration: %f total time %f \n", perIteration, elapsed);
+    CkExit();
+
+  }
+
+  /// Reduction target: the array converged after `done_iters` iterations.
+  void doneConverge(int done_iters) {
+      CkPrintf(" finished due to convergence %d \n", done_iters); 
+      commonExit(done_iters);
+  }
+
+  /// Reduction target: the iteration limit was reached; `error` is the reduced error value.
+  void doneIter(double error)
+  {
+      CkPrintf(" finished due to maximum iterations %d with error  %f \n", max_iter, error); 
+      commonExit(max_iter);
+  }
+};
+
+/** \class Jacobi
+ *
+ * One block of the 3D domain. Each chare stores its blockDimX x blockDimY x
+ * blockDimZ cells surrounded by a one-cell ghost layer, exchanges faces with
+ * the neighbours that exist (no wrap-around at the domain boundary) and
+ * applies a 7-point averaging kernel. The iteration loop itself is the SDAG
+ * method run() declared in jacobi3d.ci.
+ */
+
+class Jacobi: public CBase_Jacobi {
+  Jacobi_SDAG_CODE
+
+public:
+  int iterations;           // completed iterations; also the reference number put on ghost messages
+  int neighbors;            // number of existing face neighbours (6 minus one per domain face touched)
+  int remoteCount;          // loop counter used by run() while collecting ghost messages
+  double error;             // largest change seen by this chare in the last kernel sweep
+  double *temperature;      // current values, (blockDimX+2)*(blockDimY+2)*(blockDimZ+2) cells
+  double *new_temperature;  // scratch buffer of the same size, swapped with temperature each sweep
+  bool converged;           // set by run() once the reduced error drops below DELTA
+
+  // Constructor, initialize values
+  Jacobi() {
+    converged = false;
+    // error is contributed by run() even if no sweep ever executes, so give
+    // it (and the loop counter) a defined value.
+    error = 0.0;
+    remoteCount = 0;
+
+    // Count the neighbours that exist: chares on a domain face have none on that side.
+    neighbors = 6;
+    if(thisIndex.x == 0) 
+        neighbors--;
+    if( thisIndex.x== num_chare_x-1)
+        neighbors--;
+    if(thisIndex.y == 0) 
+        neighbors--;
+    if( thisIndex.y== num_chare_y-1)
+        neighbors--;
+    if(thisIndex.z == 0) 
+        neighbors--;
+    if( thisIndex.z== num_chare_z-1)
+          neighbors--;
+
+    // allocate a three dimensional array
+    temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+    new_temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+
+    for(int k=0; k<blockDimZ+2; ++k)
+      for(int j=0; j<blockDimY+2; ++j)
+        for(int i=0; i<blockDimX+2; ++i)
+          new_temperature[index(i, j, k)] = temperature[index(i, j, k)] = 0.0;
+    //print();
+    iterations = 0;
+    constrainBC();
+    //print();
+  }
+
+  // Serialize/deserialize the chare. Every member read by run() is included
+  // so that an unpacked chare resumes with a fully defined state.
+  void pup(PUP::er &p)
+  {
+    CBase_Jacobi::pup(p);
+    __sdag_pup(p);
+    p|iterations;
+    p|neighbors;
+    p|remoteCount;
+    p|error;
+    p|converged;
+
+    size_t size = (blockDimX+2) * (blockDimY+2) * (blockDimZ+2);
+    if (p.isUnpacking()) {
+      // The migration constructor allocates nothing; do it before reading the data.
+      temperature = new double[size];
+      new_temperature = new double[size];
+    }
+    p(temperature, size);
+    p(new_temperature, size);
+  }
+
+  // Migration constructor: all state is restored by pup().
+  Jacobi(CkMigrateMessage* m) { }
+
+  ~Jacobi() { 
+    delete [] temperature; 
+    delete [] new_temperature; 
+  }
+
+  // Send ghost faces to the neighbors that exist (up to six)
+  void begin_iteration(void) {
+    // Copy the six outermost interior faces into contiguous buffers.
+    // left/right = x==1 / x==blockDimX, top/bottom = y==1 / y==blockDimY,
+    // front/back = z==1 / z==blockDimZ.
+    double *leftGhost =  new double[blockDimY*blockDimZ];
+    double *rightGhost =  new double[blockDimY*blockDimZ];
+    double *topGhost =  new double[blockDimX*blockDimZ];
+    double *bottomGhost =  new double[blockDimX*blockDimZ];
+    double *frontGhost =  new double[blockDimX*blockDimY];
+    double *backGhost =  new double[blockDimX*blockDimY];
+    for(int k=0; k<blockDimZ; ++k)
+      for(int j=0; j<blockDimY; ++j) {
+        leftGhost[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
+        rightGhost[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];
+      }
+
+    for(int k=0; k<blockDimZ; ++k)
+      for(int i=0; i<blockDimX; ++i) {
+        topGhost[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
+        bottomGhost[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
+      }
+
+    for(int j=0; j<blockDimY; ++j)
+      for(int i=0; i<blockDimX; ++i) {
+        frontGhost[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
+        backGhost[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
+      }
+
+    // The direction tag names the ghost layer the RECEIVER fills (see
+    // updateBoundary). The neighbour on our low side fills its high ghost
+    // layer and therefore needs our low face, and vice versa.
+    int x = thisIndex.x, y = thisIndex.y, z = thisIndex.z;
+    if(thisIndex.x>0)
+        thisProxy(wrapX(x-1),y,z).updateGhosts(iterations, RIGHT,  blockDimY, blockDimZ, leftGhost);
+    if(thisIndex.x<num_chare_x-1)
+        thisProxy(wrapX(x+1),y,z).updateGhosts(iterations, LEFT,   blockDimY, blockDimZ, rightGhost);
+    if(thisIndex.y>0)
+        thisProxy(x,wrapY(y-1),z).updateGhosts(iterations, TOP,    blockDimX, blockDimZ, topGhost);
+    if(thisIndex.y<num_chare_y-1)
+        thisProxy(x,wrapY(y+1),z).updateGhosts(iterations, BOTTOM, blockDimX, blockDimZ, bottomGhost);
+    if(thisIndex.z>0)
+        thisProxy(x,y,wrapZ(z-1)).updateGhosts(iterations, BACK,   blockDimX, blockDimY, frontGhost);
+    if(thisIndex.z<num_chare_z-1)
+        thisProxy(x,y,wrapZ(z+1)).updateGhosts(iterations, FRONT,  blockDimX, blockDimY, backGhost);
+
+    delete [] leftGhost;
+    delete [] rightGhost;
+    delete [] bottomGhost;
+    delete [] topGhost;
+    delete [] frontGhost;
+    delete [] backGhost;
+  }
+
+  // Copy a received face into the ghost layer selected by dir.
+  //   dir    - LEFT/RIGHT fill x==0 / x==blockDimX+1, BOTTOM/TOP fill
+  //            y==0 / y==blockDimY+1, FRONT/BACK fill z==0 / z==blockDimZ+1
+  //   height - extent of the fastest-varying index of gh
+  //   width  - extent of the slower-varying index of gh
+  //   gh     - face data, element (fast, slow) stored at gh[slow*height+fast]
+  // Aborts on an unknown direction.
+  void updateBoundary(int dir, int height, int width, double* gh) {
+    switch(dir) {
+    case LEFT:
+      for(int k=0; k<width; ++k)
+        for(int j=0; j<height; ++j) {
+          temperature[index(0, j+1, k+1)] = gh[k*height+j];
+        }
+      break;
+    case RIGHT:
+      for(int k=0; k<width; ++k)
+        for(int j=0; j<height; ++j) {
+          temperature[index(blockDimX+1, j+1, k+1)] = gh[k*height+j];
+        }
+      break;
+    case BOTTOM:
+      for(int k=0; k<width; ++k)
+        for(int i=0; i<height; ++i) {
+          temperature[index(i+1, 0, k+1)] = gh[k*height+i];
+        }
+      break;
+    case TOP:
+      for(int k=0; k<width; ++k)
+        for(int i=0; i<height; ++i) {
+          temperature[index(i+1, blockDimY+1, k+1)] = gh[k*height+i];
+        }
+      break;
+    case FRONT:
+      for(int j=0; j<width; ++j)
+        for(int i=0; i<height; ++i) {
+          temperature[index(i+1, j+1, 0)] = gh[j*height+i];
+        }
+      break;
+    case BACK:
+      for(int j=0; j<width; ++j)
+        for(int i=0; i<height; ++i) {
+          temperature[index(i+1, j+1, blockDimZ+1)] = gh[j*height+i];
+        }
+      break;
+    default:
+      CkAbort("ERROR\n");
+    }
+  }
+
+  // One Jacobi sweep over the interior cells: each cell becomes the average
+  // of itself and its six face neighbours. The two buffers are swapped
+  // afterwards. Returns the largest absolute change of any cell.
+  double computeKernel() {
+    // named so as not to shadow the member `error`
+    double cell_error = 0.0, max_error = 0.0;
+    for(int k=1; k<blockDimZ+1; ++k)
+      for(int j=1; j<blockDimY+1; ++j)
+        for(int i=1; i<blockDimX+1; ++i) {
+          // update my value based on the surrounding values
+          new_temperature[index(i, j, k)] = (temperature[index(i-1, j, k)] 
+                                             +  temperature[index(i+1, j, k)]
+                                             +  temperature[index(i, j-1, k)]
+                                             +  temperature[index(i, j+1, k)]
+                                             +  temperature[index(i, j, k-1)]
+                                             +  temperature[index(i, j, k+1)]
+                                             +  temperature[index(i, j, k)] ) * DIVIDEBY7;
+          cell_error = fabs(new_temperature[index(i,j,k)] - temperature[index(i,j,k)]);
+          if (cell_error > max_error) {
+            max_error = cell_error;
+          }
+        } // end for
+    
+    // Only interior cells were written, so the ghost layers of both buffers
+    // keep whatever was stored in them before the swap.
+    double *tmp;
+    tmp = temperature;
+    temperature = new_temperature;
+    new_temperature = tmp;
+
+    //constrainBC();
+
+    return max_error;
+  }
+
+  // Debug helper: dump cells 1..blockDim+1 of every dimension, labelled k:j:i.
+  void print()
+  {
+
+    for(int k=1; k<blockDimZ+2; ++k)
+      for(int j=1; j<blockDimY+2; ++j)
+        for(int i=1; i<blockDimX+2; ++i)
+          // index() takes (x, y, z); passing (k, j, i) would read out of
+          // bounds whenever the block is not cubic.
+          CkPrintf(" -%d:%d:%d %f ", k,j,i, temperature[index(i, j, k)]);
+    CkPrintf("--------------------------------\n");
+  }
+
+  // Enforce the boundary conditions: chares on the low x, y or z face of the
+  // domain hold the matching ghost layer at 255.0 in both buffers.
+  void constrainBC() {
+    if(thisIndex.x == 0 )
+        for(int j=0; j<blockDimY+2; ++j)
+            for(int k=0; k<blockDimZ+2; ++k)
+            {   
+                new_temperature[index(0, j, k)] = temperature[index(0, j, k)] = 255.0;
+            }
+    if(thisIndex.y == 0 )
+        for(int j=0; j<blockDimX+2; ++j)
+            for(int k=0; k<blockDimZ+2; ++k)
+            {   
+                new_temperature[index(j,0, k)]  = temperature[index(j,0, k)] = 255.0;
+            }
+    if(thisIndex.z == 0 )
+        for(int j=0; j<blockDimX+2; ++j)
+            for(int k=0; k<blockDimY+2; ++k)
+            {   
+                new_temperature[index(j, k, 0)] = temperature[index(j, k, 0)] = 255.0;
+            }
+
+  }
+};
+
+#include "jacobi3d.def.h"
diff --git a/examples/charm++/jacobi3d-sdag-constrain/jacobi3d.ci b/examples/charm++/jacobi3d-sdag-constrain/jacobi3d.ci
new file mode 100644 (file)
index 0000000..879379d
--- /dev/null
@@ -0,0 +1,48 @@
+// Charm++ interface for the jacobi3d example: readonly globals, the main
+// chare and the 3D array of Jacobi workers with their SDAG iteration loop.
+mainmodule jacobi3d {
+  readonly CProxy_Main mainProxy;
+  readonly int arrayDimX;
+  readonly int arrayDimY;
+  readonly int arrayDimZ;
+  readonly int blockDimX;
+  readonly int blockDimY;
+  readonly int blockDimZ;
+  readonly int num_chare_x;
+  readonly int num_chare_y;
+  readonly int num_chare_z;
+  readonly int max_iter;
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void doneConverge(int);
+    entry [reductiontarget] void doneIter(double);
+  };
+
+  array [3D] Jacobi {
+    entry Jacobi(void);
+    // ref is the sender's iteration number; w is the fastest-varying extent of gh, h the slower one
+    entry void updateGhosts(int ref, int dir, int w, int h, double gh[w*h]);
+    entry [reductiontarget] void checkConverged(double e);
+    entry void run() {
+      while (iterations < max_iter && !converged) {
+        atomic { begin_iteration(); }
+        // wait for one ghost message of the current iteration per existing neighbour
+        for (remoteCount = 0; remoteCount < neighbors; remoteCount++) {
+          when updateGhosts[iterations](int ref, int dir, int w, int h, double buf[w*h]) atomic {
+            updateBoundary(dir, w, h, buf);
+          }
+        }
+        atomic {
+          error = computeKernel();
+          //CkPrintf(" error   %d %d %d  is %f\n", thisIndex.x, thisIndex.y, thisIndex.z, error);
+          iterations++;
+          contribute(sizeof(double), &error, CkReduction::max_double, CkCallback(CkReductionTarget(Jacobi, checkConverged), thisProxy));
+        }
+        when checkConverged(double maxerror)
+        {
+            // Every chare holds the same iteration count here, so max_int delivers
+            // that value to Main::doneConverge(int); the nop reducer is not
+            // documented to pass the contributed data on to the target.
+            if (maxerror<DELTA) atomic{ converged = true; contribute(sizeof(int), &iterations, CkReduction::max_int, CkCallback((CkReductionTarget(Main, doneConverge)), mainProxy)); }
+        }
+      }
+      // Report the iteration limit only when the loop did not end through
+      // convergence; otherwise converging on the final permitted iteration
+      // would notify Main twice.
+      if(!converged)
+          atomic { contribute(sizeof(double), &error, CkReduction::max_double, CkCallback((CkReductionTarget(Main, doneIter)), mainProxy));}
+
+    };
+  };
+};
index 90da9fcd027684af9faf9f47ad7f2aecce8f2106..1def57131b029497c92b656509c3c4cca8d4c796 100644 (file)
@@ -164,9 +164,9 @@ void TraceAutoPerfBOC::globalPerfAnalyze(CkReductionMsg *msg )
     double overheadPercentage = data->overheadTotalTime/totalTime;
     double utilPercentage = data->utilTotalTime/totalTime;
     //DEBUG_PRINT ( 
     double overheadPercentage = data->overheadTotalTime/totalTime;
     double utilPercentage = data->utilTotalTime/totalTime;
     //DEBUG_PRINT ( 
-    CkPrintf("Utilization(%):  \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f)\n", data->utilMin*100, data->utilMax*100, utilPercentage*100 );
-    CkPrintf("Idle(%):         \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) \n", data->idleMin*100,  data->idleMax*100, idlePercentage*100);
-    CkPrintf("Overhead(%):     \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) \n", data->overheadMin*100, data->overheadMax*100, overheadPercentage*100);
+    CkPrintf("Utilization(%):  \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) time:%f\n", data->utilMin*100, data->utilMax*100, utilPercentage*100, data->utilTotalTime);
+    CkPrintf("Idle(%):         \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) time:%f \n", data->idleMin*100,  data->idleMax*100, idlePercentage*100, data->idleTotalTime);
+    CkPrintf("Overhead(%):     \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) time:%f \n", data->overheadMin*100, data->overheadMax*100, overheadPercentage*100, data->overheadTotalTime);
     CkPrintf("Grainsize(ms):\t(avg:max)\t: (%.3f:    %.3f) \n", data->utilTotalTime/data->numInvocations*1000, data->grainsizeMax*1000);
     CkPrintf("Invocations:  \t%lld\n", data->numInvocations);
     //)
     CkPrintf("Grainsize(ms):\t(avg:max)\t: (%.3f:    %.3f) \n", data->utilTotalTime/data->numInvocations*1000, data->grainsizeMax*1000);
     CkPrintf("Invocations:  \t%lld\n", data->numInvocations);
     //)