Zerocopy EM API: Added examples for each registration mode 29/4529/23
authorNitin Bhat <nbhat4@illinois.edu>
Thu, 30 Aug 2018 17:45:06 +0000 (13:45 -0400)
committerNitin Bhat <nbhat4@illinois.edu>
Tue, 11 Dec 2018 21:17:27 +0000 (16:17 -0500)
Change-Id: I6e9c33e647a27c1721f84ae4c13b8fdb7085c9e0

35 files changed:
examples/charm++/zerocopy/entry_method_api/Makefile
examples/charm++/zerocopy/entry_method_api/misc/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/misc/simpleVec/Makefile [moved from examples/charm++/zerocopy/entry_method_api/simpleVec/Makefile with 82% similarity]
examples/charm++/zerocopy/entry_method_api/misc/simpleVec/simpleZCVec.C [moved from examples/charm++/zerocopy/entry_method_api/simpleVec/simpleZCVec.C with 100% similarity]
examples/charm++/zerocopy/entry_method_api/misc/simpleVec/simpleZCVec.ci [moved from examples/charm++/zerocopy/entry_method_api/simpleVec/simpleZCVec.ci with 100% similarity]
examples/charm++/zerocopy/entry_method_api/prereg/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/prereg/pingpong/Makefile [moved from examples/charm++/zerocopy/entry_method_api/pingpong/Makefile with 83% similarity]
examples/charm++/zerocopy/entry_method_api/prereg/pingpong/pingpong.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/prereg/pingpong/pingpong.ci [moved from examples/charm++/zerocopy/entry_method_api/pingpong/pingpong.ci with 100% similarity]
examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/Makefile [moved from examples/charm++/zerocopy/entry_method_api/simpleZeroCopy/Makefile with 86% similarity]
examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.ci [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/Makefile [moved from examples/charm++/zerocopy/entry_method_api/stencil3d/Makefile with 90% similarity]
examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/stencil3d.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/stencil3d.ci [moved from examples/charm++/zerocopy/entry_method_api/stencil3d/stencil3d.ci with 100% similarity]
examples/charm++/zerocopy/entry_method_api/reg/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/reg/pingpong/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/reg/pingpong/pingpong.C [moved from examples/charm++/zerocopy/entry_method_api/pingpong/pingpong.C with 100% similarity]
examples/charm++/zerocopy/entry_method_api/reg/pingpong/pingpong.ci [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/simpleZeroCopy.C [moved from examples/charm++/zerocopy/entry_method_api/simpleZeroCopy/simpleZeroCopy.C with 95% similarity]
examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/simpleZeroCopy.ci [moved from examples/charm++/zerocopy/entry_method_api/simpleZeroCopy/simpleZeroCopy.ci with 100% similarity]
examples/charm++/zerocopy/entry_method_api/reg/stencil3d/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/reg/stencil3d/stencil3d.C [moved from examples/charm++/zerocopy/entry_method_api/stencil3d/stencil3d.C with 100% similarity]
examples/charm++/zerocopy/entry_method_api/reg/stencil3d/stencil3d.ci [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/pingpong/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.ci [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.ci [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/Makefile [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.C [new file with mode: 0644]
examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.ci [new file with mode: 0644]

index ab4bb4f523a8bf8c725aa797c67a53b0b94d0dc8..4cc65d57535128690450183b0f338401a1a58c4e 100644 (file)
@@ -1,8 +1,8 @@
 DIRS = \
-  pingpong \
-  simpleZeroCopy \
-  stencil3d \
-  simpleVec \
+  unreg \
+  prereg \
+  reg \
+  misc \
 
 TESTDIRS = $(DIRS)
 
diff --git a/examples/charm++/zerocopy/entry_method_api/misc/Makefile b/examples/charm++/zerocopy/entry_method_api/misc/Makefile
new file mode 100644 (file)
index 0000000..6f3cc5c
--- /dev/null
@@ -0,0 +1,21 @@
+# Recursive driver: forwards the all/test/clean goals to every example
+# directory listed in DIRS (tests run over TESTDIRS).
+DIRS = \
+  simpleVec \
+
+TESTDIRS = $(DIRS)
+
+all: $(foreach i,$(DIRS),build-$i)
+
+test: $(foreach i,$(TESTDIRS),test-$i)
+
+# The '#' characters are escaped: an unescaped "#*#" is read by the shell as
+# a comment, so the matching files would never be removed.
+clean: $(foreach i,$(DIRS),clean-$i)
+       rm -f TAGS \#*\#
+       rm -f core *~
+
+$(foreach i,$(DIRS),build-$i):
+       $(MAKE) -C $(subst build-,,$@) all OPTS='$(OPTS)'
+
+$(foreach i,$(DIRS),test-$i):
+       $(MAKE) -C $(subst test-,,$@) test OPTS='$(OPTS)' TESTOPTS='$(TESTOPTS)'
+
+$(foreach i,$(DIRS),clean-$i):
+       $(MAKE) -C $(subst clean-,,$@) clean OPTS='$(OPTS)'
similarity index 82%
rename from examples/charm++/zerocopy/entry_method_api/simpleVec/Makefile
rename to examples/charm++/zerocopy/entry_method_api/misc/simpleVec/Makefile
index 317aad656ebd3c902542fbf613b07fa20b8f0d71..51442add697577c740b52db6b6cebd59dfe56365 100644 (file)
@@ -1,5 +1,5 @@
--include ../../../../common.mk
-CHARMC=../../../../../bin/charmc $(OPTS)
+-include ../../../../../common.mk
+CHARMC= ../../../../../../bin/charmc $(OPTS)
 
 all: simpleZCVec
 
diff --git a/examples/charm++/zerocopy/entry_method_api/prereg/Makefile b/examples/charm++/zerocopy/entry_method_api/prereg/Makefile
new file mode 100644 (file)
index 0000000..278c36d
--- /dev/null
@@ -0,0 +1,23 @@
+# Recursive driver: forwards the all/test/clean goals to every example
+# directory listed in DIRS (tests run over TESTDIRS).
+DIRS = \
+  pingpong \
+  simpleZeroCopy \
+  stencil3d \
+
+TESTDIRS = $(DIRS)
+
+all: $(foreach i,$(DIRS),build-$i)
+
+test: $(foreach i,$(TESTDIRS),test-$i)
+
+# The '#' characters are escaped: an unescaped "#*#" is read by the shell as
+# a comment, so the matching files would never be removed.
+clean: $(foreach i,$(DIRS),clean-$i)
+       rm -f TAGS \#*\#
+       rm -f core *~
+
+$(foreach i,$(DIRS),build-$i):
+       $(MAKE) -C $(subst build-,,$@) all OPTS='$(OPTS)'
+
+$(foreach i,$(DIRS),test-$i):
+       $(MAKE) -C $(subst test-,,$@) test OPTS='$(OPTS)' TESTOPTS='$(TESTOPTS)'
+
+$(foreach i,$(DIRS),clean-$i):
+       $(MAKE) -C $(subst clean-,,$@) clean OPTS='$(OPTS)'
similarity index 83%
rename from examples/charm++/zerocopy/entry_method_api/pingpong/Makefile
rename to examples/charm++/zerocopy/entry_method_api/prereg/pingpong/Makefile
index 23e468ae4535aa00da6a92b26e02e7e43a11bec9..99ad14f49d3beb89a58adde6c14cf2af40c23027 100644 (file)
@@ -1,5 +1,5 @@
--include ../../../../common.mk
-CHARMC=../../../../../bin/charmc $(OPTS)
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
 
 all:   pgm
 
diff --git a/examples/charm++/zerocopy/entry_method_api/prereg/pingpong/pingpong.C b/examples/charm++/zerocopy/entry_method_api/prereg/pingpong/pingpong.C
new file mode 100644 (file)
index 0000000..6e528d2
--- /dev/null
@@ -0,0 +1,121 @@
+#include "pingpong.decl.h"
+
+// Round-trip counts; Ping1::start picks one of them based on the payload size.
+#define BIG_ITER 10
+#define SMALL_ITER 100
+
+// Size in bytes of the exchanged buffer and upper bound of the size sweep.
+// Parenthesized so the shift cannot be re-associated by neighbouring operators
+// at the expansion site (e.g. "x * MAX_PAYLOAD" or "MAX_PAYLOAD + 1").
+#define MAX_PAYLOAD (1 << 12)
+
+// Proxy to the main chare; assigned in main's constructor.
+CProxy_main mainProxy;
+
+#define P1 0
+// Parenthesized for the same reason as MAX_PAYLOAD.
+#define P2 (1%CkNumPes())
+
+class main : public CBase_main
+{
+  CProxy_Ping1 arr1;
+  int size;
+public:
+  main(CkMigrateMessage *m) {}
+  main(CkArgMsg *m)
+  {
+    if(CkNumPes()>2) {
+      CkAbort("Run this program on 1 or 2 processors only.\n");
+    }
+    delete m;
+    size = 1024;
+    mainProxy = thisProxy;
+    CkPrintf("Size (bytes) \t\tIterations\t\tRegular API (one-way us)\tZero Copy API (one-way us)\n");
+    arr1 = CProxy_Ping1::ckNew(2);
+    CkStartQD(CkCallback(CkIndex_main::maindone(), mainProxy));
+  };
+
+  void maindone(void){
+    if(size < MAX_PAYLOAD){
+      arr1[0].start(size);
+      size = size << 1;
+    }
+    else if(size == MAX_PAYLOAD){
+      arr1[0].freeBuffer();
+    }
+  };
+};
+
+
+// Array element that bounces a buffer between index 0 and index 1, first
+// through the regular entry method (recv) and then through the zero copy
+// entry method (recv_zerocopy). Index 0 counts the round trips, measures the
+// elapsed time and prints one result line per payload size.
+class Ping1 : public CBase_Ping1
+{
+  int size;        // not read by the methods below; they use their own 'size' parameter
+  int niter;       // round trips counted so far in the current phase
+  int iterations;  // round trips to perform in each phase
+  double start_time, end_time, reg_time, zerocpy_time;
+  char *nocopyMsg; // buffer of MAX_PAYLOAD bytes obtained from CkRdmaAlloc
+
+public:
+  Ping1()
+  {
+    nocopyMsg = (char *)CkRdmaAlloc(sizeof(char) * MAX_PAYLOAD);
+    niter = 0;
+  }
+  Ping1(CkMigrateMessage *m) {}
+
+  // Begins the regular-API phase for a payload of 'size' bytes.
+  void start(int size)
+  {
+    niter = 0;
+    // payloads of at least 1 << 20 bytes use SMALL_ITER round trips,
+    // smaller ones use BIG_ITER
+    if(size >= 1 << 20)
+      iterations = SMALL_ITER;
+    else
+      iterations = BIG_ITER;
+    start_time = CkWallTimer();
+    thisProxy[1].recv(nocopyMsg, size);
+  }
+
+  // Releases this element's buffer; index 0 forwards the request to index 1,
+  // which ends the program after freeing its own buffer.
+  void freeBuffer(){
+    CkRdmaFree(nocopyMsg);
+    if(thisIndex == 0){
+      thisProxy[1].freeBuffer();
+    }
+    else{
+      CkExit();
+    }
+  }
+
+  // Regular (copying) ping-pong step. The received 'msg' is not read; each
+  // element replies with its own nocopyMsg buffer.
+  void recv(char* msg, int size)
+  {
+    if(thisIndex==0) {
+      niter++;
+      if(niter==iterations) {
+        end_time = CkWallTimer();
+        reg_time = 1.0e6*(end_time-start_time)/iterations;
+        niter = 0;
+        // regular phase finished: restart the timer and begin the zero copy phase
+        start_time = CkWallTimer();
+        thisProxy[1].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_PREREG), size);
+      } else {
+        thisProxy[1].recv(nocopyMsg, size);
+      }
+    } else {
+      thisProxy[0].recv(nocopyMsg, size);
+    }
+  }
+
+  // Zero copy ping-pong step; index 0 prints both timings when the phase ends
+  // and then reports back to the main chare.
+  void recv_zerocopy(char* msg, int size)
+  {
+    if(thisIndex==0) {
+      niter++;
+      if(niter==iterations) {
+        end_time = CkWallTimer();
+        zerocpy_time = 1.0e6*(end_time-start_time)/iterations;
+        // the measured times cover a round trip, hence the division by 2
+        if(size < 1 << 24)
+          CkPrintf("%d\t\t\t%d\t\t\t%lf\t\t\t%lf\n", size, iterations, reg_time/2, zerocpy_time/2);
+        else //using different print format for larger numbers for aligned output
+          CkPrintf("%d\t\t%d\t\t\t%lf\t\t\t%lf\n", size, iterations, reg_time/2, zerocpy_time/2);
+        niter=0;
+        mainProxy.maindone();
+      } else {
+        thisProxy[1].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_PREREG), size);
+      }
+    } else {
+      thisProxy[0].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_PREREG), size);
+    }
+  }
+};
+
+#include "pingpong.def.h"
similarity index 86%
rename from examples/charm++/zerocopy/entry_method_api/simpleZeroCopy/Makefile
rename to examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/Makefile
index 28f08b07a61d2748da7ca6f131ea8e74a757dd8e..8c3654c2774224480c699dd0878ded020e6e0525 100644 (file)
@@ -1,5 +1,5 @@
--include ../../../../common.mk
-CHARMC=../../../../../bin/charmc $(OPTS)
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
 
 all: simpleZeroCopy
 
diff --git a/examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.C b/examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.C
new file mode 100644 (file)
index 0000000..0447cf3
--- /dev/null
@@ -0,0 +1,281 @@
+#include "simpleZeroCopy.decl.h"
+#include <assert.h>
+
+//Set DEBUG(x) to x to see the debug messages
+//#define DEBUG(x) x
+#define DEBUG(x)
+// AtSync() is called whenever the iteration counter is a multiple of this value
+#define LBPERIOD_ITER 5
+
+// Number of array elements; set from the command line in Main's constructor
+int numElements;
+
+//Main chare
+class Main : public CBase_Main{
+  public:
+    Main(CkArgMsg *m){
+      if(m->argc!=2){
+        ckout<<"Usage: zerocopy <numelements>"<<endl;
+        CkExit(1);
+      }
+      numElements = atoi(m->argv[1]);
+      delete m;
+      if(numElements%2 != 0){
+        ckout<<"Argument <numelements> should be even"<<endl;
+        CkExit(1);
+      }
+
+      CProxy_RRMap rrMap = CProxy_RRMap::ckNew();
+      CkArrayOptions opts(numElements);
+      opts.setMap(rrMap);
+      CProxy_zerocopyObject zerocopyObj = CProxy_zerocopyObject::ckNew(opts);
+      zerocopyObj.testZeroCopy(thisProxy);
+    }
+
+    void done(){
+      CkPrintf("sdagRun: completed\nAll sending completed and result validated\n");
+      CkExit();
+    }
+
+    Main(CkMigrateMessage *m){}
+};
+
+// Asserts that aArr[0..size) matches bArr[startIdx..startIdx+size).
+template<class T>
+void compareArray(T *&aArr, T *&bArr, int size, int startIdx=0){
+  int pos = 0;
+  while(pos < size){
+    assert(aArr[pos] == bArr[startIdx + pos]);
+    ++pos;
+  }
+}
+
+// Replaces dest with a freshly CkRdmaAlloc'd copy of the first `size`
+// elements of src; a previous non-NULL dest is released first.
+template<class T>
+void copyArray(T *&dest, T *&src, int size){
+  if(dest != NULL){
+    CkRdmaFree(dest);
+  }
+  void *fresh = CkRdmaAlloc(sizeof(T) * size);
+  dest = (T *)fresh;
+  memcpy(dest, src, size*sizeof(T));
+}
+
+// Allocates arr with CkRdmaAlloc and fills it with rand()-derived values
+// in the range 1..100.
+template<class T>
+void assignValues(T *&arr, int size){
+  arr = (T *)CkRdmaAlloc(sizeof(T) * size);
+  int filled = 0;
+  while(filled < size){
+    arr[filled] = rand() % 100 + 1;
+    ++filled;
+  }
+}
+
+// Allocates arr with CkRdmaAlloc and fills it with rand()-derived
+// characters whose codes lie in the range 1..125.
+void assignCharValues(char *&arr, int size){
+  arr = (char *)CkRdmaAlloc(sizeof(char) * size);
+  int filled = 0;
+  while(filled < size){
+    arr[filled] = (char)(rand() % 125 + 1);
+    ++filled;
+  }
+}
+
+//zerocopy object chare
+// Each element is paired with the element at index numElements-1-thisIndex.
+// Elements in the lower half (thisIndex < numElements/2) create the data,
+// start each test and validate what comes back; elements in the upper half
+// copy what they receive and send it back.
+class zerocopyObject : public CBase_zerocopyObject{
+  int *iArr1, *iArr2;
+  double *dArr1, *dArr2;
+  char *cArr1;
+  int iSize1, iSize2, dSize1, dSize2, cSize1, iOffset1, cOffset1;
+  int destIndex, iter, num, j;
+  int mixedZeroCopySentCounter, sdagZeroCopySentCounter, sdagZeroCopyRecvCounter;
+  // true until this object has been recreated by unpacking in pup()
+  bool firstMigrationPending;
+  CkCallback cb, sdagCb;
+  int idx_zerocopySent, idx_sdagZeroCopySent;;
+  CProxy_Main mainProxy;
+
+  public:
+    zerocopyObject_SDAG_CODE
+    zerocopyObject(){
+      usesAtSync = true;
+      destIndex = numElements - 1 - thisIndex;
+      DEBUG(CkPrintf("[%d]  me - %d, my neighbour- %d \n", CkMyNode(), thisIndex, destIndex);)
+      //counter for tracking mixedSend completions to initiate sdagRun
+      mixedZeroCopySentCounter = 0;
+
+      //counter for tracking sdagRecv send completions
+      sdagZeroCopySentCounter = 0;
+
+      //counter for tracking sdagRecv completions
+      sdagZeroCopyRecvCounter = 0;
+      iArr1 = NULL;
+      iArr2 = NULL;
+      dArr1 = NULL;
+      dArr2 = NULL;
+      cArr1 = NULL;
+      iter = 1;
+      num = 4;
+      j = 0;
+      firstMigrationPending = true;
+      // both callbacks target entry methods of this very element
+      idx_zerocopySent = CkIndex_zerocopyObject::zerocopySent(NULL);
+      idx_sdagZeroCopySent = CkIndex_zerocopyObject::sdagZeroCopySent(NULL);
+      cb = CkCallback(idx_zerocopySent, thisProxy[thisIndex]);
+      sdagCb = CkCallback(idx_sdagZeroCopySent, thisProxy[thisIndex]);
+    }
+
+    // Packs/unpacks the state needed to continue sdagRun after a migration.
+    void pup(PUP::er &p){
+      p|iter;
+      p|destIndex;
+      p|cb;
+      p|num;
+      p|iSize1;
+      p|dSize2;
+      p|mixedZeroCopySentCounter;
+      p|sdagZeroCopySentCounter;
+      p|sdagZeroCopyRecvCounter;
+      p|mainProxy;
+      p|sdagCb;
+
+      // sdagRun only uses iArr1 and dArr2
+      // the other arrays needn't be pupped/unpupped
+      if (p.isUnpacking()){
+        // the sizes were unpacked above, so the buffers can be allocated here
+        iArr1 = (int *)CkRdmaAlloc(iSize1 * sizeof(int));
+        dArr2 = (double *)CkRdmaAlloc(dSize2 * sizeof(double));
+        j=0;
+        firstMigrationPending = false;
+      }
+      p(iArr1, iSize1);
+      p(dArr2, dSize2);
+    }
+
+    ~zerocopyObject() {
+      if(firstMigrationPending) {
+        // delete on first migration on all chares
+        CkRdmaFree(cArr1);
+
+        if(thisIndex < numElements/2) {
+          // delete on first migration on the first set of chares
+          // as it is deleted in the callback on the other set
+          CkRdmaFree(iArr2);
+          CkRdmaFree(dArr1);
+        }
+
+      }
+      // delete every time after migration as they are pupped to be used for sdagRun
+      CkRdmaFree(dArr2);
+      CkRdmaFree(iArr1);
+    }
+
+    zerocopyObject(CkMigrateMessage *m){}
+
+    // Completion callback (cb) of the zerocopy sends issued in mixedSend:
+    // frees the source buffer and starts sdagRun after the second completion.
+    void zerocopySent(CkDataMsg *m){
+      // Get access to the array information sent via zerocopy
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+      //free((void *)(src->ptr));
+      CkRdmaFree((void *)(src->ptr));
+      delete m;
+
+      if(++mixedZeroCopySentCounter == 2)
+        thisProxy[thisIndex].sdagRun();
+    }
+
+    // Completion callback (sdagCb) of the zerocopy sends issued in sdagRun.
+    void sdagZeroCopySent(CkDataMsg *m){
+      delete m;
+      // increment on completing the send of a zerocopy parameter in sdagRecv
+      sdagZeroCopySentCounter++;
+
+      // check that all sends and recvs have completed and then advance
+      if(sdagZeroCopySentCounter == 2*num && sdagZeroCopyRecvCounter == num)
+        nextStep();
+    }
+
+    // Entry point of the test: sets the sizes and offsets on every element;
+    // the lower half additionally creates the data and starts the regular send.
+    void testZeroCopy(CProxy_Main mProxy){
+      iSize1 = 210;
+      iSize2 = 11;
+      dSize1 = 4700;
+      dSize2 = 79;
+      cSize1 = 32;
+
+      iOffset1 = 100;
+      cOffset1 = 2;
+
+      mainProxy = mProxy;
+      if(thisIndex < numElements/2){
+        assignValues(iArr1, iSize1);
+        assignValues(iArr2, iSize2);
+        assignValues(dArr1, dSize1);
+        assignValues(dArr2, dSize2);
+        assignCharValues(cArr1, cSize1);
+        thisProxy[destIndex].send(iSize1, iArr1, dSize1, dArr1, cSize1, cArr1);
+      }
+    }
+
+    // Regular send: the upper half echoes the data, the lower half validates
+    // the echo and then starts the zerocopy send.
+    void send(int n1, int *ptr1, int n2, double *ptr2, int n3, char *ptr3){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1);
+        compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, cArr1, n3);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": Regular send completed"<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("send: completed\n");
+        // cannot use PREREG mode for offset buffers
+        thisProxy[destIndex].zerocopySend(iSize1-iOffset1, CkSendBuffer(iArr1+iOffset1), dSize1, CkSendBuffer(dArr1, CK_BUFFER_PREREG), cSize1-cOffset1, CkSendBuffer(cArr1 + cOffset1));
+      }
+      else{
+        thisProxy[destIndex].send(n1, ptr1, n2, ptr2, n3, ptr3);
+      }
+    }
+
+    // Zerocopy send: the upper half copies the data and returns it, the lower
+    // half validates it (honouring the offsets used when sending) and then
+    // starts the mixed send.
+    void zerocopySend(int n1, int *ptr1, int n2, double *ptr2, int n3, char *ptr3){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1, iOffset1);
+        compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, cArr1, n3, cOffset1);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": ZeroCopy send completed"<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("zerocopySend: completed\n");
+        thisProxy[destIndex].mixedSend(iSize1, iArr1, dSize1, CkSendBuffer(dArr1, CK_BUFFER_PREREG), iSize2, CkSendBuffer(iArr2, CK_BUFFER_PREREG), dSize2, dArr2);
+      }
+      else{
+        copyArray(iArr1, ptr1, n1);
+        copyArray(dArr1, ptr2, n2);
+        copyArray(cArr1, ptr3, n3);
+        thisProxy[destIndex].zerocopySend(n1, CkSendBuffer(iArr1, CK_BUFFER_PREREG), n2, CkSendBuffer(dArr1, CK_BUFFER_PREREG), n3, CkSendBuffer(cArr1, CK_BUFFER_PREREG));
+      }
+    }
+
+    // Mixed send (regular and zerocopy parameters): the upper half copies the
+    // data and returns it with cb attached to the zerocopy buffers, the lower
+    // half validates it and starts sdagRun.
+    void mixedSend(int n1, int *ptr1, int n2, double *ptr2, int n3, int *ptr3, int n4, double *ptr4){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1);
+        compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, iArr2, n3);
+        compareArray(ptr4, dArr2, n4);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": Mixed send completed "<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("mixedSend: completed\n");
+        thisProxy[thisIndex].sdagRun();
+      }
+      else{
+        copyArray(iArr1, ptr1, n1);
+        copyArray(dArr1, ptr2, n2);
+        copyArray(iArr2, ptr3, n3);
+        copyArray(dArr2, ptr4, n4);
+        thisProxy[destIndex].mixedSend(n1, iArr1, n2, CkSendBuffer(dArr1, cb, CK_BUFFER_PREREG), n3, CkSendBuffer(iArr2, cb, CK_BUFFER_PREREG), n4, dArr2);
+      }
+    }
+
+    // Finishes one sdagRun iteration and decides how to continue.
+    void nextStep() {
+      // reset the completion counters
+      sdagZeroCopyRecvCounter = 0;
+      sdagZeroCopySentCounter = 0;
+
+      if(thisIndex == 0)
+          CkPrintf("sdagRun: Iteration %d completed\n", iter);
+
+      //increase iteration and continue
+      iter++;
+
+      //load balance
+      if(iter % LBPERIOD_ITER == 0)
+        AtSync();
+      else if(iter<=100)
+        thisProxy[thisIndex].sdagRun();
+      else {
+        // all iterations done: join the reduction that ends in Main::done
+        CkCallback reductionCb(CkReductionTarget(Main, done), mainProxy);
+        contribute(reductionCb);
+      }
+    }
+
+    // Continues with the next sdagRun iteration after AtSync()
+    void ResumeFromSync() {
+      thisProxy[thisIndex].sdagRun();
+    }
+};
+
+#include "simpleZeroCopy.def.h"
diff --git a/examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.ci b/examples/charm++/zerocopy/entry_method_api/prereg/simpleZeroCopy/simpleZeroCopy.ci
new file mode 100644 (file)
index 0000000..6144a71
--- /dev/null
@@ -0,0 +1,46 @@
+// Interface of the simpleZeroCopy example: a main chare and a 1D array of
+// zerocopyObject elements that exchange regular and nocopy parameters.
+mainmodule simpleZeroCopy {
+
+  readonly int numElements;
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void done();
+  };
+
+  array [1D] zerocopyObject{
+    entry zerocopyObject();
+    entry void testZeroCopy(CProxy_Main mProxy);
+    entry void zerocopySent(CkDataMsg *msg);
+    // same payload sent three ways: all regular, all nocopy, and a mix of both
+    entry void send(int n1, int ptr1[n1], int n2, double ptr2[n2], int n3, char ptr3[n3]);
+    entry void zerocopySend(int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2], int n3, nocopy char ptr3[n3]);
+    entry void mixedSend(int n1, int ptr1[n1], int n2, nocopy double ptr2[n2], int n3, nocopy int ptr3[n3], int n4, double ptr4[n4]);
+
+    // One iteration: send num messages to the partner, then receive num
+    // messages tagged with the current iteration number.
+    entry void sdagRun() {
+      serial {
+        // send num arrays to its partner
+        for(int i = 1; i <= num; i++)
+          thisProxy[destIndex].sdagRecv(iter, iSize1, CkSendBuffer(iArr1, sdagCb, CK_BUFFER_PREREG), dSize2, CkSendBuffer(dArr2, sdagCb, CK_BUFFER_PREREG));
+      }
+
+      // wait for num arrays from partner
+      for (j = 1; j <= num; j++){
+        when sdagRecv[iter] (int iter, int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2]){
+          serial {
+            // increment counter on receiving a sdagRecv
+            sdagZeroCopyRecvCounter++;
+            compareArray(ptr1, iArr1, n1);
+            compareArray(ptr2, dArr2, n2);
+
+            // check that all sends and recvs have completed and then advance
+            if(sdagZeroCopySentCounter == 2*num && sdagZeroCopyRecvCounter == num)
+              nextStep();
+          }
+        }
+      }
+    }
+
+    entry void sdagZeroCopySent(CkDataMsg *msg);
+    entry void sdagRecv(int iter, int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2]);
+  };
+
+}
similarity index 90%
rename from examples/charm++/zerocopy/entry_method_api/stencil3d/Makefile
rename to examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/Makefile
index b81102cf93f79d4b65bedb725fa26c6c88b0fe9f..16ad372e162e80bfcfafab3c7f25a60f7f46c414 100644 (file)
@@ -1,5 +1,5 @@
--include ../../../../common.mk
-CHARMC = ../../../../../bin/charmc $(OPTS)
+-include ../../../../../common.mk
+CHARMC = ../../../../../../bin/charmc $(OPTS)
 
 OBJS = stencil3d.o
 
diff --git a/examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/stencil3d.C b/examples/charm++/zerocopy/entry_method_api/prereg/stencil3d/stencil3d.C
new file mode 100644 (file)
index 0000000..f66b3bd
--- /dev/null
@@ -0,0 +1,390 @@
+/** \file stencil3d.C
+
+ *  This example is the nocopy version of the stencil example
+ *  in examples/charm++/load_balancing/stencil3d *
+ *           *****************
+ *        *               *  *
+ *   ^ *****************     *
+ *   | *               *     *
+ *   | *               *     *
+ *   | *               *     *
+ *   Y *               *     *
+ *   | *               *     *
+ *   | *               *     *
+ *   | *               *  *
+ *   ~ *****************    Z
+ *     <------ X ------>
+ *
+ *   X: left, right --> wrap_x
+ *   Y: top, bottom --> wrap_y
+ *   Z: front, back --> wrap_z
+ */
+
+#include "stencil3d.decl.h"
+#include "TopoManager.h"
+
+/*readonly*/ CProxy_Main mainProxy;
+/*readonly*/ int arrayDimX;
+/*readonly*/ int arrayDimY;
+/*readonly*/ int arrayDimZ;
+/*readonly*/ int blockDimX;
+/*readonly*/ int blockDimY;
+/*readonly*/ int blockDimZ;
+
+// specify the number of worker chares in each dimension
+/*readonly*/ int num_chare_x;
+/*readonly*/ int num_chare_y;
+/*readonly*/ int num_chare_z;
+
+static unsigned long next = 1;
+
+// Linear congruential generator: advances the file-level state `next` and
+// reduces bits 16 and up of the new state modulo numpes.
+int myrand(int numpes) {
+  next *= 1103515245;
+  next += 12345;
+  const unsigned upperBits = (unsigned)(next >> 16);
+  return upperBits % numpes;
+}
+
+// Wrap chare indices around the grid. The dimension is added before taking
+// the remainder because the % operator can return a negative result for a
+// negative operand; with the offset, -1 maps to the highest index.
+#define wrap_x(a)      (((a)+num_chare_x)%num_chare_x)
+#define wrap_y(a)      (((a)+num_chare_y)%num_chare_y)
+#define wrap_z(a)      (((a)+num_chare_z)%num_chare_z)
+
+// Flattens (a,b,c) into an offset in the block's 1D array, whose X and Y
+// extents are blockDimX+2 and blockDimY+2
+#define index(a,b,c)   ((a)+(b)*(blockDimX+2)+(c)*(blockDimX+2)*(blockDimY+2))
+
+#define MAX_ITER       100
+#define LBPERIOD_ITER  5    // LB is called every LBPERIOD_ITER number of program iterations
+#define CHANGELOAD     30
+// Direction tags passed to receiveGhosts/processGhosts
+#define LEFT           1
+#define RIGHT          2
+#define TOP            3
+#define BOTTOM         4
+#define FRONT          5
+#define BACK           6
+#define DIVIDEBY7              0.14285714285714285714
+
+/** \class Main
+ *
+ */
+class Main : public CBase_Main {
+  public:
+    CProxy_Stencil array;
+
+    Main(CkArgMsg* m) {
+      if ( (m->argc != 3) && (m->argc != 7) ) {
+        CkPrintf("%s [array_size] [block_size]\n", m->argv[0]);
+        CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]);
+        CkAbort("Abort");
+      }
+
+      // store the main proxy
+      mainProxy = thisProxy;
+
+      if(m->argc == 3) {
+        arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
+        blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
+      }
+      else if (m->argc == 7) {
+        arrayDimX = atoi(m->argv[1]);
+        arrayDimY = atoi(m->argv[2]);
+        arrayDimZ = atoi(m->argv[3]);
+        blockDimX = atoi(m->argv[4]);
+        blockDimY = atoi(m->argv[5]);
+        blockDimZ = atoi(m->argv[6]);
+      }
+
+      if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
+        CkAbort("array_size_X % block_size_X != 0!");
+      if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
+        CkAbort("array_size_Y % block_size_Y != 0!");
+      if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
+        CkAbort("array_size_Z % block_size_Z != 0!");
+
+      num_chare_x = arrayDimX / blockDimX;
+      num_chare_y = arrayDimY / blockDimY;
+      num_chare_z = arrayDimZ / blockDimZ;
+
+      // print info
+      CkPrintf("\nSTENCIL COMPUTATION WITH BARRIERS\n");
+      CkPrintf("Running Stencil on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z);
+      CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
+      CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);
+
+      // Create new array of worker chares
+      array = CProxy_Stencil::ckNew(num_chare_x, num_chare_y, num_chare_z);
+
+      //Start the computation
+      array.doStep();
+    }
+
+    // Each worker reports back to here when it completes an iteration
+    void report() {
+      CkExit();
+    }
+};
+
+/** \class Stencil
+ *
+ */
+
+class Stencil: public CBase_Stencil {
+  Stencil_SDAG_CODE
+  private:
+    double startTime;
+
+  public:
+    int iterations;
+    int imsg;
+    int counter;
+
+    double *temperature;
+    double *new_temperature;
+    CkCallback cb;
+
+    // callback function called on completion of sending ghosts
+    void completedSendingGhost(CkDataMsg *msg){
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(msg->data);
+      void *ptr = (void *)(src->ptr);
+      CkRdmaFree(ptr);
+      delete msg;
+      counter++;
+      // Advance to next step on completion of sending ghosts to the 6 neighbors
+      if(counter == 6){
+        counter = 0;
+        thisProxy[thisIndex].nextStep();
+      }
+    }
+
+    // Constructor, initialize values
+    Stencil() {
+      usesAtSync = true;
+      counter = 0;
+
+      int i, j, k;
+      // callback delivered to this element when one of its ghost sends completes
+      cb = CkCallback(CkIndex_Stencil::completedSendingGhost(NULL), thisProxy(thisIndex.x, thisIndex.y, thisIndex.z));
+      // allocate a three dimensional array
+      temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+      new_temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+
+      // only temperature is zeroed here; new_temperature is left as allocated
+      for(k=0; k<blockDimZ+2; ++k)
+        for(j=0; j<blockDimY+2; ++j)
+          for(i=0; i<blockDimX+2; ++i)
+            temperature[index(i, j, k)] = 0.0;
+
+      iterations = 0;
+      imsg = 0;
+      constrainBC();
+      // start measuring time
+      if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
+        startTime = CkWallTimer();
+
+#if CMK_LBDB_ON
+      // set period arbitrarily small so that LB occurs when AtSync is called
+      // this is in case the default LBPERIOD is larger than the time to complete LBPERIOD_ITER
+      // iterations
+      getLBDB()->SetLBPeriod(0);
+#endif
+    }
+
+    // Packs/unpacks the timing and iteration state together with both
+    // temperature grids; cb and counter are rebuilt when unpacking.
+    void pup(PUP::er &p)
+    {
+      p|startTime;
+      p|iterations;
+      p|imsg;
+
+      size_t size = (blockDimX+2) * (blockDimY+2) * (blockDimZ+2);
+      if (p.isUnpacking()) {
+        // recreate the members that are not part of the packed stream
+        cb = CkCallback(CkIndex_Stencil::completedSendingGhost(NULL), thisProxy(thisIndex.x, thisIndex.y, thisIndex.z));
+        temperature = new double[size];
+        new_temperature = new double[size];
+        counter = 0;
+      }
+      p(temperature, size);
+      p(new_temperature, size);
+    }
+
+    // Migration constructor: initializes nothing itself
+    Stencil(CkMigrateMessage* m) { }
+
+    // Releases both temperature grids
+    ~Stencil() {
+      delete [] new_temperature;
+      delete [] temperature;
+    }
+
+    // Send ghost faces to the six neighbors
+    void begin_iteration(void) {
+      iterations++;
+
+      // Copy different faces into messages
+      double *leftGhost =  (double *)CkRdmaAlloc(sizeof(double) * blockDimY * blockDimZ);
+      double *rightGhost = (double *)CkRdmaAlloc(sizeof(double) * blockDimY * blockDimZ);
+      double *topGhost =  (double *)CkRdmaAlloc(sizeof(double) * blockDimX * blockDimZ);
+      double *bottomGhost = (double *)CkRdmaAlloc(sizeof(double) * blockDimX * blockDimZ);
+      double *frontGhost =  (double *)CkRdmaAlloc(sizeof(double) * blockDimX * blockDimY);
+      double *backGhost =  (double *)CkRdmaAlloc(sizeof(double) * blockDimX * blockDimY);
+
+      for(int k=0; k<blockDimZ; ++k)
+        for(int j=0; j<blockDimY; ++j) {
+          leftGhost[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
+          rightGhost[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];
+        }
+
+      for(int k=0; k<blockDimZ; ++k)
+        for(int i=0; i<blockDimX; ++i) {
+          topGhost[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
+          bottomGhost[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
+        }
+
+      for(int j=0; j<blockDimY; ++j)
+        for(int i=0; i<blockDimX; ++i) {
+          frontGhost[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
+          backGhost[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
+        }
+
+      // Send my left face
+      thisProxy(wrap_x(thisIndex.x-1), thisIndex.y, thisIndex.z)
+        .receiveGhosts(iterations, RIGHT, blockDimY, blockDimZ, CkSendBuffer(leftGhost, cb, CK_BUFFER_PREREG));
+      // Send my right face
+      thisProxy(wrap_x(thisIndex.x+1), thisIndex.y, thisIndex.z)
+        .receiveGhosts(iterations, LEFT, blockDimY, blockDimZ, CkSendBuffer(rightGhost, cb, CK_BUFFER_PREREG));
+      // Send my bottom face
+      thisProxy(thisIndex.x, wrap_y(thisIndex.y-1), thisIndex.z)
+        .receiveGhosts(iterations, TOP, blockDimX, blockDimZ, CkSendBuffer(bottomGhost, cb, CK_BUFFER_PREREG));
+      // Send my top face
+      thisProxy(thisIndex.x, wrap_y(thisIndex.y+1), thisIndex.z)
+        .receiveGhosts(iterations, BOTTOM, blockDimX, blockDimZ, CkSendBuffer(topGhost, cb, CK_BUFFER_PREREG));
+      // Send my front face
+      thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z-1))
+        .receiveGhosts(iterations, BACK, blockDimX, blockDimY, CkSendBuffer(frontGhost, cb, CK_BUFFER_PREREG));
+      // Send my back face
+      thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z+1))
+        .receiveGhosts(iterations, FRONT, blockDimX, blockDimY, CkSendBuffer(backGhost, cb, CK_BUFFER_PREREG));
+
+      // control flow continues in completedSendingGhost
+    }
+
+    // Copies a received ghost face into the ghost layer selected by dir.
+    // gh holds width rows of height values each (element (row, col) is at
+    // gh[row*height + col]); an unknown dir aborts the program.
+    void processGhosts(int dir, int height, int width, double gh[]) {
+      switch(dir) {
+        // X faces: ghost layers at x = 0 and x = blockDimX+1
+        case LEFT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(0, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        case RIGHT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(blockDimX+1, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        // Y faces: ghost layers at y = 0 and y = blockDimY+1
+        case BOTTOM:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, 0, k+1)] = gh[k*height+i];
+            }
+          break;
+        case TOP:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, blockDimY+1, k+1)] = gh[k*height+i];
+            }
+          break;
+        // Z faces: ghost layers at z = 0 and z = blockDimZ+1
+        case FRONT:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, 0)] = gh[j*height+i];
+            }
+          break;
+        case BACK:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, blockDimZ+1)] = gh[j*height+i];
+            }
+          break;
+        default:
+          CkAbort("ERROR\n");
+      }
+    }
+
+    void check_and_compute() {
+      compute_kernel();
+
+      // calculate error
+      // not being done right now since we are doing a fixed no. of iterations
+
+      double *tmp;
+      tmp = temperature;
+      temperature = new_temperature;
+      new_temperature = tmp;
+
+      constrainBC();
+
+      if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0) {
+        double endTime = CkWallTimer();
+        CkPrintf("[%d] Time per iteration: %f %f\n", iterations, (endTime - startTime), endTime);
+      }
+
+      if(iterations == MAX_ITER)
+        contribute(CkCallback(CkReductionTarget(Main, report), mainProxy));
+      else {
+        if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
+          startTime = CkWallTimer();
+        if(iterations % LBPERIOD_ITER == 0)
+        {
+          AtSync();
+        }
+        else {
+          contribute(CkCallback(CkReductionTarget(Stencil, doStep), thisProxy));
+        }
+      }
+    }
+
+    // Apply the 7-point averaging stencil to every interior cell, reading
+    // `temperature` and writing `new_temperature`.
+    // The whole sweep is repeated `work` times. `temperature` is not modified
+    // here, so the repeats recompute the same values and only add load.
+    // `work` depends on this chare's linearized index and on the iteration
+    // count, so different chares do different amounts of work.
+    void compute_kernel() {
+      // grows by 5 every CHANGELOAD iterations
+      int itno = (int)ceil((double)iterations/(double)CHANGELOAD) * 5;
+      // linearized chare index; the local variable does not clash with the
+      // function-like macro index(a,b,c), which only expands before '('
+      int index = thisIndex.x + thisIndex.y*num_chare_x + thisIndex.z*num_chare_x*num_chare_y;
+      int numChares = num_chare_x * num_chare_y * num_chare_z;
+      double work = 100.0;
+
+      // chares in the middle 20%..80% of the index range get more work
+      if(index >= numChares*0.2 && index <=numChares*0.8) {
+        work = work * ((double)index/(double)numChares) + (double)itno;
+      } else
+        work = 10.0;
+
+#pragma unroll
+      for(int w=0; w<work; w++) {
+        for(int k=1; k<blockDimZ+1; ++k)
+          for(int j=1; j<blockDimY+1; ++j)
+            for(int i=1; i<blockDimX+1; ++i) {
+              // update my value based on the surrounding values
+              new_temperature[index(i, j, k)] = (temperature[index(i-1, j, k)]
+                  +  temperature[index(i+1, j, k)]
+                  +  temperature[index(i, j-1, k)]
+                  +  temperature[index(i, j+1, k)]
+                  +  temperature[index(i, j, k-1)]
+                  +  temperature[index(i, j, k+1)]
+                  +  temperature[index(i, j, k)] )
+                *  DIVIDEBY7;
+            } // end for
+      }
+    }
+
+    // Enforce some boundary conditions
+    void constrainBC() {
+      // Heat left, top and front faces of each chare's block
+      for(int k=1; k<blockDimZ+1; ++k)
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, 1, k)] = 255.0;
+      for(int k=1; k<blockDimZ+1; ++k)
+        for(int j=1; j<blockDimY+1; ++j)
+          temperature[index(1, j, k)] = 255.0;
+      for(int j=1; j<blockDimY+1; ++j)
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, j, 1)] = 255.0;
+    }
+
+    // Restart the iteration loop by calling doStep().
+    // NOTE(review): presumably invoked by the Charm++ runtime once the
+    // load-balancing step entered via AtSync() has finished - confirm.
+    void ResumeFromSync() {
+      doStep();
+    }
+};
+
+#include "stencil3d.def.h"
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/Makefile b/examples/charm++/zerocopy/entry_method_api/reg/Makefile
new file mode 100644 (file)
index 0000000..278c36d
--- /dev/null
@@ -0,0 +1,23 @@
+# Example sub-directories; each one is built, tested and cleaned through its
+# own Makefile. No continuation after the last entry, so a following line can
+# never be absorbed into DIRS by accident.
+DIRS = \
+  pingpong \
+  simpleZeroCopy \
+  stencil3d
+
+TESTDIRS = $(DIRS)
+
+all: $(foreach i,$(DIRS),build-$i)
+
+test: $(foreach i,$(TESTDIRS),test-$i)
+
+# The '#' characters are escaped: an unquoted word starting with '#' begins a
+# shell comment, so the pattern would otherwise never reach rm.
+clean: $(foreach i,$(DIRS),clean-$i)
+       rm -f TAGS \#*\#
+       rm -f core *~
+
+# build-<dir>, test-<dir> and clean-<dir> strip their prefix from the target
+# name to get the directory to recurse into.
+$(foreach i,$(DIRS),build-$i):
+       $(MAKE) -C $(subst build-,,$@) all OPTS='$(OPTS)'
+
+$(foreach i,$(DIRS),test-$i):
+       $(MAKE) -C $(subst test-,,$@) test OPTS='$(OPTS)' TESTOPTS='$(TESTOPTS)'
+
+$(foreach i,$(DIRS),clean-$i):
+       $(MAKE) -C $(subst clean-,,$@) clean OPTS='$(OPTS)'
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/pingpong/Makefile b/examples/charm++/zerocopy/entry_method_api/reg/pingpong/Makefile
new file mode 100644 (file)
index 0000000..99ad14f
--- /dev/null
@@ -0,0 +1,25 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test target, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
+
+all:   pgm
+
+OBJS = pingpong.o
+
+pgm: $(OBJS)
+       $(CHARMC) -language charm++ -o pgm $(OBJS)
+
+# `cifiles` is a stamp file recording that pingpong.ci has been translated.
+cifiles: pingpong.ci
+       $(CHARMC)  pingpong.ci
+       touch cifiles
+
+pingpong.o: pingpong.C cifiles
+       $(CHARMC) -c pingpong.C
+
+# Runs the benchmark once with +p1 and once with +p2.
+test: all
+       @echo "Intra-processor Pingpong.."
+       $(call run, ./pgm +p1 )
+       @echo "Inter-processor Pingpong.."
+       $(call run, ./pgm +p2 )
+
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o pgm charmrun cifiles
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/pingpong/pingpong.ci b/examples/charm++/zerocopy/entry_method_api/reg/pingpong/pingpong.ci
new file mode 100644 (file)
index 0000000..caec180
--- /dev/null
@@ -0,0 +1,14 @@
+// Interface file for the pingpong benchmark: one mainchare that drives the
+// run and a 1D chare array whose elements bounce a buffer back and forth.
+mainmodule pingpong {
+  readonly CProxy_main mainProxy;
+  mainchare main {
+    entry main(CkArgMsg *m);
+    entry void maindone(void);
+  };
+  array [1D] Ping1 {
+    entry Ping1();
+    entry void start(int size);
+    entry void freeBuffer();
+    // regular marshalled send of `size` bytes
+    entry void recv(char msg[size], int size);
+    // same payload, but the array parameter is declared `nocopy`
+    entry void recv_zerocopy(nocopy char msg[size], int size);
+  };
+};
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/Makefile b/examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/Makefile
new file mode 100644 (file)
index 0000000..8c3654c
--- /dev/null
@@ -0,0 +1,21 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test target, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
+
+all: simpleZeroCopy
+
+# Linked with -module CommonLBs; the test target selects a balancer at run time.
+simpleZeroCopy:  simpleZeroCopy.o
+       $(CHARMC) simpleZeroCopy.o -o simpleZeroCopy -language charm++ -module CommonLBs
+
+# `cifiles` is a stamp file recording that simpleZeroCopy.ci has been translated.
+cifiles: simpleZeroCopy.ci
+       $(CHARMC) -c simpleZeroCopy.ci
+       touch cifiles
+
+simpleZeroCopy.o : simpleZeroCopy.C cifiles
+       $(CHARMC) -c simpleZeroCopy.C
+
+# 32 is the <numelements> argument; it must be even (checked in Main).
+test: all
+       $(call run, +p4 ./simpleZeroCopy 32 +balancer RotateLB)
+       $(call run, +p4 ./simpleZeroCopy 32 +balancer GreedyLB)
+
+clean:
+       rm -f *.def.h *.decl.h *.o *~ *.exe cifiles charmrun simpleZeroCopy
similarity index 95%
rename from examples/charm++/zerocopy/entry_method_api/simpleZeroCopy/simpleZeroCopy.C
rename to examples/charm++/zerocopy/entry_method_api/reg/simpleZeroCopy/simpleZeroCopy.C
index 0a248529395ea7e4d0a739de79ac04380af73766..8ecffcfffc213d0b5496a92007f6a99903e88879 100644 (file)
@@ -155,7 +155,10 @@ class zerocopyObject : public CBase_zerocopyObject{
     void zerocopySent(CkDataMsg *m){
       // Get access to the array information sent via zerocopy
       CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+      // de-register the memory
+      src->deregisterMem();
       free((void *)(src->ptr));
+
       delete m;
 
       if(++mixedZeroCopySentCounter == 2)
@@ -163,10 +166,19 @@ class zerocopyObject : public CBase_zerocopyObject{
     }
 
     void sdagZeroCopySent(CkDataMsg *m){
-      delete m;
       // increment on completing the send of an zerocopy parameter in sdagRecv
       sdagZeroCopySentCounter++;
 
+      // Get access to the array information sent via zerocopy
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+
+      // de-register the memory
+      src->deregisterMem();
+
+      void *ptr = (void *)(src->ptr); // do not free pointer as it is used in the next iteration as well
+
+      delete m;
+
       // check that all sends and recvs have completed and then advance
       if(sdagZeroCopySentCounter == 2*num && sdagZeroCopyRecvCounter == num)
         nextStep();
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/stencil3d/Makefile b/examples/charm++/zerocopy/entry_method_api/reg/stencil3d/Makefile
new file mode 100644 (file)
index 0000000..16ad372
--- /dev/null
@@ -0,0 +1,31 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test targets, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC = ../../../../../../bin/charmc $(OPTS)
+
+OBJS = stencil3d.o
+
+all: stencil3d
+
+stencil3d: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -o stencil3d $(OBJS)
+
+# Variants linked with a trace mode; they produce stencil3d.prj / stencil3d.sum.
+projections: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -tracemode projections -lz -o stencil3d.prj $(OBJS)
+
+summary: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -tracemode summary -lz -o stencil3d.sum $(OBJS)
+
+stencil3d.decl.h: stencil3d.ci
+       $(CHARMC)  stencil3d.ci
+
+stencil3d.o: stencil3d.C stencil3d.decl.h
+       $(CHARMC) -c stencil3d.C
+
+# Removes every binary this Makefile can produce, including the `summary`
+# build (stencil3d.sum).
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o stencil3d stencil3d.prj stencil3d.sum charmrun *~
+
+test: stencil3d
+       $(call run, +p4 ./stencil3d 64 32 +balancer RefineLB )
+       $(call run, +p4 ./stencil3d 64 32 +balancer GreedyLB )
+
+bgtest: stencil3d
+       $(call run, +p4 ./stencil3d 32 16 +balancer CommLB +x2 +y2 +z1 +cth1 +wth1 )
diff --git a/examples/charm++/zerocopy/entry_method_api/reg/stencil3d/stencil3d.ci b/examples/charm++/zerocopy/entry_method_api/reg/stencil3d/stencil3d.ci
new file mode 100644 (file)
index 0000000..d93540e
--- /dev/null
@@ -0,0 +1,49 @@
+// Interface file for the 3D stencil example: readonly globals, the Main
+// chare and the 3D Stencil chare array with its SDAG control flow.
+mainmodule stencil3d {
+
+  readonly CProxy_Main mainProxy;
+  readonly int arrayDimX;
+  readonly int arrayDimY;
+  readonly int arrayDimZ;
+  readonly int blockDimX;
+  readonly int blockDimY;
+  readonly int blockDimZ;
+
+  readonly int num_chare_x;
+  readonly int num_chare_y;
+  readonly int num_chare_z;
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void report();
+  };
+
+  array [3D] Stencil {
+    entry Stencil(void);
+    entry void begin_iteration(void);
+
+    entry void completedSendingGhost(CkDataMsg *msg);
+    // Wait for the six ghost faces of the current iteration, copying each
+    // one into the local block as it arrives, then run the compute step.
+    entry void nextStep(){
+      for(imsg = 0; imsg < 6; imsg++) {
+        // "iterations" keeps track of messages across steps
+        when receiveGhosts[iterations] (int iter, int dir, int height,
+            int width, nocopy double ghosts[height*width])
+          serial "process_ghosts" {
+            processGhosts(dir, height, width, ghosts);
+          }
+      }
+      serial "do_work" {
+        check_and_compute();
+      }
+    }
+
+    // `ghosts` is declared nocopy and holds height*width doubles
+    entry void receiveGhosts(int iter, int dir, int height, int width,
+        nocopy double ghosts[height*width]);
+
+    // Reduction target that starts the next iteration.
+    entry [reductiontarget] void doStep() {
+      serial "begin_iteration" {
+        begin_iteration();
+      }
+    };
+  };
+
+};
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/Makefile b/examples/charm++/zerocopy/entry_method_api/unreg/Makefile
new file mode 100644 (file)
index 0000000..278c36d
--- /dev/null
@@ -0,0 +1,23 @@
+# Example sub-directories; each one is built, tested and cleaned through its
+# own Makefile. No continuation after the last entry, so a following line can
+# never be absorbed into DIRS by accident.
+DIRS = \
+  pingpong \
+  simpleZeroCopy \
+  stencil3d
+
+TESTDIRS = $(DIRS)
+
+all: $(foreach i,$(DIRS),build-$i)
+
+test: $(foreach i,$(TESTDIRS),test-$i)
+
+# The '#' characters are escaped: an unquoted word starting with '#' begins a
+# shell comment, so the pattern would otherwise never reach rm.
+clean: $(foreach i,$(DIRS),clean-$i)
+       rm -f TAGS \#*\#
+       rm -f core *~
+
+# build-<dir>, test-<dir> and clean-<dir> strip their prefix from the target
+# name to get the directory to recurse into.
+$(foreach i,$(DIRS),build-$i):
+       $(MAKE) -C $(subst build-,,$@) all OPTS='$(OPTS)'
+
+$(foreach i,$(DIRS),test-$i):
+       $(MAKE) -C $(subst test-,,$@) test OPTS='$(OPTS)' TESTOPTS='$(TESTOPTS)'
+
+$(foreach i,$(DIRS),clean-$i):
+       $(MAKE) -C $(subst clean-,,$@) clean OPTS='$(OPTS)'
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/Makefile b/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/Makefile
new file mode 100644 (file)
index 0000000..99ad14f
--- /dev/null
@@ -0,0 +1,25 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test target, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
+
+all:   pgm
+
+OBJS = pingpong.o
+
+pgm: $(OBJS)
+       $(CHARMC) -language charm++ -o pgm $(OBJS)
+
+# `cifiles` is a stamp file recording that pingpong.ci has been translated.
+cifiles: pingpong.ci
+       $(CHARMC)  pingpong.ci
+       touch cifiles
+
+pingpong.o: pingpong.C cifiles
+       $(CHARMC) -c pingpong.C
+
+# Runs the benchmark once with +p1 and once with +p2.
+test: all
+       @echo "Intra-processor Pingpong.."
+       $(call run, ./pgm +p1 )
+       @echo "Inter-processor Pingpong.."
+       $(call run, ./pgm +p2 )
+
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o pgm charmrun cifiles
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.C b/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.C
new file mode 100644 (file)
index 0000000..14c2b41
--- /dev/null
@@ -0,0 +1,121 @@
+#include "pingpong.decl.h"
+
+// Round trips timed per message size: BIG_ITER for sizes below 1 << 20,
+// SMALL_ITER for sizes at or above it (see Ping1::start).
+// NOTE(review): despite the names, BIG_ITER is smaller than SMALL_ITER.
+#define BIG_ITER 10
+#define SMALL_ITER 100
+
+// Size of each chare's buffer and the point at which main::maindone stops.
+// Parenthesized so the shift cannot re-associate with operators around the
+// macro (e.g. MAX_PAYLOAD * 2).
+#define MAX_PAYLOAD (1 << 12)
+
+CProxy_main mainProxy;
+
+// Processor numbers: P1 is always 0, P2 is 1 when more than one PE is
+// available and 0 otherwise. P2 is parenthesized so the modulo cannot bind
+// to operators next to the macro.
+#define P1 0
+#define P2 (1%CkNumPes())
+
+class main : public CBase_main
+{
+  CProxy_Ping1 arr1;
+  int size;
+public:
+  main(CkMigrateMessage *m) {}
+  main(CkArgMsg *m)
+  {
+    if(CkNumPes()>2) {
+      CkAbort("Run this program on 1 or 2 processors only.\n");
+    }
+    delete m;
+    size = 1024;
+    mainProxy = thisProxy;
+    CkPrintf("Size (bytes) \t\tIterations\t\tRegular API (one-way us)\tZero Copy API (one-way us)\n");
+    arr1 = CProxy_Ping1::ckNew(2);
+    CkStartQD(CkCallback(CkIndex_main::maindone(), mainProxy));
+  };
+
+  void maindone(void){
+    if(size < MAX_PAYLOAD){
+      arr1[0].start(size);
+      size = size << 1;
+    }
+    else if(size == MAX_PAYLOAD){
+      arr1[0].freeBuffer();
+    }
+  };
+};
+
+
+// One end of the pingpong. Element 0 drives and times the exchange;
+// element 1 echoes every message back to element 0.
+class Ping1 : public CBase_Ping1
+{
+  int size;
+  int niter;        // round trips completed in the current phase
+  int iterations;   // round trips to time per phase
+  double start_time, end_time, reg_time, zerocpy_time;
+  char *nocopyMsg;  // send buffer of MAX_PAYLOAD bytes
+
+public:
+  Ping1()
+  {
+    nocopyMsg = new char[MAX_PAYLOAD];
+    niter = 0;
+  }
+  Ping1(CkMigrateMessage *m) {}
+
+  // Begin the regular-API phase for messages of `size` bytes.
+  void start(int size)
+  {
+    niter = 0;
+    if(size >= 1 << 20)
+      iterations = SMALL_ITER;
+    else
+      iterations = BIG_ITER;
+    start_time = CkWallTimer();
+    thisProxy[1].recv(nocopyMsg, size);
+  }
+
+  // Release this element's buffer. Element 0 then forwards the request to
+  // element 1, which exits the program.
+  void freeBuffer(){
+    delete [] nocopyMsg;
+    if(thisIndex == 0){
+      thisProxy[1].freeBuffer();
+    }
+    else{
+      CkExit();
+    }
+  }
+
+  // Regular (copying) receive. Element 0 counts round trips; after
+  // `iterations` of them it records the average time per round trip in
+  // microseconds and starts the zero-copy phase. Element 1 just echoes.
+  // The reply always carries this element's own nocopyMsg, not `msg`.
+  void recv(char* msg, int size)
+  {
+    if(thisIndex==0) {
+      niter++;
+      if(niter==iterations) {
+        end_time = CkWallTimer();
+        reg_time = 1.0e6*(end_time-start_time)/iterations;
+        niter = 0;
+        start_time = CkWallTimer();
+        thisProxy[1].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_UNREG), size);
+      } else {
+        thisProxy[1].recv(nocopyMsg, size);
+      }
+    } else {
+      thisProxy[0].recv(nocopyMsg, size);
+    }
+  }
+
+  // Zero-copy receive, same protocol as recv(). When the phase finishes,
+  // element 0 prints one result row (round-trip times halved to one-way
+  // times) and notifies main.
+  void recv_zerocopy(char* msg, int size)
+  {
+    if(thisIndex==0) {
+      niter++;
+      if(niter==iterations) {
+        end_time = CkWallTimer();
+        zerocpy_time = 1.0e6*(end_time-start_time)/iterations;
+        if(size < 1 << 24)
+          CkPrintf("%d\t\t\t%d\t\t\t%lf\t\t\t%lf\n", size, iterations, reg_time/2, zerocpy_time/2);
+        else //using different print format for larger numbers for aligned output
+          CkPrintf("%d\t\t%d\t\t\t%lf\t\t\t%lf\n", size, iterations, reg_time/2, zerocpy_time/2);
+        niter=0;
+        mainProxy.maindone();
+      } else {
+        thisProxy[1].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_UNREG), size);
+      }
+    } else {
+      thisProxy[0].recv_zerocopy(CkSendBuffer(nocopyMsg, CK_BUFFER_UNREG), size);
+    }
+  }
+};
+
+#include "pingpong.def.h"
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.ci b/examples/charm++/zerocopy/entry_method_api/unreg/pingpong/pingpong.ci
new file mode 100644 (file)
index 0000000..caec180
--- /dev/null
@@ -0,0 +1,14 @@
+// Interface file for the pingpong benchmark: one mainchare that drives the
+// run and a 1D chare array whose elements bounce a buffer back and forth.
+mainmodule pingpong {
+  readonly CProxy_main mainProxy;
+  mainchare main {
+    entry main(CkArgMsg *m);
+    entry void maindone(void);
+  };
+  array [1D] Ping1 {
+    entry Ping1();
+    entry void start(int size);
+    entry void freeBuffer();
+    // regular marshalled send of `size` bytes
+    entry void recv(char msg[size], int size);
+    // same payload, but the array parameter is declared `nocopy`
+    entry void recv_zerocopy(nocopy char msg[size], int size);
+  };
+};
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/Makefile b/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/Makefile
new file mode 100644 (file)
index 0000000..8c3654c
--- /dev/null
@@ -0,0 +1,21 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test target, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC=../../../../../../bin/charmc $(OPTS)
+
+all: simpleZeroCopy
+
+# Linked with -module CommonLBs; the test target selects a balancer at run time.
+simpleZeroCopy:  simpleZeroCopy.o
+       $(CHARMC) simpleZeroCopy.o -o simpleZeroCopy -language charm++ -module CommonLBs
+
+# `cifiles` is a stamp file recording that simpleZeroCopy.ci has been translated.
+cifiles: simpleZeroCopy.ci
+       $(CHARMC) -c simpleZeroCopy.ci
+       touch cifiles
+
+simpleZeroCopy.o : simpleZeroCopy.C cifiles
+       $(CHARMC) -c simpleZeroCopy.C
+
+# 32 is the <numelements> argument; it must be even (checked in Main).
+test: all
+       $(call run, +p4 ./simpleZeroCopy 32 +balancer RotateLB)
+       $(call run, +p4 ./simpleZeroCopy 32 +balancer GreedyLB)
+
+clean:
+       rm -f *.def.h *.decl.h *.o *~ *.exe cifiles charmrun simpleZeroCopy
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.C b/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.C
new file mode 100644 (file)
index 0000000..dd0f90d
--- /dev/null
@@ -0,0 +1,283 @@
+#include "simpleZeroCopy.decl.h"
+#include <assert.h>
+
+//Set DEBUG(x) to x to see the debug messages
+//#define DEBUG(x) x
+#define DEBUG(x)
+#define LBPERIOD_ITER 5
+
+int numElements;
+
+//Main chare
+class Main : public CBase_Main{
+  public:
+    Main(CkArgMsg *m){
+      if(m->argc!=2){
+        ckout<<"Usage: zerocopy <numelements>"<<endl;
+        CkExit(1);
+      }
+      numElements = atoi(m->argv[1]);
+      delete m;
+      if(numElements%2 != 0){
+        ckout<<"Argument <numelements> should be even"<<endl;
+        CkExit(1);
+      }
+
+      CProxy_RRMap rrMap = CProxy_RRMap::ckNew();
+      CkArrayOptions opts(numElements);
+      opts.setMap(rrMap);
+      CProxy_zerocopyObject zerocopyObj = CProxy_zerocopyObject::ckNew(opts);
+      zerocopyObj.testZeroCopy(thisProxy);
+    }
+
+    void done(){
+      CkPrintf("sdagRun: completed\nAll sending completed and result validated\n");
+      CkExit();
+    }
+
+    Main(CkMigrateMessage *m){}
+};
+
+// Assert that aArr[0..size) equals bArr[startIdx..startIdx+size).
+// The check is an assert(), so it disappears when NDEBUG is defined.
+template<class T>
+void compareArray(T *&aArr, T *&bArr, int size, int startIdx=0){
+  int pos = 0;
+  while(pos < size){
+    assert(aArr[pos] == bArr[startIdx + pos]);
+    ++pos;
+  }
+}
+
+// Replace dest with a freshly allocated copy of src[0..size).
+// Any previous dest buffer is released first. dest must be NULL or come from
+// new T[] (as produced by this function and by assignValues), so it is
+// released with delete [] rather than free(). delete [] on NULL is a no-op.
+template<class T>
+void copyArray(T *&dest, T *&src, int size){
+  delete [] dest;
+  dest = new T[size];
+  memcpy(dest,src,size*sizeof(T));
+}
+
+// Allocate arr as new T[size] and fill it with rand() values in 1..100.
+// A buffer previously held in arr is not released.
+template<class T>
+void assignValues(T *&arr, int size){
+  T *fresh = new T[size];
+  for(T *cur = fresh; cur < fresh + size; ++cur)
+    *cur = rand() % 100 + 1;
+  arr = fresh;
+}
+
+// Allocate arr as new char[size] and fill it with rand() values in 1..125.
+// A buffer previously held in arr is not released.
+void assignCharValues(char *&arr, int size){
+  char *fresh = new char[size];
+  for(char *cur = fresh; cur < fresh + size; ++cur)
+    *cur = (char)(rand() % 125 + 1);
+  arr = fresh;
+}
+
+//zerocopy object chare
+// Array element that exercises regular, zero-copy and mixed sends with its
+// partner (element numElements-1-thisIndex), then runs an iterative SDAG
+// exchange (sdagRun) with periodic load balancing.
+// Elements in the first half of the array create the data and validate what
+// comes back; elements in the second half copy what they receive and echo it.
+class zerocopyObject : public CBase_zerocopyObject{
+  int *iArr1, *iArr2;
+  double *dArr1, *dArr2;
+  char *cArr1;
+  int iSize1, iSize2, dSize1, dSize2, cSize1, iOffset1, cOffset1;
+  int destIndex, iter, num, j;
+  int mixedZeroCopySentCounter, sdagZeroCopySentCounter, sdagZeroCopyRecvCounter;
+  // true until this object has been unpacked by pup(); tells the destructor
+  // which buffers it still owns
+  bool firstMigrationPending;
+  CkCallback cb, sdagCb;
+  int idx_zerocopySent, idx_sdagZeroCopySent;;
+  CProxy_Main mainProxy;
+
+  public:
+    zerocopyObject_SDAG_CODE
+    zerocopyObject(){
+      usesAtSync = true;
+      destIndex = numElements - 1 - thisIndex;
+      DEBUG(CkPrintf("[%d]  me - %d, my neighbour- %d \n", CkMyNode(), thisIndex, destIndex);)
+      //counter for tracking mixedSend completions to initiate sdagRun
+      mixedZeroCopySentCounter = 0;
+
+      //counter for tracking sdagRecv send completions
+      sdagZeroCopySentCounter = 0;
+
+      //counter for tracking sdagRecv completions
+      sdagZeroCopyRecvCounter = 0;
+      iArr1 = NULL;
+      iArr2 = NULL;
+      dArr1 = NULL;
+      dArr2 = NULL;
+      cArr1 = NULL;
+      iter = 1;
+      num = 4;
+      j = 0;
+      firstMigrationPending = true;
+      // both completion callbacks target this element itself
+      idx_zerocopySent = CkIndex_zerocopyObject::zerocopySent(NULL);
+      idx_sdagZeroCopySent = CkIndex_zerocopyObject::sdagZeroCopySent(NULL);
+      cb = CkCallback(idx_zerocopySent, thisProxy[thisIndex]);
+      sdagCb = CkCallback(idx_sdagZeroCopySent, thisProxy[thisIndex]);
+    }
+
+    // Serialize the state sdagRun needs. On unpacking, iArr1 and dArr2 are
+    // re-allocated before their contents are read back.
+    void pup(PUP::er &p){
+      p|iter;
+      p|destIndex;
+      p|cb;
+      p|num;
+      p|iSize1;
+      p|dSize2;
+      p|mixedZeroCopySentCounter;
+      p|sdagZeroCopySentCounter;
+      p|sdagZeroCopyRecvCounter;
+      p|mainProxy;
+      p|sdagCb;
+
+      // sdagRun only uses iArr1 and dArr2
+      // the others needn't be pupped/unpupped
+      if (p.isUnpacking()){
+        iArr1 = new int[iSize1];
+        dArr2 = new double[dSize2];
+        j=0;
+        firstMigrationPending = false;
+      }
+      p(iArr1, iSize1);
+      p(dArr2, dSize2);
+    }
+
+    ~zerocopyObject() {
+      if(firstMigrationPending) {
+        // delete on first migration on all chares
+        delete [] cArr1;
+
+        if(thisIndex < numElements/2) {
+          // delete on first migration on the first set of chares
+          // as it is deleted in the callback on the other set
+          delete [] iArr2;
+          delete [] dArr1;
+        }
+
+      }
+      // delete everytime after migration as they are pupped to be used for sdagRun
+      delete [] dArr2;
+      delete [] iArr1;
+    }
+
+    zerocopyObject(CkMigrateMessage *m){}
+
+    // Completion callback (cb) for the two zero-copy buffers passed in
+    // mixedSend: de-registers and releases the source buffer, then starts
+    // sdagRun once both callbacks have arrived.
+    // NOTE(review): the buffers passed with cb (dArr1, iArr2) are allocated
+    // with new T[] in copyArray but released here with free() - confirm
+    // whether this mismatch is intended.
+    void zerocopySent(CkDataMsg *m){
+      // Get access to the array information sent via zerocopy
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+      // de-register the memory
+      src->deregisterMem();
+      free((void *)(src->ptr));
+
+      delete m;
+
+      if(++mixedZeroCopySentCounter == 2)
+        thisProxy[thisIndex].sdagRun();
+    }
+
+    // Completion callback (sdagCb) for each zero-copy buffer sent from
+    // sdagRun. The buffer itself is kept, only its registration is dropped.
+    void sdagZeroCopySent(CkDataMsg *m){
+      // increment on completing the send of an zerocopy parameter in sdagRecv
+      sdagZeroCopySentCounter++;
+
+      // Get access to the array information sent via zerocopy
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+      // de-register the memory
+      src->deregisterMem();
+
+      delete m;
+
+      // check that all sends and recvs have completed and then advance
+      if(sdagZeroCopySentCounter == 2*num && sdagZeroCopyRecvCounter == num)
+        nextStep();
+    }
+
+    // Entry point broadcast by Main: sets the sizes and offsets on every
+    // element; the first half also creates random data and starts the
+    // regular send.
+    void testZeroCopy(CProxy_Main mProxy){
+      iSize1 = 210;
+      iSize2 = 11;
+      dSize1 = 4700;
+      dSize2 = 79;
+      cSize1 = 32;
+
+      iOffset1 = 3;
+      cOffset1 = 2;
+
+      mainProxy = mProxy;
+      if(thisIndex < numElements/2){
+        assignValues(iArr1, iSize1);
+        assignValues(iArr2, iSize2);
+        assignValues(dArr1, dSize1);
+        assignValues(dArr2, dSize2);
+        assignCharValues(cArr1, cSize1);
+        thisProxy[destIndex].send(iSize1, iArr1, dSize1, dArr1, cSize1, cArr1);
+      }
+    }
+
+    // Regular send. The second half echoes the data; the first half validates
+    // the echo and starts the zero-copy send (with offsets into iArr1 and
+    // cArr1).
+    void send(int n1, int *ptr1, int n2, double *ptr2, int n3, char *ptr3){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1);
+        compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, cArr1, n3);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": Regular send completed"<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("send: completed\n");
+        thisProxy[destIndex].zerocopySend(iSize1-iOffset1, CkSendBuffer(iArr1+iOffset1, CK_BUFFER_UNREG), dSize1, CkSendBuffer(dArr1, CK_BUFFER_UNREG), cSize1-cOffset1, CkSendBuffer(cArr1 + cOffset1, CK_BUFFER_UNREG)); }
+      else{
+        thisProxy[destIndex].send(n1, ptr1, n2, ptr2, n3, ptr3);
+      }
+    }
+
+    // Zero-copy send of all three arrays. The second half copies what it
+    // received and echoes it; the first half validates the echo and starts
+    // the mixed send.
+    void zerocopySend(int n1, int *ptr1, int n2, double *ptr2, int n3, char *ptr3){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1, iOffset1);
+        compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, cArr1, n3, cOffset1);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": ZeroCopy send completed"<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("zerocopySend: completed\n");
+        thisProxy[destIndex].mixedSend(iSize1, iArr1, dSize1, CkSendBuffer(dArr1, CK_BUFFER_UNREG), iSize2, CkSendBuffer(iArr2, CK_BUFFER_UNREG), dSize2, dArr2);
+      }
+      else{
+        copyArray(iArr1, ptr1, n1);
+        copyArray(dArr1, ptr2, n2);
+        copyArray(cArr1, ptr3, n3);
+        thisProxy[destIndex].zerocopySend(n1, CkSendBuffer(iArr1, CK_BUFFER_UNREG), n2, CkSendBuffer(dArr1, CK_BUFFER_UNREG), n3, CkSendBuffer(cArr1, CK_BUFFER_UNREG));
+      }
+    }
+
+    // Mixed send: ptr1/ptr4 are regular parameters, ptr2/ptr3 are zero-copy.
+    // The first half validates and starts sdagRun directly; the second half
+    // echoes with cb attached, so its sdagRun starts from zerocopySent.
+    void mixedSend(int n1, int *ptr1, int n2, double *ptr2, int n3, int *ptr3, int n4, double *ptr4){
+      if(thisIndex < numElements/2){
+        compareArray(ptr1, iArr1, n1); compareArray(ptr2, dArr1, n2);
+        compareArray(ptr3, iArr2, n3);
+        compareArray(ptr4, dArr2, n4);
+        DEBUG(ckout<<"["<<CkMyPe()<<"] "<<thisIndex<<"->"<<destIndex<<": Mixed send completed "<<endl;)
+        if(thisIndex == 0)
+          CkPrintf("mixedSend: completed\n");
+        thisProxy[thisIndex].sdagRun();
+      }
+      else{
+        copyArray(iArr1, ptr1, n1);
+        copyArray(dArr1, ptr2, n2);
+        copyArray(iArr2, ptr3, n3);
+        copyArray(dArr2, ptr4, n4);
+        thisProxy[destIndex].mixedSend(n1, iArr1, n2, CkSendBuffer(dArr1, cb, CK_BUFFER_UNREG), n3, CkSendBuffer(iArr2, cb, CK_BUFFER_UNREG), n4, dArr2);
+      }
+    }
+
+    // Called once all sends and receives of an sdagRun iteration are done:
+    // advances iter, then load balances, runs again or joins the final
+    // reduction.
+    void nextStep() {
+      // reset the completion counters
+      sdagZeroCopyRecvCounter = 0;
+      sdagZeroCopySentCounter = 0;
+
+      if(thisIndex == 0)
+          CkPrintf("sdagRun: Iteration %d completed\n", iter);
+
+      //increase iteration and continue
+      iter++;
+
+      //load balance
+      if(iter % LBPERIOD_ITER == 0)
+        AtSync();
+      else if(iter<=100)
+        thisProxy[thisIndex].sdagRun();
+      else {
+        CkCallback reductionCb(CkReductionTarget(Main, done), mainProxy);
+        contribute(reductionCb);
+      }
+    }
+
+    // Continue with the next sdagRun iteration.
+    // NOTE(review): presumably invoked by the runtime after the AtSync()
+    // load-balancing step completes - confirm.
+    void ResumeFromSync() {
+      thisProxy[thisIndex].sdagRun();
+    }
+};
+
+#include "simpleZeroCopy.def.h"
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.ci b/examples/charm++/zerocopy/entry_method_api/unreg/simpleZeroCopy/simpleZeroCopy.ci
new file mode 100644 (file)
index 0000000..5c260f1
--- /dev/null
@@ -0,0 +1,46 @@
+// Interface file for the simpleZeroCopy example: a Main chare plus a 1D
+// array whose elements exchange data via regular, nocopy and mixed entry
+// methods.
+mainmodule simpleZeroCopy {
+
+  readonly int numElements;
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void done();
+  };
+
+  array [1D] zerocopyObject{
+    entry zerocopyObject();
+    entry void testZeroCopy(CProxy_Main mProxy);
+    // completion callback for the nocopy buffers passed in mixedSend
+    entry void zerocopySent(CkDataMsg *msg);
+    // all parameters regular
+    entry void send(int n1, int ptr1[n1], int n2, double ptr2[n2], int n3, char ptr3[n3]);
+    // all array parameters nocopy
+    entry void zerocopySend(int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2], int n3, nocopy char ptr3[n3]);
+    // ptr1 and ptr4 regular, ptr2 and ptr3 nocopy
+    entry void mixedSend(int n1, int ptr1[n1], int n2, nocopy double ptr2[n2], int n3, nocopy int ptr3[n3], int n4, double ptr4[n4]);
+
+    // One iteration: send `num` sdagRecv messages (two nocopy buffers each)
+    // to the partner, then wait for `num` sdagRecv messages tagged with the
+    // current iter and validate them.
+    entry void sdagRun() {
+      serial {
+        // send num arrays to its partner
+        for(int i = 1; i <= num; i++)
+          thisProxy[destIndex].sdagRecv(iter, iSize1, CkSendBuffer(iArr1, sdagCb, CK_BUFFER_UNREG), dSize2, CkSendBuffer(dArr2, sdagCb, CK_BUFFER_UNREG));
+      }
+
+      // wait for num arrays from partner
+      for (j = 1; j <= num; j++){
+        when sdagRecv[iter] (int iter, int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2]){
+          serial {
+            // increment counter on receiving a sdagRecv
+            sdagZeroCopyRecvCounter++;
+            compareArray(ptr1, iArr1, n1);
+            compareArray(ptr2, dArr2, n2);
+
+            // check that all sends and recvs have completed and then advance
+            if(sdagZeroCopySentCounter == 2*num && sdagZeroCopyRecvCounter == num)
+              nextStep();
+          }
+        }
+      }
+    }
+
+    // completion callback for the nocopy buffers sent from sdagRun
+    entry void sdagZeroCopySent(CkDataMsg *msg);
+    entry void sdagRecv(int iter, int n1, nocopy int ptr1[n1], int n2, nocopy double ptr2[n2]);
+  };
+
+}
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/Makefile b/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/Makefile
new file mode 100644 (file)
index 0000000..16ad372
--- /dev/null
@@ -0,0 +1,31 @@
+# Optional include: the leading '-' makes a missing common.mk non-fatal.
+# NOTE(review): `run`, used by the test targets, is not defined in this file;
+# presumably common.mk provides it - confirm.
+-include ../../../../../common.mk
+CHARMC = ../../../../../../bin/charmc $(OPTS)
+
+OBJS = stencil3d.o
+
+all: stencil3d
+
+stencil3d: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -o stencil3d $(OBJS)
+
+# Variants linked with a trace mode; they produce stencil3d.prj / stencil3d.sum.
+projections: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -tracemode projections -lz -o stencil3d.prj $(OBJS)
+
+summary: $(OBJS)
+       $(CHARMC) -language charm++ -module CommonLBs -tracemode summary -lz -o stencil3d.sum $(OBJS)
+
+stencil3d.decl.h: stencil3d.ci
+       $(CHARMC)  stencil3d.ci
+
+stencil3d.o: stencil3d.C stencil3d.decl.h
+       $(CHARMC) -c stencil3d.C
+
+# Removes every binary this Makefile can produce, including the `summary`
+# build (stencil3d.sum).
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o stencil3d stencil3d.prj stencil3d.sum charmrun *~
+
+test: stencil3d
+       $(call run, +p4 ./stencil3d 64 32 +balancer RefineLB )
+       $(call run, +p4 ./stencil3d 64 32 +balancer GreedyLB )
+
+bgtest: stencil3d
+       $(call run, +p4 ./stencil3d 32 16 +balancer CommLB +x2 +y2 +z1 +cth1 +wth1 )
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.C b/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.C
new file mode 100644 (file)
index 0000000..2ad19c5
--- /dev/null
@@ -0,0 +1,393 @@
+/** \file stencil3d.C
+
+ *  This example is the nocopy version of the stencil example
+ *  in examples/charm++/load_balancing/stencil3d *
+ *           *****************
+ *        *               *  *
+ *   ^ *****************     *
+ *   | *               *     *
+ *   | *               *     *
+ *   | *               *     *
+ *   Y *               *     *
+ *   | *               *     *
+ *   | *               *     *
+ *   | *               *  *
+ *   ~ *****************    Z
+ *     <------ X ------>
+ *
+ *   X: left, right --> wrap_x
+ *   Y: top, bottom --> wrap_y
+ *   Z: front, back --> wrap_z
+ */
+
+#include "stencil3d.decl.h"
+#include "TopoManager.h"
+
+/*readonly*/ CProxy_Main mainProxy; // proxy to the main chare; assigned in Main's constructor
+/*readonly*/ int arrayDimX; // whole-array extent in X, parsed from the command line
+/*readonly*/ int arrayDimY; // whole-array extent in Y
+/*readonly*/ int arrayDimZ; // whole-array extent in Z
+/*readonly*/ int blockDimX; // per-chare block extent in X, parsed from the command line
+/*readonly*/ int blockDimY; // per-chare block extent in Y
+/*readonly*/ int blockDimZ; // per-chare block extent in Z
+
+// specify the number of worker chares in each dimension
+/*readonly*/ int num_chare_x; // arrayDimX / blockDimX
+/*readonly*/ int num_chare_y; // arrayDimY / blockDimY
+/*readonly*/ int num_chare_z; // arrayDimZ / blockDimZ
+
+// State of the generator below; every call to myrand() advances it.
+static unsigned long next = 1;
+
+// Linear congruential pseudo-random generator: steps the state, drops the
+// low 16 bits and reduces the remainder modulo numpes.
+int myrand(int numpes) {
+  const unsigned long multiplier = 1103515245;
+  const unsigned long increment = 12345;
+  next = multiplier * next + increment;
+  const unsigned highBits = (unsigned)(next >> 16);
+  return highBits % numpes;
+}
+
+// Chare indices wrap around at the edges of the chare grid. num_chare_* is added
+// before % so that an argument of -1 maps to the highest index instead of a negative one.
+#define wrap_x(a)      (((a)+num_chare_x)%num_chare_x)
+#define wrap_y(a)      (((a)+num_chare_y)%num_chare_y)
+#define wrap_z(a)      (((a)+num_chare_z)%num_chare_z)
+
+#define index(a,b,c)   ((a)+(b)*(blockDimX+2)+(c)*(blockDimX+2)*(blockDimY+2)) // linear offset of cell (a,b,c) in a block padded by one cell on every side; a varies fastest
+
+#define MAX_ITER       100 // the run ends once a chare has completed this many iterations
+#define LBPERIOD_ITER  5    // LB is called every LBPERIOD_ITER number of program iterations
+#define CHANGELOAD     30 // compute_kernel raises its synthetic work term every CHANGELOAD iterations
+#define LEFT           1 // LEFT..BACK: direction tags sent with receiveGhosts; processGhosts picks the halo face from them
+#define RIGHT          2
+#define TOP            3
+#define BOTTOM         4
+#define FRONT          5
+#define BACK           6
+#define DIVIDEBY7              0.14285714285714285714 // 1/7: the stencil averages seven cells
+
+/** \class Main
+ *
+ */
+class Main : public CBase_Main {
+  public:
+    CProxy_Stencil array;
+
+    Main(CkArgMsg* m) {
+      if ( (m->argc != 3) && (m->argc != 7) ) {
+        CkPrintf("%s [array_size] [block_size]\n", m->argv[0]);
+        CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]);
+        CkAbort("Abort");
+      }
+
+      // store the main proxy
+      mainProxy = thisProxy;
+
+      if(m->argc == 3) {
+        arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
+        blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]);
+      }
+      else if (m->argc == 7) {
+        arrayDimX = atoi(m->argv[1]);
+        arrayDimY = atoi(m->argv[2]);
+        arrayDimZ = atoi(m->argv[3]);
+        blockDimX = atoi(m->argv[4]);
+        blockDimY = atoi(m->argv[5]);
+        blockDimZ = atoi(m->argv[6]);
+      }
+
+      if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
+        CkAbort("array_size_X % block_size_X != 0!");
+      if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
+        CkAbort("array_size_Y % block_size_Y != 0!");
+      if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
+        CkAbort("array_size_Z % block_size_Z != 0!");
+
+      num_chare_x = arrayDimX / blockDimX;
+      num_chare_y = arrayDimY / blockDimY;
+      num_chare_z = arrayDimZ / blockDimZ;
+
+      // print info
+      CkPrintf("\nSTENCIL COMPUTATION WITH BARRIERS\n");
+      CkPrintf("Running Stencil on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z);
+      CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
+      CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);
+
+      // Create new array of worker chares
+      array = CProxy_Stencil::ckNew(num_chare_x, num_chare_y, num_chare_z);
+
+      //Start the computation
+      array.doStep();
+    }
+
+    // Each worker reports back to here when it completes an iteration
+    void report() {
+      CkExit();
+    }
+};
+
+/** \class Stencil
+ *
+ */
+
+class Stencil: public CBase_Stencil {
+  Stencil_SDAG_CODE
+  private:
+    double startTime;
+
+  public:
+    int iterations;
+    int imsg;
+    int counter;
+
+    double *temperature;
+    double *new_temperature;
+    CkCallback cb;
+
+    // callback function called on completion of sending ghosts
+    void completedSendingGhost(CkDataMsg *msg){
+      CkNcpyBuffer *src = (CkNcpyBuffer *)(msg->data);
+      void *ptr = (void *)(src->ptr);
+      // deregister memory
+      src->deregisterMem();
+      free(ptr);
+
+      delete msg;
+      counter++;
+      // Advance to next step on completion of sending ghosts to the 6 neighbors
+      if(counter == 6){
+        counter = 0;
+        thisProxy[thisIndex].nextStep();
+      }
+    }
+
+    // Constructor, initialize values
+    Stencil() {
+      usesAtSync = true;
+      counter = 0;
+
+      int i, j, k;
+      cb = CkCallback(CkIndex_Stencil::completedSendingGhost(NULL), thisProxy(thisIndex.x, thisIndex.y, thisIndex.z));
+      // allocate a three dimensional array
+      temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+      new_temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
+
+      for(k=0; k<blockDimZ+2; ++k)
+        for(j=0; j<blockDimY+2; ++j)
+          for(i=0; i<blockDimX+2; ++i)
+            temperature[index(i, j, k)] = 0.0;
+
+      iterations = 0;
+      imsg = 0;
+      constrainBC();
+      // start measuring time
+      if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
+        startTime = CkWallTimer();
+
+#if CMK_LBDB_ON
+      // set period arbitrarily small so that LB occurs when AtSync is called
+      // this is in case the default LBPERIOD is larger than the time to complete LBPERIOD_ITER
+      // iterations
+      getLBDB()->SetLBPeriod(0);
+#endif
+    }
+
+    void pup(PUP::er &p)
+    {
+      p|startTime;
+      p|iterations;
+      p|imsg;
+
+      size_t size = (blockDimX+2) * (blockDimY+2) * (blockDimZ+2);
+      if (p.isUnpacking()) {
+        cb = CkCallback(CkIndex_Stencil::completedSendingGhost(NULL), thisProxy(thisIndex.x, thisIndex.y, thisIndex.z));
+        temperature = new double[size];
+        new_temperature = new double[size];
+        counter = 0;
+      }
+      p(temperature, size);
+      p(new_temperature, size);
+    }
+
+    Stencil(CkMigrateMessage* m) { }
+
+    ~Stencil() {
+      delete [] temperature;
+      delete [] new_temperature;
+    }
+
+    // Send ghost faces to the six neighbors
+    void begin_iteration(void) {
+      iterations++;
+
+      // Copy different faces into messages
+      double *leftGhost =  new double[blockDimY*blockDimZ];
+      double *rightGhost =  new double[blockDimY*blockDimZ];
+      double *topGhost =  new double[blockDimX*blockDimZ];
+      double *bottomGhost =  new double[blockDimX*blockDimZ];
+      double *frontGhost =  new double[blockDimX*blockDimY];
+      double *backGhost =  new double[blockDimX*blockDimY];
+
+      for(int k=0; k<blockDimZ; ++k)
+        for(int j=0; j<blockDimY; ++j) {
+          leftGhost[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
+          rightGhost[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];
+        }
+
+      for(int k=0; k<blockDimZ; ++k)
+        for(int i=0; i<blockDimX; ++i) {
+          topGhost[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
+          bottomGhost[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
+        }
+
+      for(int j=0; j<blockDimY; ++j)
+        for(int i=0; i<blockDimX; ++i) {
+          frontGhost[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
+          backGhost[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
+        }
+
+      // Send my left face
+      thisProxy(wrap_x(thisIndex.x-1), thisIndex.y, thisIndex.z)
+        .receiveGhosts(iterations, RIGHT, blockDimY, blockDimZ, CkSendBuffer(leftGhost, cb, CK_BUFFER_UNREG));
+      // Send my right face
+      thisProxy(wrap_x(thisIndex.x+1), thisIndex.y, thisIndex.z)
+        .receiveGhosts(iterations, LEFT, blockDimY, blockDimZ, CkSendBuffer(rightGhost, cb, CK_BUFFER_UNREG));
+      // Send my bottom face
+      thisProxy(thisIndex.x, wrap_y(thisIndex.y-1), thisIndex.z)
+        .receiveGhosts(iterations, TOP, blockDimX, blockDimZ, CkSendBuffer(bottomGhost, cb, CK_BUFFER_UNREG));
+      // Send my top face
+      thisProxy(thisIndex.x, wrap_y(thisIndex.y+1), thisIndex.z)
+        .receiveGhosts(iterations, BOTTOM, blockDimX, blockDimZ, CkSendBuffer(topGhost, cb, CK_BUFFER_UNREG));
+      // Send my front face
+      thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z-1))
+        .receiveGhosts(iterations, BACK, blockDimX, blockDimY, CkSendBuffer(frontGhost, cb, CK_BUFFER_UNREG));
+      // Send my back face
+      thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z+1))
+        .receiveGhosts(iterations, FRONT, blockDimX, blockDimY, CkSendBuffer(backGhost, cb, CK_BUFFER_UNREG));
+
+      // control flow continues in completedSendingGhost
+    }
+
+    void processGhosts(int dir, int height, int width, double gh[]) {
+      switch(dir) {
+        case LEFT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(0, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        case RIGHT:
+          for(int k=0; k<width; ++k)
+            for(int j=0; j<height; ++j) {
+              temperature[index(blockDimX+1, j+1, k+1)] = gh[k*height+j];
+            }
+          break;
+        case BOTTOM:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, 0, k+1)] = gh[k*height+i];
+            }
+          break;
+        case TOP:
+          for(int k=0; k<width; ++k)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, blockDimY+1, k+1)] = gh[k*height+i];
+            }
+          break;
+        case FRONT:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, 0)] = gh[j*height+i];
+            }
+          break;
+        case BACK:
+          for(int j=0; j<width; ++j)
+            for(int i=0; i<height; ++i) {
+              temperature[index(i+1, j+1, blockDimZ+1)] = gh[j*height+i];
+            }
+          break;
+        default:
+          CkAbort("ERROR\n");
+      }
+    }
+
+    void check_and_compute() {
+      compute_kernel();
+
+      // calculate error
+      // not being done right now since we are doing a fixed no. of iterations
+
+      double *tmp;
+      tmp = temperature;
+      temperature = new_temperature;
+      new_temperature = tmp;
+
+      constrainBC();
+
+      if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0) {
+        double endTime = CkWallTimer();
+        CkPrintf("[%d] Time per iteration: %f %f\n", iterations, (endTime - startTime), endTime);
+      }
+
+      if(iterations == MAX_ITER)
+        contribute(CkCallback(CkReductionTarget(Main, report), mainProxy));
+      else {
+        if(thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0)
+          startTime = CkWallTimer();
+        if(iterations % LBPERIOD_ITER == 0)
+        {
+          AtSync();
+        }
+        else {
+          contribute(CkCallback(CkReductionTarget(Stencil, doStep), thisProxy));
+        }
+      }
+    }
+
+    // Check to see if we have received all neighbor values yet
+    // If all neighbor values have been received, we update our values and proceed
+    void compute_kernel() {
+      int itno = (int)ceil((double)iterations/(double)CHANGELOAD) * 5;
+      int index = thisIndex.x + thisIndex.y*num_chare_x + thisIndex.z*num_chare_x*num_chare_y;
+      int numChares = num_chare_x * num_chare_y * num_chare_z;
+      double work = 100.0;
+
+      if(index >= numChares*0.2 && index <=numChares*0.8) {
+        work = work * ((double)index/(double)numChares) + (double)itno;
+      } else
+        work = 10.0;
+
+#pragma unroll
+      for(int w=0; w<work; w++) {
+        for(int k=1; k<blockDimZ+1; ++k)
+          for(int j=1; j<blockDimY+1; ++j)
+            for(int i=1; i<blockDimX+1; ++i) {
+              // update my value based on the surrounding values
+              new_temperature[index(i, j, k)] = (temperature[index(i-1, j, k)]
+                  +  temperature[index(i+1, j, k)]
+                  +  temperature[index(i, j-1, k)]
+                  +  temperature[index(i, j+1, k)]
+                  +  temperature[index(i, j, k-1)]
+                  +  temperature[index(i, j, k+1)]
+                  +  temperature[index(i, j, k)] )
+                *  DIVIDEBY7;
+            } // end for
+      }
+    }
+
+    // Enforce some boundary conditions
+    void constrainBC() {
+      // Heat left, top and front faces of each chare's block
+      for(int k=1; k<blockDimZ+1; ++k)
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, 1, k)] = 255.0;
+      for(int k=1; k<blockDimZ+1; ++k)
+        for(int j=1; j<blockDimY+1; ++j)
+          temperature[index(1, j, k)] = 255.0;
+      for(int j=1; j<blockDimY+1; ++j)
+        for(int i=1; i<blockDimX+1; ++i)
+          temperature[index(i, j, 1)] = 255.0;
+    }
+
+    void ResumeFromSync() {
+      doStep();
+    }
+};
+
+#include "stencil3d.def.h"
diff --git a/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.ci b/examples/charm++/zerocopy/entry_method_api/unreg/stencil3d/stencil3d.ci
new file mode 100644 (file)
index 0000000..d93540e
--- /dev/null
@@ -0,0 +1,49 @@
+mainmodule stencil3d { // Charm++ interface for the stencil3d example
+
+  readonly CProxy_Main mainProxy;
+  readonly int arrayDimX;
+  readonly int arrayDimY;
+  readonly int arrayDimZ;
+  readonly int blockDimX;
+  readonly int blockDimY;
+  readonly int blockDimZ;
+
+  readonly int num_chare_x;
+  readonly int num_chare_y;
+  readonly int num_chare_z;
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry [reductiontarget] void report(); // target of the final reduction; Main::report calls CkExit
+  };
+
+  array [3D] Stencil {
+    entry Stencil(void);
+    entry void begin_iteration(void);
+
+    entry void completedSendingGhost(CkDataMsg *msg); // target of the CkCallback passed with every CkSendBuffer
+    entry void nextStep(){ // invoked from completedSendingGhost once all 6 sends of an iteration completed
+      for(imsg = 0; imsg < 6; imsg++) {
+        // "iterations" keeps track of messages across steps
+        when receiveGhosts[iterations] (int iter, int dir, int height,
+            int width, nocopy double ghosts[height*width])
+          serial "process_ghosts" {
+            processGhosts(dir, height, width, ghosts);
+          }
+      }
+      serial "do_work" { // reached after the loop above has consumed six receiveGhosts messages
+        check_and_compute();
+      }
+    }
+
+    entry void receiveGhosts(int iter, int dir, int height, int width,
+        nocopy double ghosts[height*width]); // ghosts is a nocopy array parameter of height*width doubles
+
+    entry [reductiontarget] void doStep() { // invoked by Main, by ResumeFromSync and as a reduction target
+      serial "begin_iteration" {
+        begin_iteration();
+      }
+    };
+  };
+
+};