Zerocopy Direct API: Support for QD 61/4561/7
authorNitin Bhat <nbhat4@illinois.edu>
Tue, 11 Sep 2018 18:07:59 +0000 (13:07 -0500)
committerNitin Bhat <nbhat4@illinois.edu>
Tue, 2 Oct 2018 22:01:26 +0000 (17:01 -0500)
Change-Id: Idf647b4bcc69836ed071abf8d3886ccfd0ba0e9e

src/ck-core/ck.h
src/ck-core/ckrdma.C
tests/charm++/Makefile
tests/charm++/zerocopy/Makefile [new file with mode: 0644]
tests/charm++/zerocopy/zerocopy_with_qd/Makefile [new file with mode: 0644]
tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.C [new file with mode: 0644]
tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.ci [new file with mode: 0644]

index 5957cc4a7a89869ebaff7383deb1164d466bfc7a..aa22e4aafd366ef98b9999a175f6054a74c9a1d5 100644 (file)
@@ -246,4 +246,8 @@ extern void _createGroup(CkGroupID groupID, envelope *env);
 extern void _createNodeGroup(CkGroupID groupID, envelope *env);
 extern int _getGroupIdx(int,int,int);
 static inline IrrGroup *_lookupGroupAndBufferIfNotThere(const CkCoreState *ck, const envelope *env,const CkGroupID &groupID);
+
+
+void QdCreate(int n);
+void QdProcess(int n);
 #endif
index f6adfc6ac34577ea6c2a404687d892f1f4b1d1f6..35a2c45390070bb5c03ab8004fe8889b0c2919f0 100644 (file)
@@ -3,6 +3,7 @@
  */
 
 #include "charm++.h"
+#include "ck.h"
 #include "converse.h"
 #include "cmirdmautils.h"
 
@@ -322,6 +323,9 @@ int getRdmaBufSize(envelope *env){
 // Ack handler function which invokes the callback
 void CkRdmaDirectAckHandler(void *ack) {
 
+  // Process QD to mark completion of the outstanding RDMA operation
+  QdProcess(1);
+
   NcpyOperationInfo *info = (NcpyOperationInfo *)(ack);
 
   CkCallback *srcCb = (CkCallback *)(info->srcAck);
@@ -496,7 +500,22 @@ void CkNcpyBuffer::get(CkNcpyBuffer &source){
 
 #endif
   } else if (transferMode == ncpyTransferMode::RDMA) {
+
+    int outstandingRdmaOps = 1; // used by true-RDMA layers
+
+#if CMK_ONESIDED_IMPL
+#if CMK_CONVERSE_MPI
+    outstandingRdmaOps += 1; // MPI layer invokes CmiDirectAckHandler twice as sender and receiver post separately
+#endif
+#else
+    outstandingRdmaOps += 1; // non-RDMA layers invoke CmiDirectAckHandler twice using regular messages
+#endif
+
+    // Create QD to ensure that outstanding rdmaGet call is accounted for
+    QdCreate(outstandingRdmaOps);
+
     rdmaGet(source);
+
   } else {
     CkAbort("Invalid ncpyTransferMode");
   }
@@ -601,7 +620,22 @@ void CkNcpyBuffer::put(CkNcpyBuffer &destination){
 
 #endif
   } else if (transferMode == ncpyTransferMode::RDMA) {
+
+    int outstandingRdmaOps = 1; // used by true-RDMA layers
+
+#if CMK_ONESIDED_IMPL
+#if CMK_CONVERSE_MPI
+    outstandingRdmaOps += 1; // MPI layer invokes CmiDirectAckHandler twice as sender and receiver post separately
+#endif
+#else
+    outstandingRdmaOps += 1; // non-RDMA layers invoke CmiDirectAckHandler twice using regular messages
+#endif
+
+    // Create QD to ensure that outstanding rdmaGet call is accounted for
+    QdCreate(outstandingRdmaOps);
+
     rdmaPut(destination);
+
   } else {
     CkAbort("Invalid ncpyTransferMode");
   }
index ae543f1e1fc908dc23c4ac1ec87dc12f65b5c939..8a3b2392e43232dc2dbfced56055f11675bd1787 100644 (file)
@@ -27,6 +27,7 @@ DIRS = \
   charmxi_parsing \
   jacobi3d \
   jacobi3d-sdag \
+  zerocopy \
 
 # skip sdag, megatest and commtest
 BGDIRS = \
diff --git a/tests/charm++/zerocopy/Makefile b/tests/charm++/zerocopy/Makefile
new file mode 100644 (file)
index 0000000..0e9f740
--- /dev/null
@@ -0,0 +1,21 @@
+DIRS = \
+  zerocopy_with_qd \
+
+TESTDIRS = $(DIRS)
+
+all: $(foreach i,$(DIRS),build-$i)
+
+test: $(foreach i,$(TESTDIRS),test-$i)
+
+clean: $(foreach i,$(DIRS),clean-$i)
+       rm -f TAGS #*#
+       rm -f core *~
+
+$(foreach i,$(DIRS),build-$i):
+       $(MAKE) -C $(subst build-,,$@) all OPTS='$(OPTS)'
+
+$(foreach i,$(DIRS),test-$i):
+       $(MAKE) -C $(subst test-,,$@) test OPTS='$(OPTS)' TESTOPTS='$(TESTOPTS)'
+
+$(foreach i,$(DIRS),clean-$i):
+       $(MAKE) -C $(subst clean-,,$@) clean OPTS='$(OPTS)'
diff --git a/tests/charm++/zerocopy/zerocopy_with_qd/Makefile b/tests/charm++/zerocopy/zerocopy_with_qd/Makefile
new file mode 100644 (file)
index 0000000..3aff046
--- /dev/null
@@ -0,0 +1,25 @@
+-include ../../../common.mk
+CHARMDIR = ../../../..
+CHARMC = $(CHARMDIR)/bin/charmc $(OPTS)
+
+all: zerocopy_with_qd
+
+OBJS = zerocopy_with_qd.o
+
+zerocopy_with_qd: $(OBJS)
+       $(CHARMC) -language charm++ -o zerocopy_with_qd $(OBJS)
+
+cifiles: zerocopy_with_qd.ci
+       $(CHARMC)  zerocopy_with_qd.ci
+       touch cifiles
+
+zerocopy_with_qd.o: zerocopy_with_qd.C cifiles
+       $(CHARMC) -c zerocopy_with_qd.C
+
+test: all
+       $(call run, +p1 ./zerocopy_with_qd 60)
+       $(call run, +p4 ./zerocopy_with_qd 100)
+
+clean:
+       rm -f *.decl.h *.def.h *.o
+       rm -f zerocopy_with_qd charmrun cifiles
diff --git a/tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.C b/tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.C
new file mode 100644 (file)
index 0000000..ea1d0e5
--- /dev/null
@@ -0,0 +1,96 @@
+#include "zerocopy_with_qd.decl.h"
+
+#define DEBUG(x) //x
+
+int numElements;
+CProxy_Main mProxy;
+
+class Main : public CBase_Main {
+  int srcCompletedCounter;
+  int destCompletedCounter;
+  public:
+    Main(CkArgMsg *m) {
+      if(m->argc !=2 ) {
+        CkAbort("Usage: ./zerocopy_with_qd <array size>, where <array size> is even\n");
+      }
+      numElements = atoi(m->argv[1]);
+      if(numElements % 2 != 0) {
+        CkAbort("<array size> argument is not even\n");
+      }
+      delete m;
+
+      srcCompletedCounter = 0;
+      destCompletedCounter = 0;
+      mProxy = thisProxy;
+
+      CProxy_testArr arr1 = CProxy_testArr::ckNew(numElements);
+      CkStartQD(CkCallback(CkIndex_Main::qdReached(), mProxy));
+    };
+
+    void qdReached() {
+      CkPrintf("[%d][%d][%d] Quiescence has been reached srcCompleted:%d, destCompleted:%d, 3(numElements/2)=%d\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), srcCompletedCounter, destCompletedCounter, 3*numElements/2);
+      CkAssert(srcCompletedCounter == destCompletedCounter);
+      CkAssert(srcCompletedCounter == 3*numElements/2);
+      CkExit();
+    }
+
+    void zcSrcCompleted(CkDataMsg *m) {
+      srcCompletedCounter++;
+      DEBUG(CkPrintf("[%d][%d][%d] srcCompleted:%d, completed:%d\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), srcCompletedCounter, CkGetRefNum(m));)
+    }
+
+    void zcDestCompleted(CkDataMsg *m) {
+      destCompletedCounter++;
+      DEBUG(CkPrintf("[%d][%d][%d] destCompleted:%d, completed:%d\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), destCompletedCounter, CkGetRefNum(m));)
+    }
+};
+
+class testArr : public CBase_testArr {
+  int destIndex, size1, size2, size3;
+  char *buff1, *buff2, *buff3;
+  public:
+    testArr() {
+      DEBUG(CkPrintf("[%d][%d][%d] testArr element create %d \n", CmiMyPe(), CmiMyNode(), CmiMyRank(), thisIndex);)
+      destIndex = numElements - 1 - thisIndex;
+
+      size1 = 2001;
+      size2 = 67;
+      size3 = 4578;
+
+      buff1 = new char[size1];
+      buff2 = new char[size2];
+      buff3 = new char[size3];
+
+      if(thisIndex < numElements/2) {
+        CkCallback srcCompletionCb(CkIndex_Main::zcSrcCompleted(NULL),
+                                   mProxy);
+        // Create CkNcpyBuffer objects  and send it to the other side
+        srcCompletionCb.setRefNum(thisIndex);
+        CkNcpyBuffer src1(buff1, size1, srcCompletionCb);
+        CkNcpyBuffer src2(buff2, size2, srcCompletionCb);
+        CkNcpyBuffer src3(buff3, size3, srcCompletionCb);
+
+        thisProxy[destIndex].recvBufferInfo(src1, src2, src3);
+      }
+    }
+
+    // executed on half of the array elements
+    void recvBufferInfo(CkNcpyBuffer src1, CkNcpyBuffer src2, CkNcpyBuffer src3) {
+      // Create CkNcpyBuffer objects to serve as destinations and perform get on the data
+        CkCallback destCompletionCb(CkIndex_Main::zcDestCompleted(NULL),
+                                   mProxy);
+        destCompletionCb.setRefNum(thisIndex);
+        // Create CkNcpyBuffer objects  and send it to the other side
+        CkNcpyBuffer dest1(buff1, size1, destCompletionCb);
+        dest1.get(src1);
+
+        CkNcpyBuffer dest2(buff2, size2, destCompletionCb);
+        dest2.get(src2);
+
+        CkNcpyBuffer dest3(buff3, size3, destCompletionCb);
+        dest3.get(src3);
+
+        DEBUG(CkPrintf("[%d][%d][%d] Completed launching Gets %d\n", CmiMyPe(), CmiMyNode(), CmiMyRank(), thisIndex);)
+    }
+};
+#include "zerocopy_with_qd.def.h"
diff --git a/tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.ci b/tests/charm++/zerocopy/zerocopy_with_qd/zerocopy_with_qd.ci
new file mode 100644 (file)
index 0000000..f2bdbc6
--- /dev/null
@@ -0,0 +1,17 @@
+mainmodule zerocopy_with_qd {
+
+  readonly int numElements;
+  readonly CProxy_Main mProxy;
+
+  mainchare Main{
+    entry Main(CkArgMsg *m);
+    entry void qdReached();
+    entry void zcSrcCompleted(CkDataMsg *m);
+    entry void zcDestCompleted(CkDataMsg *m);
+  };
+
+  array [1D] testArr{
+    entry testArr();
+    entry void recvBufferInfo(CkNcpyBuffer src1, CkNcpyBuffer src2, CkNcpyBuffer src3);
+  };
+};