Feature #1952: Pass CkNcpyBuffer instead of CkNcpyAck in the ZC API callback 99/4399/14
author Nitin Bhat <nbhat4@illinois.edu>
Fri, 27 Jul 2018 21:33:58 +0000 (17:33 -0400)
committer Nitin Bhat <nbhat4@illinois.edu>
Tue, 7 Aug 2018 20:09:59 +0000 (15:09 -0500)
Change-Id: I67ca7ba69ca2adc7bc2038af3e9f0c4a907dd588

18 files changed:
doc/charm++/zerocopyapi.tex
examples/charm++/zerocopy/direct_api/unreg/simple_get/simple_get.C
src/arch/gni/machine-onesided.c
src/arch/gni/machine-onesided.h
src/arch/gni/machine.C
src/arch/mpi/machine-onesided.c
src/arch/mpi/machine.C
src/arch/ofi/machine-onesided.c
src/arch/pamilrts/machine-onesided.c
src/arch/util/machine-rdma.h
src/arch/verbs/machine-ibverbs.c
src/arch/verbs/machine-onesided.c
src/ck-core/ckrdma.C
src/ck-core/ckrdma.h
src/conv-core/conv-rdma.c
src/conv-core/conv-rdma.h
src/util/cmirdmautils.c
src/util/cmirdmautils.h

index d4630b67e015a4612238d772bb7df8c2d4acec97..800afe0baf5f13aba7beb424b5311c9031b9b48c 100644 (file)
@@ -147,19 +147,23 @@ void destinationDone() \{
 \end{alltt}
 
 The callback methods can also take a pointer to a \texttt{CkDataMsg} message. This message can be
-used to access the original pointer passed into the buffer and another optional reference pointer that was
-initially set for the \kw{CkNcpyBuffer} object using the method \texttt{setRef}. The following code snippet
-illustrates the accessing of the original buffer pointer in the callback method by casting the \texttt{data}
-field of the \texttt{CkDataMsg} object into a \texttt{CkNcpyAck} object.
+used to access the original buffer information object, i.e., the \texttt{CkNcpyBuffer} object used for the
+zero copy transfer. The buffer information object available in the callback allows access to all its
+information, including the buffer pointer and the arbitrary reference pointer set using the method
+\texttt{setRef}. It is important to note that only the source \texttt{CkNcpyBuffer} object is accessible
+using the \texttt{CkDataMsg} in the source callback and, similarly, only the destination \texttt{CkNcpyBuffer} object
+is accessible using the \texttt{CkDataMsg} in the destination callback.
+The following code snippet illustrates how to access the original buffer pointer in the callback
+method by casting the \texttt{data} field of the \texttt{CkDataMsg} object to a \texttt{CkNcpyBuffer} pointer.
 
 \begin{alltt}
 // Invoked by the runtime on source (Index 0)
 void sourceDone(CkDataMsg *msg) \{
-    // Cast msg->data to a CkNcpyAck
-    CkNcpyAck *ack = (CkNcpyAck *)(msg->data);
+    // Cast msg->data to a CkNcpyBuffer to get the source buffer information object
+    CkNcpyBuffer *source = (CkNcpyBuffer *)(msg->data);
 
     // access buffer pointer and free it
-    free(ack->ptr);
+    free(source->ptr);
 \}
 \end{alltt}
 
@@ -180,14 +184,14 @@ void sourceDone(CkDataMsg *msg) \{
     // update the buffer to the next pointer
     updateBuffer();
 
-    // Cast msg->data to a CkNcpyAck
-    CkNcpyAck *ack = (CkNcpyAck *)(msg->data);
+    // Cast msg->data to a CkNcpyBuffer
+    CkNcpyBuffer *src = (CkNcpyBuffer *)(msg->data);
 
     // access buffer pointer and free it
-    free(ack->ptr);
+    free(src->ptr);
 
     // get reference pointer
-    const void *refPtr = ack->ref;
+    const void *refPtr = src->ref;
 \}
 \end{alltt}
 
index c5562e3cf1ec7a568275e207f5c323f7d8c034e7..4e335eeb72d9c82fe9ab5e5e0823ee86625140b7 100644 (file)
@@ -124,9 +124,9 @@ public:
     CkAssert(thisIndex == 0);
     cbCounter++;
 
-    // Cast m->data as (CkNcpyAck *)
-    CkNcpyAck *ack = (CkNcpyAck *)(m->data);
-    ACK_DEBUG(("[%d][%d][%d] In source callback : Buffer Ptr: %p, Reference Ptr: %p and refnum is %d\n", thisIndex, CkMyPe(), CkMyNode(), ack->ptr, ack->ref, CkGetRefNum(m)));
+    // Cast m->data as (CkNcpyBuffer *)
+    CkNcpyBuffer *src = (CkNcpyBuffer *)(m->data);
+    ACK_DEBUG(("[%d][%d][%d] In source callback : Buffer Ptr: %p, Reference Ptr: %p and refnum is %d\n", thisIndex, CkMyPe(), CkMyNode(), src->ptr, src->ref, CkGetRefNum(m)));
 
     int refNum = CkGetRefNum(m);
 
@@ -139,10 +139,10 @@ public:
     }
 
     // Verify that source pointer is equal to the buffer pointer returned
-    CkAssert(srcPointer == ack->ptr);
+    CkAssert(srcPointer == src->ptr);
 
     // Verify that reference pointer is equal to the reference pointer returned
-    CkAssert(&valCounter == ack->ref);
+    CkAssert(&valCounter == src->ref);
 
     if(cbCounter == 3) {
       // Release Resources for my sources
@@ -160,9 +160,9 @@ public:
     CkAssert(thisIndex == 1);
     cbCounter++;
 
-    // Cast m->data as (CkNcpyAck *)
-    CkNcpyAck *ack = (CkNcpyAck *)(m->data);
-    ACK_DEBUG(("[%d][%d][%d] In destination callback : Buffer Ptr: %p, Reference Ptr: %p and refnum is %d\n", thisIndex, CkMyPe(), CkMyNode(), ack->ptr, ack->ref, CkGetRefNum(m)));
+    // Cast m->data as (CkNcpyBuffer *)
+    CkNcpyBuffer *dest = (CkNcpyBuffer *)(m->data);
+    ACK_DEBUG(("[%d][%d][%d] In destination callback : Buffer Ptr: %p, Reference Ptr: %p and refnum is %d\n", thisIndex, CkMyPe(), CkMyNode(), dest->ptr, dest->ref, CkGetRefNum(m)));
 
     int refNum = CkGetRefNum(m);
 
@@ -175,12 +175,10 @@ public:
     }
 
     // Verify that destination pointer is equal to the buffer pointer returned
-    CkAssert(destPointer == ack->ptr);
+    CkAssert(destPointer == dest->ptr);
 
     // Verify that reference pointer is equal to the reference pointer returned
-    CkAssert(&valCounter == ack->ref);
-
-
+    CkAssert(&valCounter == dest->ref);
 
     if(cbCounter == 3) {
       // Release Resources for my destinations
index faf2ae4ce2b6ee20b19ee859a0cbf6e879c22069..0110b0435dc5e5b6e69c5ae6452d553d2f35fdbd 100644 (file)
@@ -351,25 +351,9 @@ void LrtsSetRdmaBufferInfo(void *info, const void *ptr, int size, unsigned short
 }
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
-  // Register local buffer if it is not registered
-  if(*destMode == CMK_BUFFER_UNREG) {
-    ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl =
-                                      registerDirectMem(ncpyOpInfo->destPtr,
-                                                        ncpyOpInfo->size,
-                                                        GNI_MEM_READWRITE);
-    *destMode = CMK_BUFFER_REG;
-
-    // set mem_hndl in the origDestLayerInfoPtr
-    ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->origDestLayerInfoPtr))->mem_hndl =
-                            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl;
-  }
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo) {
 
-  if(*srcMode == CMK_BUFFER_UNREG) {
+  if(ncpyOpInfo->srcMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform PUT
 
 #if CMK_SMP
@@ -398,7 +382,7 @@ void LrtsIssueRget(
 
     uint64_t src_addr = (uint64_t)(ncpyOpInfo->srcPtr);
     uint64_t dest_addr = (uint64_t)(ncpyOpInfo->destPtr);
-    uint64_t length    = (uint64_t)(ncpyOpInfo->size);
+    uint64_t length    = (uint64_t)(ncpyOpInfo->srcSize);
 
     //check alignment as Rget in GNI requires 4 byte alignment for src_addr, dest_adder and size
     if(((src_addr % 4)==0) && ((dest_addr % 4)==0) && ((length % 4)==0)) {
@@ -410,10 +394,10 @@ void LrtsIssueRget(
       // perform GET directly
       gni_return_t status = post_rdma(
                             src_addr,
-                            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl,
+                            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
                             dest_addr,
-                            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl,
-                            ncpyOpInfo->size,
+                            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+                            ncpyOpInfo->srcSize,
                             (uint64_t)ncpyOpInfo,
                             CmiNodeOf(ncpyOpInfo->srcPe),
                             GNI_POST_RDMA_GET,
@@ -440,25 +424,8 @@ void LrtsIssueRget(
 }
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
-  // Register local buffer if it is not registered
-  if(*srcMode == CMK_BUFFER_UNREG) {
-    ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl =
-                                      registerDirectMem(ncpyOpInfo->srcPtr,
-                                                        ncpyOpInfo->size,
-                                                        GNI_MEM_READ_ONLY);
-    *srcMode = CMK_BUFFER_REG;
-
-    // set mem_hndl in the origSrcLayerInfoPtr
-    ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->origSrcLayerInfoPtr))->mem_hndl =
-                            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl;
-  }
-
-  if(*destMode == CMK_BUFFER_UNREG) {
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo) {
+  if(ncpyOpInfo->destMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform GET
 
     // send all the data to the source to register and perform a get
@@ -495,10 +462,10 @@ void LrtsIssueRput(
 #else // nonsmp mode
     // perform PUT directly
     gni_return_t status = post_rdma(dest_addr,
-                          ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
                           src_addr,
-                          ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl,
-                          ncpyOpInfo->size,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+                          ncpyOpInfo->srcSize,
                           (uint64_t)ncpyOpInfo,
                           CmiNodeOf(ncpyOpInfo->destPe),
                           GNI_POST_RDMA_PUT,
@@ -537,10 +504,10 @@ void LrtsDeregisterMem(const void *ptr, void *info, int pe, unsigned short int m
 void _performOneRgetForWorkerThread(MSG_LIST *ptr) {
   NcpyOperationInfo *ncpyOpInfo = (NcpyOperationInfo *)(ptr->msg);
   post_rdma((uint64_t)ncpyOpInfo->srcPtr,
-            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl,
+            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
             (uint64_t)ncpyOpInfo->destPtr,
-            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl,
-            ncpyOpInfo->size,
+            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+            ncpyOpInfo->srcSize,
             (uint64_t)ncpyOpInfo,
             ptr->destNode,
             GNI_POST_RDMA_GET,
@@ -551,10 +518,10 @@ void _performOneRgetForWorkerThread(MSG_LIST *ptr) {
 void _performOneRputForWorkerThread(MSG_LIST *ptr) {
   NcpyOperationInfo *ncpyOpInfo = (NcpyOperationInfo *)(ptr->msg);
   post_rdma((uint64_t)ncpyOpInfo->destPtr,
-            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->mem_hndl,
+            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
             (uint64_t)ncpyOpInfo->srcPtr,
-            ((CmiGNIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->mem_hndl,
-            ncpyOpInfo->size,
+            ((CmiGNIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+            ncpyOpInfo->srcSize,
             (uint64_t)ncpyOpInfo,
             ptr->destNode,
             GNI_POST_RDMA_PUT,
index 9e40b041af65374ec5d9d06230a288b2f4890b28..07b146b43a22ccf904edba1389f4ffce3bec1e9e 100644 (file)
@@ -183,36 +183,10 @@ typedef struct _cmi_gni_rzv_rdma_reverse_op {
 void LrtsSetRdmaBufferInfo(void *info, const void *ptr, int size, unsigned short int mode);
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  const void* srcAddr,
-  void *srcInfo,
-  void *srcAck,
-  int srcAckSize,
-  int srcPe,
-  unsigned short int *srcMode,
-  const void* destAddr,
-  void *destInfo,
-  void *destAck,
-  int destAckSize,
-  int destPe,
-  unsigned short int *destMode,
-  int size);
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo);
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  const void* destAddr,
-  void *destInfo,
-  void *destAck,
-  int destAckSize,
-  int destPe,
-  unsigned short int *destMode,
-  const void* srcAddr,
-  void *srcInfo,
-  void *srcAck,
-  int srcAckSize,
-  int srcPe,
-  unsigned short int *srcMode,
-  int size);
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo);
 
 // Method performs RDMA operations
 gni_return_t post_rdma(
index 335d53091c14f6a4d81fb805e54b35567b839e76..a027a343b492dc48c844a09a3c37776a214f91d7 100644 (file)
@@ -2366,10 +2366,10 @@ static void PumpNetworkSmsg()
                 CMI_GNI_UNLOCK(smsg_mailbox_lock)
 
                 post_rdma((uint64_t)newNcpyOpInfo->destPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->destLayerInfo))->mem_hndl,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
                           (uint64_t)newNcpyOpInfo->srcPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->srcLayerInfo))->mem_hndl,
-                          newNcpyOpInfo->size,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+                          newNcpyOpInfo->srcSize,
                           (uint64_t)newNcpyOpInfo,
                           CmiNodeOf(newNcpyOpInfo->destPe),
                           GNI_POST_RDMA_PUT,
@@ -2444,16 +2444,16 @@ static void PumpNetworkSmsg()
                 resetNcpyOpInfoPointers(newNcpyOpInfo);
 
                 // Register source buffer
-                ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->srcLayerInfo))->mem_hndl =
-                                              registerDirectMem(newNcpyOpInfo->srcPtr,
-                                                                newNcpyOpInfo->size,
+                ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl = 
+                                            registerDirectMem(newNcpyOpInfo->srcPtr,
+                                                                newNcpyOpInfo->srcSize,
                                                                 GNI_MEM_READ_ONLY);
 
                 post_rdma((uint64_t)newNcpyOpInfo->destPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->destLayerInfo))->mem_hndl,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
                           (uint64_t)newNcpyOpInfo->srcPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->srcLayerInfo))->mem_hndl,
-                          newNcpyOpInfo->size,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+                          newNcpyOpInfo->srcSize,
                           (uint64_t)newNcpyOpInfo,
                           CmiNodeOf(newNcpyOpInfo->destPe),
                           GNI_POST_RDMA_PUT,
@@ -2474,16 +2474,16 @@ static void PumpNetworkSmsg()
 
                 resetNcpyOpInfoPointers(newNcpyOpInfo);
 
-                ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->destLayerInfo))->mem_hndl =
+                ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl =
                                               registerDirectMem(newNcpyOpInfo->destPtr,
-                                                                newNcpyOpInfo->size,
+                                                                newNcpyOpInfo->srcSize,
                                                                 GNI_MEM_READWRITE);
 
                 post_rdma((uint64_t)newNcpyOpInfo->srcPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->srcLayerInfo))->mem_hndl,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
                           (uint64_t)newNcpyOpInfo->destPtr,
-                          ((CmiGNIRzvRdmaPtr_t *)(newNcpyOpInfo->destLayerInfo))->mem_hndl,
-                          newNcpyOpInfo->size,
+                          ((CmiGNIRzvRdmaPtr_t *)((char *)(newNcpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->mem_hndl,
+                          newNcpyOpInfo->srcSize,
                           (uint64_t)newNcpyOpInfo,
                           CmiNodeOf(newNcpyOpInfo->srcPe),
                           GNI_POST_RDMA_GET,
index 22c2ff865bf63c518b7b391fae75866fd51f4707..e5dc48970863d8fbef9909c3ddfb65a0b8414939 100644 (file)
@@ -81,10 +81,7 @@ void MPIPostOneBuffer(const void *buffer, void *ref, int size, int pe, int tag,
 /* Support for Nocopy Direct API */
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfoMsg,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfoMsg) {
 
   // Generate a new tag
   int tag = getNewMPITag();
@@ -115,14 +112,11 @@ void LrtsIssueRget(
 
   // Post an MPI_Irecv for the destination buffer with the tag
   // ONESIDED_BUFFER_DIRECT_RECV indicates that the method should post an irecv
-  MPIPostOneBuffer(ncpyOpInfoMsg->destPtr, ncpyOpInfoMsg, ncpyOpInfoMsg->size, ncpyOpInfoMsg->srcPe, tag, ONESIDED_BUFFER_DIRECT_RECV);
+  MPIPostOneBuffer(ncpyOpInfoMsg->destPtr, ncpyOpInfoMsg, ncpyOpInfoMsg->srcSize, ncpyOpInfoMsg->srcPe, tag, ONESIDED_BUFFER_DIRECT_RECV);
 }
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfoMsg,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfoMsg) {
 
   // Generate a new tag
   int tag = getNewMPITag();
@@ -152,7 +146,7 @@ void LrtsIssueRput(
 
   // Post an MPI_ISend for the source buffer with the tag
   // ONESIDED_BUFFER_DIRECT_SEND indicates that the method should post an isend
-  MPIPostOneBuffer(ncpyOpInfoMsg->srcPtr, ncpyOpInfoMsg, ncpyOpInfoMsg->size, ncpyOpInfoMsg->destPe, tag, ONESIDED_BUFFER_DIRECT_SEND);
+  MPIPostOneBuffer(ncpyOpInfoMsg->srcPtr, ncpyOpInfoMsg, ncpyOpInfoMsg->srcSize, ncpyOpInfoMsg->destPe, tag, ONESIDED_BUFFER_DIRECT_SEND);
 }
 
 // Method invoked to deregister source memory (Empty method to maintain API consistency)
index 9a17fdc78f637d18dd3f76916fcf338094f7669f..a2e927d582cd11f72fba1d5c80a2f76ade384113 100644 (file)
@@ -886,7 +886,7 @@ static int PumpMsgs(void) {
 
               MPIPostOneBuffer(myBuffer,
                                ncpyOpInfoMsg,
-                               ncpyOpInfoMsg->size,
+                               ncpyOpInfoMsg->srcSize,
                                otherPe,
                                ncpyOpInfoMsg->tag,
                                postMsgType);
index 033d6d31fb64d2203bce6d3fc36ab68e3ea24ba8..462944bf261da67770a0e80c5b75afbcafe16761 100644 (file)
@@ -233,7 +233,7 @@ void process_onesided_reg_and_put(struct fi_cq_tagged_entry *e, OFIRequest *req)
   // Do not free as this message
   ncpyOpInfo->freeMe = 0;
 
-  struct fid_mr *mr = registerDirectMemory(ncpyOpInfo->srcPtr, ncpyOpInfo->size);
+  struct fid_mr *mr = registerDirectMemory(ncpyOpInfo->srcPtr, ncpyOpInfo->srcSize);
   const char *rbuf  = (FI_MR_SCALABLE == context.mr_mode) ? 0 : (const char*)(ncpyOpInfo->destPtr);
 
   // Allocate a completion object for tracking write completion and ack handling
@@ -245,8 +245,8 @@ void process_onesided_reg_and_put(struct fi_cq_tagged_entry *e, OFIRequest *req)
       (char*)(ncpyOpInfo->srcPtr),
       rbuf,
       CmiNodeOf(ncpyOpInfo->destPe),
-      ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->destLayerInfo))->key,
-      ncpyOpInfo->size,
+      ((CmiOfiRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->key,
+      ncpyOpInfo->srcSize,
       mr,
       ofi_onesided_direct_operation_callback,
       (void *)rdmaComp,
@@ -264,7 +264,7 @@ void process_onesided_reg_and_get(struct fi_cq_tagged_entry *e, OFIRequest *req)
   NcpyOperationInfo *ncpyOpInfo = (NcpyOperationInfo *)(data);
   resetNcpyOpInfoPointers(ncpyOpInfo);
 
-  struct fid_mr *mr = registerDirectMemory(ncpyOpInfo->destPtr, ncpyOpInfo->size);
+  struct fid_mr *mr = registerDirectMemory(ncpyOpInfo->destPtr, ncpyOpInfo->srcSize);
   const char *rbuf  = (FI_MR_SCALABLE == context.mr_mode) ? 0 : (const char*)(ncpyOpInfo->srcPtr);
 
   // Allocate a completion object for tracking write completion and ack handling
@@ -276,8 +276,8 @@ void process_onesided_reg_and_get(struct fi_cq_tagged_entry *e, OFIRequest *req)
       (char*)(ncpyOpInfo->destPtr),
       rbuf,
       CmiNodeOf(ncpyOpInfo->srcPe),
-      ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo))->key,
-      ncpyOpInfo->size,
+      ((CmiOfiRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->key,
+      ncpyOpInfo->srcSize,
       mr,
       ofi_onesided_direct_operation_callback,
       (void *)rdmaComp,
@@ -287,26 +287,11 @@ void process_onesided_reg_and_get(struct fi_cq_tagged_entry *e, OFIRequest *req)
 
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo) {
 
   OFIRequest *req;
 
-  // Register local buffer if it is not registered
-  if(*destMode == CMK_BUFFER_UNREG) {
-    CmiOfiRdmaPtr_t *dest_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
-    dest_info->mr = registerDirectMemory(ncpyOpInfo->destPtr, ncpyOpInfo->size);
-    dest_info->key = fi_mr_key(dest_info->mr);
-    *destMode = CMK_BUFFER_REG;
-
-    // set registration info in the origDestLayerInfoPtr
-    ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->origDestLayerInfoPtr))->mr = dest_info->mr;
-    ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->origDestLayerInfoPtr))->key = dest_info->key;
-  }
-
-  if(*srcMode == CMK_BUFFER_UNREG) {
+  if(ncpyOpInfo->srcMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform PUT
 #if USE_OFIREQUEST_CACHE
     req = alloc_request(context.request_cache);
@@ -330,8 +315,8 @@ void LrtsIssueRget(
              req);
   } else {
 
-    CmiOfiRdmaPtr_t *dest_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
-    CmiOfiRdmaPtr_t *src_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
+    CmiOfiRdmaPtr_t *dest_info = (CmiOfiRdmaPtr_t *)((char *)ncpyOpInfo->destLayerInfo + CmiGetRdmaCommonInfoSize());
+    CmiOfiRdmaPtr_t *src_info = (CmiOfiRdmaPtr_t *)((char *)ncpyOpInfo->srcLayerInfo + CmiGetRdmaCommonInfoSize());
 
     const char *rbuf        = (FI_MR_SCALABLE == context.mr_mode) ? 0 : (const char*)(ncpyOpInfo->srcPtr);
 
@@ -345,7 +330,7 @@ void LrtsIssueRget(
         rbuf,
         CmiNodeOf(ncpyOpInfo->srcPe),
         src_info->key,
-        ncpyOpInfo->size,
+        ncpyOpInfo->srcSize,
         dest_info->mr,
         ofi_onesided_direct_operation_callback,
         (void *)rdmaComp,
@@ -355,26 +340,11 @@ void LrtsIssueRget(
 }
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo) {
 
   OFIRequest *req;
 
-  // Register local buffer if it is not registered
-  if(*srcMode == CMK_BUFFER_UNREG) {
-    CmiOfiRdmaPtr_t *src_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
-    src_info->mr = registerDirectMemory(ncpyOpInfo->srcPtr, ncpyOpInfo->size);
-    src_info->key = fi_mr_key(src_info->mr);
-    *srcMode = CMK_BUFFER_REG;
-
-    // set registration info in the origSrcLayerInfoPtr
-    ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->origSrcLayerInfoPtr))->mr = src_info->mr;
-    ((CmiOfiRdmaPtr_t *)(ncpyOpInfo->origSrcLayerInfoPtr))->key = src_info->key;
-  }
-
-  if(*destMode == CMK_BUFFER_UNREG) {
+  if(ncpyOpInfo->destMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform PUT
 #if USE_OFIREQUEST_CACHE
     req = alloc_request(context.request_cache);
@@ -398,8 +368,8 @@ void LrtsIssueRput(
              req);
   } else {
 
-    CmiOfiRdmaPtr_t *dest_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
-    CmiOfiRdmaPtr_t *src_info = (CmiOfiRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
+    CmiOfiRdmaPtr_t *dest_info = (CmiOfiRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize());
+    CmiOfiRdmaPtr_t *src_info = (CmiOfiRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize());
 
     const char *rbuf        = (FI_MR_SCALABLE == context.mr_mode) ? 0 : (const char*)(ncpyOpInfo->destPtr);
 
@@ -413,7 +383,7 @@ void LrtsIssueRput(
         rbuf,
         CmiNodeOf(ncpyOpInfo->destPe),
         dest_info->key,
-        ncpyOpInfo->size,
+        ncpyOpInfo->srcSize,
         src_info->mr,
         ofi_onesided_direct_operation_callback,
         (void *)rdmaComp,
index af7c6dc4326a8c741a6f143bfc5698de8db97619..0704a41f38c013a165c34872e7aad009bfbf8738 100644 (file)
@@ -212,7 +212,7 @@ void ncpyOpInfo_recv_done(pami_context_t ctxt, void *clientdata, pami_result_t r
 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
   CpvAccess(uselock) = 0;
 #endif
-  LrtsIssueRget(ncpyOpInfo, NULL, NULL);
+  LrtsIssueRget(ncpyOpInfo);
 #if CMK_SMP && !CMK_ENABLE_ASYNC_PROGRESS
   CpvAccess(uselock) = 1;
 #endif
@@ -265,10 +265,7 @@ void rzv_rdma_direct_recv_done (pami_context_t     ctxt,
 }
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo) {
 
 #if CMK_SMP && CMK_ENABLE_ASYNC_PROGRESS
   int c = CmiMyNode() % cmi_pami_numcontexts;
@@ -283,7 +280,7 @@ void LrtsIssueRget(
   size_t dst_context = 0;
 #endif
 
-  CmiPAMIRzvRdmaPtr_t *src_Info = (CmiPAMIRzvRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
+  CmiPAMIRzvRdmaPtr_t *src_Info = (CmiPAMIRzvRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize());
 
   INCR_ORECVS();
 
@@ -291,14 +288,11 @@ void LrtsIssueRget(
   pami_endpoint_t origin;
   PAMI_Endpoint_create (cmi_pami_client, (pami_task_t)CmiNodeOf(ncpyOpInfo->srcPe), dst_context, &origin);
 
-  getData(my_context, origin, (void *)(ncpyOpInfo->destPtr), ncpyOpInfo, dst_context, rzv_rdma_direct_recv_done, src_Info->offset, &src_Info->mregion, ncpyOpInfo->size);
+  getData(my_context, origin, (void *)(ncpyOpInfo->destPtr), ncpyOpInfo, dst_context, rzv_rdma_direct_recv_done, src_Info->offset, &src_Info->mregion, ncpyOpInfo->srcSize);
 }
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo) {
 
   // Create end point for destination node
   pami_endpoint_t target;
index 5cd0a3f522062ec5ded979007c4beb473b5c73c4..49e5d5974d2ccdd792401fda155704266eecac06 100644 (file)
@@ -199,15 +199,9 @@ typedef struct _cmi_rdma_direct_ack {
 /* Support for Nocopy Direct API */
 void LrtsSetRdmaBufferInfo(void *info, const void *ptr, int size, unsigned short int mode);
 void LrtsSetRdmaNcpyAck(RdmaAckHandlerFn fn);
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode);
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo);
 
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode);
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo);
 
 void LrtsDeregisterMem(const void *ptr, void *info, int pe, unsigned short int mode);
 
@@ -226,23 +220,15 @@ void CmiSetRdmaNcpyAck(RdmaAckHandlerFn fn){
 }
 
 /* Perform an RDMA Get operation into the local destination address from the remote source address*/
-void CmiIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
+void CmiIssueRget(NcpyOperationInfo *ncpyOpInfo) {
   // Use network RDMA for a PE on a remote host
-  LrtsIssueRget(ncpyOpInfo, srcMode, destMode);
+  LrtsIssueRget(ncpyOpInfo);
 }
 
 /* Perform an RDMA Put operation into the remote destination address from the local source address */
-void CmiIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
+void CmiIssueRput(NcpyOperationInfo *ncpyOpInfo) {
   // Use network RDMA for a PE on a remote host
-  LrtsIssueRput(ncpyOpInfo, srcMode, destMode);
+  LrtsIssueRput(ncpyOpInfo);
 }
 
 /* De-register registered memory for pointer */
index 9d8553e85518761485135a37ab59cf801828850d..ad931b0feb23bd16885418702d877b50f5b6b9a9 100644 (file)
@@ -1737,7 +1737,7 @@ static inline void processRecvWC(struct ibv_wc *recvWC,const int toBuffer){
                
                resetNcpyOpInfoPointers(newNcpyOpInfo);
                
-               struct ibv_mr *mr = registerDirectMemory(newNcpyOpInfo->srcPtr, newNcpyOpInfo->size);
+               struct ibv_mr *mr = registerDirectMemory(newNcpyOpInfo->srcPtr, newNcpyOpInfo->srcSize);
                struct infiRdmaPacket *rdmaPacket = (struct infiRdmaPacket *)malloc(sizeof(struct infiRdmaPacket));
                rdmaPacket->type = INFI_ONESIDED_DIRECT;
                rdmaPacket->localBuffer = newNcpyOpInfo;
@@ -1745,8 +1745,8 @@ static inline void processRecvWC(struct ibv_wc *recvWC,const int toBuffer){
                postRdma((uint64_t)(newNcpyOpInfo->srcPtr),
                        mr->lkey,
                        (uint64_t)(newNcpyOpInfo->destPtr),
-            ((CmiVerbsRdmaPtr_t *)(newNcpyOpInfo->destLayerInfo))->key,
-                       newNcpyOpInfo->size,
+            ((CmiVerbsRdmaPtr_t *)((char *)(newNcpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize()))->key,
+                       newNcpyOpInfo->srcSize,
                        newNcpyOpInfo->destPe,
                        (uint64_t)rdmaPacket,
                        IBV_WR_RDMA_WRITE);
@@ -1760,7 +1760,7 @@ static inline void processRecvWC(struct ibv_wc *recvWC,const int toBuffer){
                
                resetNcpyOpInfoPointers(newNcpyOpInfo);
                
-               struct ibv_mr *mr = registerDirectMemory(newNcpyOpInfo->destPtr, newNcpyOpInfo->size);
+               struct ibv_mr *mr = registerDirectMemory(newNcpyOpInfo->destPtr, newNcpyOpInfo->srcSize);
                
                struct infiRdmaPacket *rdmaPacket = (struct infiRdmaPacket *)malloc(sizeof(struct infiRdmaPacket));
                rdmaPacket->type = INFI_ONESIDED_DIRECT;
@@ -1769,8 +1769,8 @@ static inline void processRecvWC(struct ibv_wc *recvWC,const int toBuffer){
                postRdma((uint64_t)newNcpyOpInfo->destPtr,
                        mr->lkey,
                        (uint64_t)newNcpyOpInfo->srcPtr,
-                       ((CmiVerbsRdmaPtr_t *)(newNcpyOpInfo->srcLayerInfo))->key,
-                       newNcpyOpInfo->size,
+                       ((CmiVerbsRdmaPtr_t *)((char *)(newNcpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize()))->key,
+                       newNcpyOpInfo->srcSize,
                        newNcpyOpInfo->srcPe,
                        (uint64_t)rdmaPacket,
                        IBV_WR_RDMA_READ);
index 040a13c61e1a3a909b150c7ea30bf99038ce271f..3620fb949bb72a78fc541b807b80c230c563cbfa 100644 (file)
@@ -169,24 +169,9 @@ struct ibv_mr* registerDirectMemory(const void *addr, int size) {
 }
 
 // Perform an RDMA Get call into the local destination address from the remote source address
-void LrtsIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
-  // Register local buffer if it is not registered
-  if(*destMode == CMK_BUFFER_UNREG) {
-    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
-    dest_info->mr = registerDirectMemory(ncpyOpInfo->destPtr, ncpyOpInfo->size);
-    dest_info->key = dest_info->mr->rkey;
-    *destMode = CMK_BUFFER_REG;
-
-    // set registration info in the origDestLayerInfoPtr
-    ((CmiVerbsRdmaPtr_t *)(ncpyOpInfo->origDestLayerInfoPtr))->mr = dest_info->mr;
-    ((CmiVerbsRdmaPtr_t *)(ncpyOpInfo->origDestLayerInfoPtr))->key = dest_info->key;
-  }
+void LrtsIssueRget(NcpyOperationInfo *ncpyOpInfo) {
 
-  if(*srcMode == CMK_BUFFER_UNREG) {
+  if(ncpyOpInfo->srcMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform PUT
     infiPacket packet;
     MallocInfiPacket(packet);
@@ -206,14 +191,14 @@ void LrtsIssueRget(
     rdmaPacket->type = INFI_ONESIDED_DIRECT;
     rdmaPacket->localBuffer = ncpyOpInfo;
 
-    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
-    CmiVerbsRdmaPtr_t *src_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
+    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize());
+    CmiVerbsRdmaPtr_t *src_info = (CmiVerbsRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize());
 
     postRdma((uint64_t)(ncpyOpInfo->destPtr),
             dest_info->key,
             (uint64_t)(ncpyOpInfo->srcPtr),
             src_info->key,
-            ncpyOpInfo->size,
+            ncpyOpInfo->srcSize,
             ncpyOpInfo->srcPe,
             (uint64_t)rdmaPacket,
             IBV_WR_RDMA_READ);
@@ -221,24 +206,9 @@ void LrtsIssueRget(
 }
 
 // Perform an RDMA Put call into the remote destination address from the local source address
-void LrtsIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
-
-  // Register local buffer if it is not registered
-  if(*srcMode == CMK_BUFFER_UNREG) {
-    CmiVerbsRdmaPtr_t *src_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
-    src_info->mr = registerDirectMemory(ncpyOpInfo->srcPtr, ncpyOpInfo->size);
-    src_info->key = src_info->mr->rkey;
-    *srcMode = CMK_BUFFER_REG;
-
-    // set registration info in the origSrcLayerInfoPtr
-    ((CmiVerbsRdmaPtr_t *)(ncpyOpInfo->origSrcLayerInfoPtr))->mr = src_info->mr;
-    ((CmiVerbsRdmaPtr_t *)(ncpyOpInfo->origSrcLayerInfoPtr))->key = src_info->key;
-  }
+void LrtsIssueRput(NcpyOperationInfo *ncpyOpInfo) {
 
-  if(*destMode == CMK_BUFFER_UNREG) {
+  if(ncpyOpInfo->destMode == CMK_BUFFER_UNREG) {
     // Remote buffer is unregistered, send a message to register it and perform GET
     infiPacket packet;
     MallocInfiPacket(packet);
@@ -249,6 +219,7 @@ void LrtsIssueRput(
     packet->ogm  = NULL;
 
     struct ibv_mr *packetKey = METADATAFIELD(ncpyOpInfo)->key;
+    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize());
     OtherNode node = &nodes[CmiNodeOf(ncpyOpInfo->destPe)];
     EnqueuePacket(node, packet, ncpyOpInfo->ncpyOpInfoSize, packetKey);
 
@@ -257,14 +228,14 @@ void LrtsIssueRput(
     rdmaPacket->type = INFI_ONESIDED_DIRECT;
     rdmaPacket->localBuffer = ncpyOpInfo;
 
-    CmiVerbsRdmaPtr_t *src_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->srcLayerInfo);
-    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)(ncpyOpInfo->destLayerInfo);
+    CmiVerbsRdmaPtr_t *src_info = (CmiVerbsRdmaPtr_t *)((char *)(ncpyOpInfo->srcLayerInfo) + CmiGetRdmaCommonInfoSize());
+    CmiVerbsRdmaPtr_t *dest_info = (CmiVerbsRdmaPtr_t *)((char *)(ncpyOpInfo->destLayerInfo) + CmiGetRdmaCommonInfoSize());
 
     postRdma((uint64_t)(ncpyOpInfo->srcPtr),
             src_info->key,
             (uint64_t)(ncpyOpInfo->destPtr),
             dest_info->key,
-            ncpyOpInfo->size,
+            ncpyOpInfo->srcSize,
             ncpyOpInfo->destPe,
             (uint64_t)rdmaPacket,
             IBV_WR_RDMA_WRITE);
index e8d612428ce7eed9b206b32671f737379de1293b..fc7e015b8591661431a7795535826202c46d3bcc 100644 (file)
@@ -324,31 +324,51 @@ void CkRdmaDirectAckHandler(void *ack) {
 
   NcpyOperationInfo *info = (NcpyOperationInfo *)(ack);
 
+  CkCallback *srcCb = (CkCallback *)(info->srcAck);
+  CkCallback *destCb = (CkCallback *)(info->destAck);
+
+  // reconstruct the CkNcpyBuffer object for the source
+  CkNcpyBuffer src;
+  src.ptr = info->srcPtr;
+  src.pe  = info->srcPe;
+  src.cnt = info->srcSize;
+  src.ref = info->srcRef;
+  src.mode = info->srcMode;
+  src.isRegistered = info->isSrcRegistered;
+  memcpy((char *)(&src.cb), srcCb, info->srcAckSize); // initialize cb
+  memcpy((char *)(src.layerInfo), info->srcLayerInfo, info->srcLayerSize); // initialize layerInfo
+
+  CkNcpyBuffer dest;
+  dest.ptr = info->destPtr;
+  dest.pe  = info->destPe;
+  dest.cnt = info->destSize;
+  dest.ref = info->destRef;
+  dest.mode = info->destMode;
+  dest.isRegistered = info->isDestRegistered;
+  memcpy((char *)(&dest.cb), destCb, info->destAckSize); // initialize cb
+  memcpy((char *)(dest.layerInfo), info->destLayerInfo, info->destLayerSize); // initialize layerInfo
+
   if(info->ackMode == 0 || info->ackMode == 1) {
-    //Invoke the sender's callback
-    CkNcpyAck srcAck(info->srcPtr, info->srcRef);
 
 #if CMK_SMP
     //call to callbackgroup to call the callback when calling from comm thread
     //this adds one more trip through the scheduler
-    _ckcallbackgroup[info->srcPe].call(*(CkCallback *)(info->srcAck), sizeof(CkNcpyAck), (const char *)(&srcAck));
+    _ckcallbackgroup[info->srcPe].call(*(CkCallback *)(info->srcAck), sizeof(CkNcpyBuffer), (const char *)(&src));
 #else
-    //Invoke the destination callback
+    //Invoke the source callback
-    ((CkCallback *)(info->srcAck))->send(sizeof(CkNcpyAck), &srcAck);
+    ((CkCallback *)(info->srcAck))->send(sizeof(CkNcpyBuffer), &src);
 #endif
   }
 
   if(info->ackMode == 0 || info->ackMode == 2) {
-    //Invoke the receiver's callback
-    CkNcpyAck destAck(info->destPtr, info->destRef);
 
 #if CMK_SMP
     //call to callbackgroup to call the callback when calling from comm thread
     //this adds one more trip through the scheduler
-    _ckcallbackgroup[info->destPe].call(*(CkCallback *)(info->destAck), sizeof(CkNcpyAck), (const char *)(&destAck));
+    _ckcallbackgroup[info->destPe].call(*(CkCallback *)(info->destAck), sizeof(CkNcpyBuffer), (const char *)(&dest));
 #else
     //Invoke the destination callback
-    ((CkCallback *)(info->destAck))->send(sizeof(CkNcpyAck), &destAck);
+    ((CkCallback *)(info->destAck))->send(sizeof(CkNcpyBuffer), &dest);
 #endif
   }
 
@@ -390,10 +410,18 @@ void CkNcpyBuffer::cmaGet(CkNcpyBuffer &source) {
 
 void CkNcpyBuffer::rdmaGet(CkNcpyBuffer &source) {
 
-  int layerInfoSize = CMK_NOCOPY_DIRECT_BYTES;
+  int layerInfoSize = CMK_COMMON_NOCOPY_DIRECT_BYTES + CMK_NOCOPY_DIRECT_BYTES;
   int ackSize = sizeof(CkCallback);
 
-  // Create a general object that can be used across layers
+  if(mode == CK_BUFFER_UNREG) {
+    // register it because it is required for RGET
+    CmiSetRdmaBufferInfo(layerInfo + CmiGetRdmaCommonInfoSize(), ptr, cnt, mode);
+
+    isRegistered = true;
+    mode = CK_BUFFER_REG;
+  }
+
+  // Create a general object that can be used across layers and can store the state of the CkNcpyBuffer objects
   int ncpyObjSize = getNcpyOpInfoTotalSize(
                       layerInfoSize,
                       ackSize,
@@ -403,25 +431,28 @@ void CkNcpyBuffer::rdmaGet(CkNcpyBuffer &source) {
   NcpyOperationInfo *ncpyOpInfo = (NcpyOperationInfo *)CmiAlloc(ncpyObjSize);
 
   setNcpyOpInfo(source.ptr,
-                (char *)(&(source.layerInfo[0])) + CmiGetRdmaCommonInfoSize(),
+                (char *)(source.layerInfo),
                 layerInfoSize,
                 (char *)(&source.cb),
                 ackSize,
+                source.cnt,
+                source.mode,
+                source.isRegistered,
                 source.pe,
                 source.ref,
                 ptr,
-                (char *)(&(layerInfo[0])) + CmiGetRdmaCommonInfoSize(),
+                (char *)(layerInfo),
                 layerInfoSize,
                 (char *)(&cb),
                 ackSize,
+                cnt,
+                mode,
+                isRegistered,
                 pe,
                 ref,
-                cnt,
                 ncpyOpInfo);
 
-  CmiIssueRget(ncpyOpInfo,
-               &source.mode,
-               &mode);
+  CmiIssueRget(ncpyOpInfo);
 }
 
 // Perform a nocopy get operation into this destination using the passed source
@@ -442,26 +473,22 @@ void CkNcpyBuffer::get(CkNcpyBuffer &source){
   if(transferMode == ncpyTransferMode::MEMCPY) {
     memcpyGet(source);
 
-    //Invoke the receiver's callback
-    CkNcpyAck srcAck(ptr, ref);
-    cb.send(sizeof(CkNcpyAck), &srcAck);
+    //Invoke the source callback
+    source.cb.send(sizeof(CkNcpyBuffer), &source);
 
-    //Invoke the sender's callback
-    CkNcpyAck destAck(source.ptr, source.ref);
-    source.cb.send(sizeof(CkNcpyAck), &destAck);
+    //Invoke the destination callback
+    cb.send(sizeof(CkNcpyBuffer), this);
 
 #if CMK_USE_CMA
   } else if(transferMode == ncpyTransferMode::CMA) {
 
     cmaGet(source);
 
-    //Invoke the receiver's callback
-    CkNcpyAck srcAck(ptr, ref);
-    cb.send(sizeof(CkNcpyAck), &srcAck);
+    //Invoke the source callback
+    source.cb.send(sizeof(CkNcpyBuffer), &source);
 
-    //Invoke the sender's callback
-    CkNcpyAck destAck(source.ptr, source.ref);
-    source.cb.send(sizeof(CkNcpyAck), &destAck);
+    //Invoke the destination callback
+    cb.send(sizeof(CkNcpyBuffer), this);
 
 #endif
   } else if (transferMode == ncpyTransferMode::RDMA) {
@@ -491,10 +518,18 @@ void CkNcpyBuffer::cmaPut(CkNcpyBuffer &destination) {
 
 void CkNcpyBuffer::rdmaPut(CkNcpyBuffer &destination) {
 
-  int layerInfoSize = CMK_NOCOPY_DIRECT_BYTES;
+  int layerInfoSize = CMK_COMMON_NOCOPY_DIRECT_BYTES + CMK_NOCOPY_DIRECT_BYTES;
   int ackSize = sizeof(CkCallback);
 
-  // Create a general object that can be used across layers
+  if(mode == CK_BUFFER_UNREG) {
+    // register it because it is required for RPUT
+    CmiSetRdmaBufferInfo(layerInfo + CmiGetRdmaCommonInfoSize(), ptr, cnt, mode);
+
+    isRegistered = true;
+    mode = CK_BUFFER_REG;
+  }
+
+  // Create a general object that can be used across layers and can store the state of the CkNcpyBuffer objects
   int ncpyObjSize = getNcpyOpInfoTotalSize(
                       layerInfoSize,
                       ackSize,
@@ -504,25 +539,28 @@ void CkNcpyBuffer::rdmaPut(CkNcpyBuffer &destination) {
   NcpyOperationInfo *ncpyOpInfo = (NcpyOperationInfo *)CmiAlloc(ncpyObjSize);
 
   setNcpyOpInfo(ptr,
-                (char *)(&(layerInfo[0])) + CmiGetRdmaCommonInfoSize(),
+                (char *)(layerInfo),
                 layerInfoSize,
                 (char *)(&cb),
                 ackSize,
+                cnt,
+                mode,
+                isRegistered,
                 pe,
                 ref,
                 destination.ptr,
-                (char *)(&(destination.layerInfo[0])) + CmiGetRdmaCommonInfoSize(),
+                (char *)(destination.layerInfo),
                 layerInfoSize,
                 (char *)(&destination.cb),
                 ackSize,
+                destination.cnt,
+                destination.mode,
+                destination.isRegistered,
                 destination.pe,
                 destination.ref,
-                cnt,
                 ncpyOpInfo);
 
-  CmiIssueRput(ncpyOpInfo,
-               &mode,
-               &destination.mode);
+  CmiIssueRput(ncpyOpInfo);
 }
 
 // Perform a nocopy put operation into the passed destination using this source
@@ -542,25 +580,21 @@ void CkNcpyBuffer::put(CkNcpyBuffer &destination){
   if(transferMode == ncpyTransferMode::MEMCPY) {
     memcpyPut(destination);
 
-    //Invoke the source callback
-    CkNcpyAck srcAck(ptr, ref);
-    cb.send(sizeof(CkNcpyAck), &srcAck);
-
     //Invoke the destination callback
-    CkNcpyAck destAck(destination.ptr, destination.ref);
-    destination.cb.send(sizeof(CkNcpyAck), &destAck);
+    destination.cb.send(sizeof(CkNcpyBuffer), &destination);
+
+    //Invoke the source callback
+    cb.send(sizeof(CkNcpyBuffer), this);
 
 #if CMK_USE_CMA
   } else if(transferMode == ncpyTransferMode::CMA) {
     cmaPut(destination);
 
-    //Invoke the source callback
-    CkNcpyAck srcAck(ptr, ref);
-    cb.send(sizeof(CkNcpyAck), &srcAck);
-
     //Invoke the destination callback
-    CkNcpyAck destAck(destination.ptr, destination.ref);
-    destination.cb.send(sizeof(CkNcpyAck), &destAck);
+    destination.cb.send(sizeof(CkNcpyBuffer), &destination);
+
+    //Invoke the source callback
+    cb.send(sizeof(CkNcpyBuffer), this);
 
 #endif
   } else if (transferMode == ncpyTransferMode::RDMA) {
index 5b1d1cead693ea99650c297cf4049381a58ddafc..0394585de128734edfc5f11c3c378080ac5b1604 100644 (file)
@@ -107,29 +107,13 @@ int getRdmaBufSize(envelope *env);
 void CkRdmaAckHandler(void *cookie);
 void CkRdmaDirectAckHandler(void *ack);
 
-// Class to represent an acknowledgement structure
-class CkNcpyAck{
-  public:
-  // pointer to the buffer
-  const void *ptr;
-
-  // reference pointer
-  // This is an optional arbitrary pointer set by the user before performing the get/put
-  // operation. It is returned back in the CkNcpyAck object.
-  const void *ref;
-
-  CkNcpyAck(const void *ptr_, const void *ref_) : ptr(ptr_), ref(ref_) {}
-};
-
-PUPbytes(CkNcpyAck);
-
 // Class to represent an RDMA buffer
 class CkNcpyBuffer{
-  private:
+
+  public:
   // bool to indicate registration for current values of ptr and cnt on pe
   bool isRegistered;
 
-  public:
   // pointer to the buffer
   const void *ptr;
 
index dde3d4df6598cdc69a6d873de75d01e04a6a8391..5a40de4cb64bf21299b4d25925e5adab3d0ba0f0 100644 (file)
@@ -37,9 +37,7 @@ static void getRequestHandler(ConverseRdmaMsg *getReqMsg){
   ncpyOpInfo->freeMe = 0;
 
   // Get is implemented internally using a call to Put
-  CmiIssueRput(ncpyOpInfo,
-               NULL,
-               NULL);
+  CmiIssueRput(ncpyOpInfo);
 }
 
 // Invoked when this PE receives a large array as the target of an Rput or the initiator of an Rget
@@ -52,7 +50,7 @@ static void putDataHandler(ConverseRdmaMsg *payloadMsg) {
-  // copy the received messsage into the user's destination address
+  // copy the received message into the user's destination address
   memcpy((char *)ncpyOpInfo->destPtr,
          (char *)payloadMsg + sizeof(ConverseRdmaMsg) + ncpyOpInfo->ncpyOpInfoSize,
-         ncpyOpInfo->size);
+         ncpyOpInfo->srcSize);
 
   // Invoke the destination ack
   ncpyOpInfo->ackMode = 2;
@@ -72,10 +70,7 @@ void CmiSetRdmaNcpyAck(RdmaAckCallerFn fn) {
   ncpyAckHandlerFn = fn;
 }
 
-void CmiIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void CmiIssueRget(NcpyOperationInfo *ncpyOpInfo) {
 
   int ncpyOpInfoSize = ncpyOpInfo->ncpyOpInfoSize;
 
@@ -94,13 +89,10 @@ void CmiIssueRget(
   CmiFree(ncpyOpInfo);
 }
 
-void CmiIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode) {
+void CmiIssueRput(NcpyOperationInfo *ncpyOpInfo) {
 
   int ncpyOpInfoSize = ncpyOpInfo->ncpyOpInfoSize;
-  int size = ncpyOpInfo->size;
+  int size = ncpyOpInfo->srcSize;
 
   // Send a ConverseRdmaMsg to the other PE sending the array
   ConverseRdmaMsg *payloadMsg = (ConverseRdmaMsg *)CmiAlloc(sizeof(ConverseRdmaMsg) + ncpyOpInfoSize + size);
index e3b25a686c1e551c060b99f9ad48a777690a7326..a07488e251010892c1b7d19cf0a7db70f9e47e68 100644 (file)
@@ -33,10 +33,7 @@ void CmiSetRdmaNcpyAck(RdmaAckCallerFn fn);
  * When the runtime invokes destAck on the destination (initiator), it indicates that the data has been successfully received in the
  * destAddr buffer.
  */
-void CmiIssueRget(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode);
+void CmiIssueRget(NcpyOperationInfo *ncpyOpInfo);
 
 /* CmiIssueRput initiates an RDMA write operation, transferring 'size' bytes of data from the local address, 'srcAddr' to the address space of 'destPe'.
  * When the runtime invokes srcAck on the source (initiator), it indicates safety to overwrite or free the srcAddr buffer.
@@ -44,10 +41,7 @@ void CmiIssueRget(
  * destAddr buffer.
  */
 
-void CmiIssueRput(
-  NcpyOperationInfo *ncpyOpInfo,
-  unsigned short int *srcMode,
-  unsigned short int *destMode);
+void CmiIssueRput(NcpyOperationInfo *ncpyOpInfo);
 
 void CmiDeregisterMem(const void *ptr, void *info, int pe, unsigned short int mode);
 
index 5b2a053b455d1eb105cda6bf7f982c8682af5b55..1c331c1d92f00334cedaf239f01ff579dcfe3ef2 100644 (file)
@@ -17,6 +17,9 @@ void setNcpyOpInfo(
     int srcLayerSize,
     char *srcAck,
     int srcAckSize,
+    int srcSize,
+    unsigned short int srcMode,
+    unsigned short int isSrcRegistered,
     int srcPe,
     const void *srcRef,
     const void *destPtr,
@@ -24,9 +27,11 @@ void setNcpyOpInfo(
     int destLayerSize,
     char *destAck,
     int destAckSize,
+    int destSize,
+    unsigned short int destMode,
+    unsigned short int isDestRegistered,
     int destPe,
     const void *destRef,
-    int size,
     NcpyOperationInfo *ncpyOpInfo) {
 
   // memcpy srcLayerInfo
@@ -49,20 +54,23 @@ void setNcpyOpInfo(
   ncpyOpInfo->srcRef = srcRef;
   ncpyOpInfo->srcLayerSize = srcLayerSize;
   ncpyOpInfo->srcAckSize = srcAckSize;
-  ncpyOpInfo->origSrcLayerInfoPtr = srcLayerInfo;
+  ncpyOpInfo->srcSize = srcSize;
+  ncpyOpInfo->srcMode = srcMode;
+  ncpyOpInfo->isSrcRegistered = isSrcRegistered;
 
   ncpyOpInfo->destPtr = destPtr;
   ncpyOpInfo->destPe = destPe;
   ncpyOpInfo->destRef = destRef;
   ncpyOpInfo->destLayerSize = destLayerSize;
   ncpyOpInfo->destAckSize = destAckSize;
-  ncpyOpInfo->origDestLayerInfoPtr = destLayerInfo;
+  ncpyOpInfo->destSize = destSize;
+  ncpyOpInfo->destMode = destMode;
+  ncpyOpInfo->isDestRegistered = isDestRegistered;
 
   ncpyOpInfo->ackMode = 0;
   ncpyOpInfo->freeMe  = 1;
 
   ncpyOpInfo->ncpyOpInfoSize = sizeof(NcpyOperationInfo) + srcLayerSize + destLayerSize + srcAckSize + destAckSize;
-  ncpyOpInfo->size = size;
 }
 
 
index c70e3d26a63c5f2a7e516858a733c1bdeb84cd81..9de55e527c45e9b70cc6ba1200fc09c94d6c87cd 100644 (file)
@@ -14,20 +14,24 @@ typedef struct ncpystruct{
 
   const void *srcPtr;
   int srcPe;
-  char *origSrcLayerInfoPtr;
   char *srcLayerInfo;
   int srcLayerSize;
   char *srcAck;
   int srcAckSize;
+  int srcSize;
+  unsigned short int srcMode;
+  unsigned short int isSrcRegistered;
   const void *srcRef;
 
   const void *destPtr;
   int destPe;
-  char *origDestLayerInfoPtr;
   char *destLayerInfo;
   int destLayerSize;
   char *destAck;
   int destAckSize;
+  int destSize;
+  unsigned short int destMode;
+  unsigned short int isDestRegistered;
   const void *destRef;
 
   // Variables used for ack handling
@@ -37,7 +41,6 @@ typedef struct ncpystruct{
   int freeMe; // 1 for free, 0 for do not free
 
   int ncpyOpInfoSize;
-  int size;
 
 }NcpyOperationInfo;
 
@@ -48,23 +51,27 @@ int getNcpyOpInfoTotalSize(
   int destAckSize);
 
 void setNcpyOpInfo(
-    const void *srcPtr,
-    char *srcLayerInfo,
-    int srcLayerSize,
-    char *srcAck,
-    int srcAckSize,
-    int srcPe,
-    const void *srcRef,
-    const void *destPtr,
-    char *destLayerInfo,
-    int destLayerSize,
-    char *destAck,
-    int destAckSize,
-    int destPe,
-    const void *destRef,
-    int size,
-    NcpyOperationInfo *ncpyOpInfo);
-
+  const void *srcPtr,
+  char *srcLayerInfo,
+  int srcLayerSize,
+  char *srcAck,
+  int srcAckSize,
+  int srcSize,
+  unsigned short int srcMode,
+  unsigned short int isSrcRegistered,
+  int srcPe,
+  const void *srcRef,
+  const void *destPtr,
+  char *destLayerInfo,
+  int destLayerSize,
+  char *destAck,
+  int destAckSize,
+  int destSize,
+  unsigned short int destMode,
+  unsigned short int isDestRegistered,
+  int destPe,
+  const void *destRef,
+  NcpyOperationInfo *ncpyOpInfo);
 
 void resetNcpyOpInfoPointers(NcpyOperationInfo *ncpyOpInfo);
 #endif