First check-in for the work on extracting common code from the MPI, LAPI and DCMF layers...
[charm.git] / src / arch / bluegenep / machine.c
1 #include <stdio.h>
2 #include <errno.h>
3 #include <stdlib.h>
4 #include <unistd.h>
5 #include <math.h>
6 #include <string.h>
7 #include <malloc.h>
8 #include <assert.h>
9
10 #include "converse.h"
11 #include "machine.h"
12 #include "pcqueue.h"
13
14 #include <bpcore/ppc450_inlines.h>
15 #include "dcmf.h"
16 #include "dcmf_multisend.h"
17
18 /* =======Beginning of Definitions of Performance-Specific Macros =======*/
19 /* =======End of Definitions of Performance-Specific Macros =======*/
20
21 /* =======Beginning of Definitions of Msg Header Specific Macros =======*/
22 /* =======End of Definitions of Msg Header Specific Macros =======*/
23
24 /* =====Beginning of Definitions of Message-Corruption Related Macros=====*/
25 #define CMI_MAGIC(msg)                   ((CmiMsgHeaderBasic *)msg)->magic
26 #define CHARM_MAGIC_NUMBER               126
27
#if CMK_ERROR_CHECKING
/* Set to 1 when checksum validation is requested at startup. */
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

/*
 * Stamp the checksum of the first `len` bytes of `msg` into its header.
 * The cksum field is cleared first so it does not contribute to its own
 * checksum. Wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else).
 */
#define CMI_SET_CHECKSUM(msg, len)      \
        do {    \
          if (checksum_flag)  {   \
            ((CmiMsgHeaderBasic *)(msg))->cksum = 0;        \
            ((CmiMsgHeaderBasic *)(msg))->cksum = computeCheckSum((unsigned char*)(msg), (len));        \
          }     \
        } while (0)

/*
 * Verify the checksum of a received message; on mismatch dump the message
 * bytes in hex and abort. Bytes are printed through unsigned char so that
 * values >= 0x80 are not sign-extended on platforms where char is signed.
 */
#define CMI_CHECK_CHECKSUM(msg, len)    \
        do {    \
          if (checksum_flag && computeCheckSum((unsigned char*)(msg), (len)) != 0)  { \
            int cmi_cksum_idx_; \
            printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), (len)); \
            for(cmi_cksum_idx_ = 0; cmi_cksum_idx_ < (len); cmi_cksum_idx_++) { \
                printf("%2x", ((unsigned char *)(msg))[cmi_cksum_idx_]);                 \
            } \
            printf("------------------------------\n\n"); \
            CmiAbort("Fatal error: checksum doesn't agree!\n"); \
          } \
        } while (0)
#else
/* Checksumming is compiled out entirely when error checking is disabled. */
#define CMI_SET_CHECKSUM(msg, len)
#define CMI_CHECK_CHECKSUM(msg, len)
#endif
55 /* =====End of Definitions of Message-Corruption Related Macros=====*/
56
57
58 /* =====Beginning of Declarations of Machine Specific Variables===== */
/* Per-processor lock state; one entry per rank is created in MachineInitForDCMF. */
typedef struct ProcState {
    /* PCQueue      sendMsgBuf; */      /* per processor message sending queue */
    CmiNodeLock  recvLock;              /* for cs->recv */
    CmiNodeLock bcastLock;              /* created alongside recvLock; not otherwise used in the visible code */
} ProcState;

/* Array of ProcState, allocated in MachineInitForDCMF. */
static ProcState  *procState;

/* Number of sends issued whose completion callback has not yet run. */
volatile int msgQueueLen;
/* Number of receives started whose recv_done has not yet run. */
volatile int outstanding_recvs;

/* Send protocol registrations; the send path picks one by message size. */
DCMF_Protocol_t  cmi_dcmf_short_registration __attribute__((__aligned__(16)));
DCMF_Protocol_t  cmi_dcmf_eager_registration __attribute__((__aligned__(16)));
DCMF_Protocol_t  cmi_dcmf_rzv_registration   __attribute__((__aligned__(16)));
/* Registration used by machineMulticast. */
DCMF_Protocol_t  cmi_dcmf_multicast_registration   __attribute__((__aligned__(16)));
74
75
/*
 * Bookkeeping record for one in-flight send. It is handed to the send
 * completion callbacks as their client data.
 */
typedef struct msg_list {
    char              * msg;     /* message being sent; released with CmiFree on completion */
//    int                 size;
//    int                 destpe;
    int               * pelist;  /* destination list; only set by machineMulticast */
//    DCMF_Callback_t     cb;
//    DCQuad              info __attribute__((__aligned__(16)));
    DCMF_Request_t      send __attribute__((__aligned__(16)));  /* request storage passed to DCMF */
} SMSG_LIST __attribute__((__aligned__(16)));

/* Upper bound on the number of idle SMSG_LIST records kept for reuse. */
#define MAX_NUM_SMSGS   64
/* Per-PE pool of idle SMSG_LIST records. */
CpvDeclare(PCQueue, smsg_list_q);
static SMSG_LIST * smsg_allocate();
static void smsg_free (SMSG_LIST *smsg);
90
91 /* =====End of Declarations of Machine Specific Variables===== */
92
93
94 /* =====Beginning of Declarations of Machine Specific Functions===== */
95 /* Utility functions */
/**
 * Round a pointer up to the next 16-byte boundary.
 *
 * @param p  any pointer
 * @return   the smallest address >= p that is a multiple of 16
 */
char *ALIGN_16(char *p) {
    /* The mask is built at unsigned long width; a literal 0xfffffff0 would
     * clear the upper half of the address where unsigned long is 64 bits. */
    return (char *)((((unsigned long)p) + 0xfUL) & ~0xfUL);
}
99
/* Busy-wait until the DCMF timebase has advanced by roughly `cycles` ticks. */
void mysleep (int cycles) {
    unsigned long long now = DCMF_Timebase();
    const unsigned long long deadline = now + cycles;

    /* Spin, re-reading the timebase, until the deadline is reached. */
    for (; now < deadline; now = DCMF_Timebase()) {
    }
}
107 static void SendMsgsUntil(int);
108
109 /* ######Begining of Machine-specific RDMA related functions###### */
110 #define BGP_USE_AM_DIRECT 1
111 /* #define BGP_USE_RDMA_DIRECT 1 */
112 /* #define CMI_DIRECT_DEBUG 1 */
113 #if BGP_USE_AM_DIRECT
114
115 DCMF_Protocol_t  cmi_dcmf_direct_registration __attribute__((__aligned__(16)));
116 /** The receive side of a put implemented in DCMF_Send */
117
/*
 * Header carried with a direct (put-style) send. The receive handlers
 * below read it from the `info` argument.
 */
typedef struct {
    void *recverBuf;                 /* destination buffer the payload is written to */
    void (*callbackFnPtr)(void *);   /* invoked once the payload has arrived */
    void *callbackData;              /* argument passed to callbackFnPtr */
    DCMF_Request_t *DCMF_rq_t;       /* request object returned to DCMF for multi-packet receives */
} dcmfDirectMsgHeader;
124
/* Sender-side completion callback for direct sends: only optional tracing. */
#if (DCMF_VERSION_MAJOR < 2)
void direct_send_done_cb(void*nothing)
#else
void direct_send_done_cb(void*nothing, DCMF_Error_t *err)
#endif
{
#if CMI_DIRECT_DEBUG
    CmiPrintf("[%d] RDMA send_done_cb\n", CmiMyPe());
#endif
}
136
137 DCMF_Callback_t  directcb;
138
139 void     direct_short_pkt_recv (void             * clientdata,
140                                 const DCQuad     * info,
141                                 unsigned           count,
142                                 unsigned           senderrank,
143                                 const char       * buffer,
144                                 const unsigned     sndlen) {
145 #if CMI_DIRECT_DEBUG
146     CmiPrintf("[%d] RDMA direct_short_pkt_recv\n", CmiMyPe());
147 #endif
148     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
149     CmiMemcpy(msgHead->recverBuf, buffer, sndlen);
150     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
151 }
152
153
154 #if (DCMF_VERSION_MAJOR >= 2)
155 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
156 #else
157 typedef void (*cbhdlr) (void *);
158 #endif
159
160 DCMF_Request_t * direct_first_pkt_recv_done (void              * clientdata,
161         const DCQuad      * info,
162         unsigned            count,
163         unsigned            senderrank,
164         const unsigned      sndlen,
165         unsigned          * rcvlen,
166         char             ** buffer,
167         DCMF_Callback_t   * cb
168                                             ) {
169 #if CMI_DIRECT_DEBUG
170     CmiPrintf("[%d] RDMA direct_first_pkt_recv_done\n", CmiMyPe());
171 #endif
172     /* pull the data we need out of the header */
173     *rcvlen=sndlen;
174     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
175     cb->function= (cbhdlr)msgHead->callbackFnPtr;
176     cb->clientdata=msgHead->callbackData;
177     *buffer=msgHead->recverBuf;
178     return msgHead->DCMF_rq_t;
179 }
180 #endif /* end of #if BGP_USE_AM_DIRECT */
181
182 #ifdef BGP_USE_RDMA_DIRECT
183 static struct DCMF_Callback_t dcmf_rdma_cb_ack;
184
185 DCMF_Protocol_t  cmi_dcmf_direct_put_registration __attribute__((__aligned__(16)));
186 DCMF_Protocol_t  cmi_dcmf_direct_get_registration __attribute__((__aligned__(16)));
187 DCMF_Protocol_t  cmi_dcmf_direct_rdma_registration __attribute__((__aligned__(16)));
188 /** The receive side of a DCMF_Put notification implemented in DCMF_Send */
189
/* Header of an RDMA notification message: the callback to run on receipt. */
typedef struct {
    void (*callbackFnPtr)(void *);   /* invoked by direct_short_rdma_pkt_recv */
    void *callbackData;              /* argument passed to callbackFnPtr */
} dcmfDirectRDMAMsgHeader;
194
/*
 * Sender-side completion callback for RDMA notification sends. There is
 * nothing to do except optional tracing.
 */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_rdma_done_cb(void*nothing, DCMF_Error_t *err)
#else
void direct_send_rdma_done_cb(void*nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    /* The format takes exactly one argument; the previous "result %d"
     * conversion had no matching argument (undefined behaviour). */
    CmiPrintf("[%d] RDMA send_rdma_done_cb\n", CmiMyPe());
#endif
}
207
208 DCMF_Callback_t  directcb;
209
210 void     direct_short_rdma_pkt_recv (void             * clientdata,
211                                      const DCQuad     * info,
212                                      unsigned           count,
213                                      unsigned           senderrank,
214                                      const char       * buffer,
215                                      const unsigned     sndlen) {
216 #if CMI_DIRECT_DEBUG
217     CmiPrintf("[%d] RDMA direct_short_rdma_pkt_recv\n", CmiMyPe());
218 #endif
219     dcmfDirectRDMAMsgHeader *msgHead=  (dcmfDirectRDMAMsgHeader *) info;
220     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
221 }
222
223 #if (DCMF_VERSION_MAJOR >= 2)
224 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
225 #else
226 typedef void (*cbhdlr) (void *);
227 #endif
228
229 DCMF_Request_t * direct_first_rdma_pkt_recv_done (void              * clientdata,
230         const DCQuad      * info,
231         unsigned            count,
232         unsigned            senderrank,
233         const unsigned      sndlen,
234         unsigned          * rcvlen,
235         char             ** buffer,
236         DCMF_Callback_t   * cb
237                                                  ) {
238     CmiAbort("direct_first_rdma_pkt_recv should not be called");
239 }
240 #endif /* end of #if BGP_USE_RDMA_DIRECT */
241 /* ######End of Machine-specific RDMA related functions###### */
242
243
244 /* ### Beginning of Communication-Op Related Functions ### */
245 /* The machine-specific send-related function */
246 #if (DCMF_VERSION_MAJOR >= 2)
247 static void send_done(void *data, DCMF_Error_t *err);
248 static void send_multi_done(void *data, DCMF_Error_t *err);
249 #else
250 static void send_done(void *data);
251 static void send_multi_done(void *data);
252 #endif
253 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode);
254 #define CmiMachineSpecificSendFunc MachineSpecificSendForDCMF
255
256 /* The machine-specific recv-related function (on the receiver side) */
257 #if (DCMF_VERSION_MAJOR >= 2)
258 static void recv_done(void *clientdata, DCMF_Error_t * err);
259 #else
260 static void recv_done(void *clientdata);
261 #endif
262 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
263         unsigned            count,
264         unsigned            senderrank,
265         const unsigned      sndlen,
266         unsigned            connid,
267         void              * clientdata,
268         unsigned          * rcvlen,
269         char             ** buffer,
270         unsigned          * pw,
271         DCMF_Callback_t   * cb
272                                            );
273 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
274                                       const DCQuad      * info,
275                                       unsigned            count,
276                                       unsigned            senderrank,
277                                       const unsigned      sndlen,
278                                       unsigned          * rcvlen,
279                                       char             ** buffer,
280                                       DCMF_Callback_t   * cb
281                                      );
282
283 /* ### End of Communication-Op Related Functions ### */
284
285 /* ### Beginning of Machine-startup Related Functions ### */
286 static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID);
287 #define MachineSpecificInit MachineInitForDCMF
288
289 static void MachinePreCommonInitForDCMF(int everReturn);
290 static void MachinePostCommonInitForDCMF(int everReturn);
291 #define MachineSpecificPreCommonInit MachinePreCommonInitForDCMF
292 #define MachineSpecificPostCommonInit MachinePostCommonInitForDCMF
293 /* ### End of Machine-startup Related Functions ### */
294
295 /* ### Beginning of Machine-running Related Functions ### */
/* Hooks used by the common machine code; each maps to the DCMF-specific
 * implementation defined later in this file. */
static void AdvanceCommunicationForDCMF();
#define MachineSpecificAdvanceCommunication AdvanceCommunicationForDCMF

/* Drain must map to the drain routine, which keeps polling until no sends
 * or receives are outstanding, not to a single progress pass. */
static void DrainResourcesForDCMF();
#define MachineSpecificDrainResources DrainResourcesForDCMF

/* Exit must map to the routine that finalizes DCMF and terminates. */
static void MachineExitForDCMF();
#define MachineSpecificExit MachineExitForDCMF
304
305 /* ### End of Machine-running Related Functions ### */
306
307 /* ### Beginning of Idle-state Related Functions ### */
308
309 /* ### End of Idle-state Related Functions ### */
310
311 /* =====End of Declarations of Machine Specific Functions===== */
312
313 /**
314  *  Macros that overwrites the common codes, such as
315  *  CMK_SMP_NO_COMMTHD, NETWORK_PROGRESS_PERIOD_DEFAULT,
316  *  USE_COMMON_SYNC_P2P, CMK_HAS_SIZE_IN_MSGHDR,
317  *  CMK_OFFLOAD_BCAST_PROCESS etc.
318  */
319 #define CMK_OFFLOAD_BCAST_PROCESS 1
320 #include "machine-common.c"
321
322 /*######Beginning of functions related with Communication-Op functions ######*/
323
324 /* Utility functions */
325 static inline SMSG_LIST * smsg_allocate() {
326     SMSG_LIST *smsg = (SMSG_LIST *)PCQueuePop(CpvAccess(smsg_list_q));
327     if (smsg != NULL)
328         return smsg;
329
330     void * buf = malloc(sizeof(SMSG_LIST));
331     assert(buf!=NULL);
332     assert (((unsigned)buf & 0x0f) == 0);
333
334     return (SMSG_LIST *) buf;
335 }
336
337 static inline void smsg_free (SMSG_LIST *smsg) {
338     int size = PCQueueLength (CpvAccess(smsg_list_q));
339     if (size < MAX_NUM_SMSGS)
340         PCQueuePush (CpvAccess(smsg_list_q), (char *) smsg);
341     else
342         free (smsg);
343 }
344
345 static void SendMsgsUntil(int targetm) {
346     while (msgQueueLen>targetm) {
347 #if CMK_SMP
348         DCMF_CriticalSection_enter (0);
349 #endif
350
351         while (DCMF_Messager_advance()>0);
352
353 #if CMK_SMP
354         DCMF_CriticalSection_exit (0);
355 #endif
356     }
357 }
358
359 /* Send functions */
360 /* The callback on sender side */
361 #if (DCMF_VERSION_MAJOR >= 2)
362 static void send_done(void *data, DCMF_Error_t *err)
363 #else
364 static void send_done(void *data)
365 #endif
366 /* send done callback: sets the smsg entry to done */
367 {
368     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
369     CmiFree(msg_tmp->msg);
370     smsg_free (msg_tmp);
371     msgQueueLen--;
372 }
373
374 #if (DCMF_VERSION_MAJOR >= 2)
375 static void send_multi_done(void *data, DCMF_Error_t *err)
376 #else
377 static void send_multi_done(void *data)
378 #endif
379 /* send done callback: sets the smsg entry to done */
380 {
381     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
382     CmiFree(msg_tmp->msg);
383     free(msg_tmp->pelist);
384     smsg_free(msg_tmp);
385     msgQueueLen--;
386 }
387
388 /* The machine specific send function */
389 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode) {
390     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
391     //msg_tmp->destpe = destNode;
392     //msg_tmp->size = size;
393     msg_tmp->msg = msg;
394
395     DCMF_Callback_t cb;
396     DCQuad info;
397
398     cb.function = send_done;
399     cb.clientdata = msg_tmp;
400
401
402 #if CMK_ERROR_CHECKING
403     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
404     CMI_SET_CHECKSUM(msg, size);
405 #endif
406     CMI_MSG_SIZE(msg) = size;
407
408     //msg_tmp->cb.function = send_done;
409     //msg_tmp->cb.clientdata   =   msg_tmp;
410
411     DCMF_Protocol_t *protocol = NULL;
412
413     if (size < 224)
414         protocol = &cmi_dcmf_short_registration;
415     else if (size < 2048)
416         protocol = &cmi_dcmf_eager_registration;
417     else
418         protocol = &cmi_dcmf_rzv_registration;
419
420 #if CMK_SMP
421     DCMF_CriticalSection_enter (0);
422 #endif
423
424     msgQueueLen ++;
425     /*
426      * Original one:
427      *     DCMF_Send (protocol, &msg_tmp->send, msg_tmp->cb,
428                    DCMF_MATCH_CONSISTENCY, msg_tmp->destpe,
429                    msg_tmp->size, msg_tmp->msg, &msg_tmp->info, 1);
430            Ref:http://dcmf.anl-external.org/docs/mpi:dcmfd/group__SEND.html
431      */
432     DCMF_Send (protocol, &msg_tmp->send, cb, DCMF_MATCH_CONSISTENCY,
433                destNode, size, msg, &info, 0);
434
435 #if CMK_SMP
436     DCMF_CriticalSection_exit (0);
437 #endif
438
439     return 0;
440 }
441
442 #define MAX_MULTICAST 128
443 DCMF_Opcode_t  CmiOpcodeList [MAX_MULTICAST];
444
/*
 * Send `msg` (`size` bytes) to the `npes` ranks in `pelist` with a single
 * DCMF multicast.
 *
 * Both `msg` and `pelist` are stored in the bookkeeping record and released
 * by send_multi_done when the multicast completes (msg via CmiFree, pelist
 * via free).
 */
void  machineMulticast(int npes, int *pelist, int size, char* msg) {
    /* Account for npes messages in the quiescence-detection state. */
    CQdCreate(CpvAccess(cQdState), npes);

    /* CmiOpcodeList supplies one opcode slot per destination. */
    CmiAssert (npes < MAX_MULTICAST);

#if CMK_ERROR_CHECKING
    CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
    CMI_SET_CHECKSUM(msg, size);
#endif

    CMI_MSG_SIZE(msg) = size;
    CMI_SET_BROADCAST_ROOT(msg,0);

    SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));

    //msg_tmp->destpe    = -1;      //multicast operation
    //msg_tmp->size      = size * npes; //keep track of #bytes outstanding
    msg_tmp->msg       = msg;
    msg_tmp->pelist    = pelist;

    DCMF_Multicast_t  mcast_info __attribute__((__aligned__(16)));
    DCQuad info;   /* passed as msginfo with count 0; never filled in here */

    mcast_info.registration   = & cmi_dcmf_multicast_registration;
    mcast_info.request        = & msg_tmp->send;
    mcast_info.cb_done.function    =   send_multi_done;
    mcast_info.cb_done.clientdata  =   msg_tmp;
    mcast_info.consistency    =   DCMF_MATCH_CONSISTENCY;
    mcast_info.connection_id  =   CmiMyPe();
    mcast_info.bytes          =   size;
    mcast_info.src            =   msg;
    mcast_info.nranks         =   npes;
    mcast_info.ranks          =   (unsigned *)pelist;
    mcast_info.opcodes        =   CmiOpcodeList;   //static list of MAX_MULTICAST entries with 0 in them
    mcast_info.flags          =   0;
    mcast_info.msginfo        =   &info;
    //mcast_info.count          =   1;
    mcast_info.count          =   0;

#if CMK_SMP
    DCMF_CriticalSection_enter (0);
#endif
    /* Count the multicast as one outstanding send; send_multi_done decrements. */
    msgQueueLen++;
    DCMF_Multicast (&mcast_info);

#if CMK_SMP
    DCMF_CriticalSection_exit (0);
#endif
}
494
495 /* Recv functions */
496 /* The callback on the recv side */
497 #if (DCMF_VERSION_MAJOR >= 2)
498 static void recv_done(void *clientdata, DCMF_Error_t * err)
499 #else
500 static void recv_done(void *clientdata)
501 #endif
502 /* recv done callback: push the recved msg to recv queue */
503 {
504
505     char *msg = (char *) clientdata;
506
507     /*printf ("NODE[%d] Recv message done with msg rank %d\n", CmiMyNode(), CMI_DEST_RANK(msg));*/
508     MACHSTATE3(2,"[%d] recv_done begin with msg %p size=%d { ", CmiMyNode(), msg, CMI_MSG_SIZE(msg));
509 #if CMK_ERROR_CHECKING
510     int sndlen = CMI_MSG_SIZE(msg);
511     CMI_CHECK_CHECKSUM(msg, sndlen);
512     if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) { /* received a non-charm msg */
513         CmiAbort("Charm++ Warning: Non Charm++ Message Received. \n");
514         return;
515     }
516 #endif
517
518     handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
519
520     outstanding_recvs--;
521     MACHSTATE(2,"} recv_done end ");
522     return;
523 }
524
525 void short_pkt_recv (void             * clientdata,
526                      const DCQuad     * info,
527                      unsigned           count,
528                      unsigned           senderrank,
529                      const char       * buffer,
530                      const unsigned     sndlen) {
531     outstanding_recvs ++;
532     int alloc_size = sndlen;
533
534     char * new_buffer = (char *)CmiAlloc(alloc_size);
535     CmiMemcpy (new_buffer, buffer, sndlen);
536
537 #if (DCMF_VERSION_MAJOR >= 2)
538     recv_done (new_buffer, NULL);
539 #else
540     recv_done (new_buffer);
541 #endif
542 }
543
544 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
545         unsigned            count,
546         unsigned            senderrank,
547         const unsigned      sndlen,
548         unsigned            connid,
549         void              * clientdata,
550         unsigned          * rcvlen,
551         char             ** buffer,
552         unsigned          * pw,
553         DCMF_Callback_t   * cb
554                                            ) {
555     outstanding_recvs ++;
556     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
557     /*printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
558     /* printf ("Receiving %d bytes\n", sndlen); */
559     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
560
561     *buffer = (char *)CmiAlloc(alloc_size);
562     cb->function = recv_done;
563     cb->clientdata = *buffer;
564
565     *pw  = 0x7fffffff;
566     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
567 }
568
569 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
570                                       const DCQuad      * info,
571                                       unsigned            count,
572                                       unsigned            senderrank,
573                                       const unsigned      sndlen,
574                                       unsigned          * rcvlen,
575                                       char             ** buffer,
576                                       DCMF_Callback_t   * cb
577                                      ) {
578     outstanding_recvs ++;
579     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
580     /* printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
581     /* printf ("Receiving %d bytes\n", sndlen); */
582     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
583
584     *buffer = (char *)CmiAlloc(alloc_size);
585     cb->function = recv_done;
586     cb->clientdata = *buffer;
587
588     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
589 }
590
/* NOTE: everything down to the matching #endif is compiled out (#if 0). */
#if 0
/* -----------------------------------------
 * Rectangular broadcast implementation
 * -----------------------------------------
 */
unsigned int *ranklist;
BGTsC_t        barrier;
#define MAX_COMM  256
/* Request object registered for each communicator id. */
static void * comm_table [MAX_COMM];

/* Per-broadcast state handed to bcast_done as its client data. */
typedef struct rectbcast_msg {
    BGTsRC_t           request;
    DCMF_Callback_t    cb;
    char              *msg;
} RectBcastInfo;


/* Completion callback: frees the message and the RectBcastInfo record. */
static void bcast_done (void *data) {
    RectBcastInfo *rinfo = (RectBcastInfo *) data;
    CmiFree (rinfo->msg);
    free (rinfo);
}

/* Look up the request registered for communicator `comm`. */
static  void *   getRectBcastRequest (unsigned comm) {
    return comm_table [comm];
}


/*
 * Receive-side handler: allocates a buffer for the payload plus an aligned
 * BGTsRC_t, arms recv_done as the completion callback and returns the
 * request storage.
 */
static  void *  bcast_recv     (unsigned               root,
                                unsigned               comm,
                                const unsigned         sndlen,
                                unsigned             * rcvlen,
                                char                ** rcvbuf,
                                DCMF_Callback_t      * const cb) {

    int alloc_size = sndlen + sizeof(BGTsRC_t) + 16;

    *rcvlen = sndlen;  /* to avoid malloc(0) which might
                                   return NULL */

    *rcvbuf       =  (char *)CmiAlloc(alloc_size);
    cb->function  =   recv_done;
    cb->clientdata = *rcvbuf;

    return (BGTsRC_t *) ALIGN_16 (*rcvbuf + sndlen);

}


/* Start an asynchronous rectangular broadcast of sndbuf on communicator commid. */
extern void bgl_machine_RectBcast (unsigned                 commid,
                                   const char             * sndbuf,
                                   unsigned                 sndlen) {
    RectBcastInfo *rinfo  =   (RectBcastInfo *) malloc (sizeof(RectBcastInfo));
    /* NOTE(review): rinfo->msg is never assigned here, yet bcast_done passes
     * it to CmiFree. Must be fixed before this code is re-enabled. */
    rinfo->cb.function    =   bcast_done;
    rinfo->cb.clientdata  =   rinfo;

    BGTsRC_AsyncBcast_start (commid, &rinfo->request, &rinfo->cb, sndbuf, sndlen);

}

/* Create and register the broadcast request for communicator commID. */
extern void        bgl_machine_RectBcastInit  (unsigned               commID,
        const BGTsRC_Geometry_t* geometry) {

    /* 256 matches MAX_COMM, the size of comm_table. */
    CmiAssert (commID < 256);
    CmiAssert (comm_table [commID] == NULL);

    BGTsRC_t *request =  (BGTsRC_t *) malloc (sizeof (BGTsRC_t));
    comm_table [commID] = request;

    BGTsRC_AsyncBcast_init  (request, commID,  geometry);
}

/*--------------------------------------------------------------
 *----- End Rectangular Broadcast Implementation ---------------
 *--------------------------------------------------------------*/
#endif
667
668
669 /*######End of functions related with Communication-Op functions ######*/
670
671
672 /* ######Beginning of functions related with communication progress ###### */
673 static INLINE_KEYWORD void AdvanceCommunicationForDCMF() {
674 #if CMK_SMP
675     DCMF_CriticalSection_enter (0);
676 #endif
677
678     while (DCMF_Messager_advance()>0);
679     //DCMF_Messager_advance();
680
681 #if CMK_SMP
682     DCMF_CriticalSection_exit (0);
683 #endif
684 }
685 /* ######End of functions related with communication progress ###### */
686
/* Network progress hook: polls the network for messages, then handles any
   pending immediate messages when that feature is compiled in. */
#if CMK_MACHINE_PROGRESS_DEFINED
void CmiMachineProgressImpl() {
    AdvanceCommunicationForDCMF();

#if CMK_IMMEDIATE_MSG
    CmiHandleImmediate();
#endif
}
#endif
697
698 /* ######Beginning of functions related with exiting programs###### */
699 static void DrainResourcesForDCMF() {
700     while (msgQueueLen > 0 || outstanding_recvs > 0) {
701         AdvanceCommunicationForDCMF();
702     }
703 }
704
/* Shut down the DCMF messager and end the process with a success status. */
static void MachineExitForDCMF() {
    DCMF_Messager_finalize();

    exit(EXIT_SUCCESS);
}
709 /* ######End of functions related with exiting programs###### */
710
711
712 /* ######Beginning of functions related with starting programs###### */
713 /**
714  *  Obtain the number of nodes, my node id, and consuming machine layer
715  *  specific arguments
716  */
717 static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID) {
718
719     DCMF_Messager_initialize();
720
721 #if CMK_SMP
722     DCMF_Configure_t  config_in, config_out;
723     config_in.thread_level= DCMF_THREAD_MULTIPLE;
724     config_in.interrupts  = DCMF_INTERRUPTS_OFF;
725
726     DCMF_Messager_configure(&config_in, &config_out);
727     //assert (config_out.thread_level == DCMF_THREAD_MULTIPLE); //not supported in vn mode
728 #endif
729
730     DCMF_Send_Configuration_t short_config, eager_config, rzv_config;
731
732
733     short_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
734     short_config.cb_recv_short = short_pkt_recv;
735     short_config.cb_recv       = first_pkt_recv_done;
736
737 #if (DCMF_VERSION_MAJOR >= 3)
738     short_config.network  = DCMF_DEFAULT_NETWORK;
739 #elif (DCMF_VERSION_MAJOR == 2)
740     short_config.network  = DCMF_DefaultNetwork;
741 #endif
742
743     eager_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
744     eager_config.cb_recv_short = short_pkt_recv;
745     eager_config.cb_recv       = first_pkt_recv_done;
746 #if (DCMF_VERSION_MAJOR >= 3)
747     eager_config.network  = DCMF_DEFAULT_NETWORK;
748 #elif (DCMF_VERSION_MAJOR == 2)
749     eager_config.network  = DCMF_DefaultNetwork;
750 #endif
751
752 #ifdef  OPT_RZV
753 #warning "Enabling Optimize Rzv"
754     rzv_config.protocol        = DCMF_RZV_SEND_PROTOCOL;
755 #else
756     rzv_config.protocol        = DCMF_DEFAULT_SEND_PROTOCOL;
757 #endif
758     rzv_config.cb_recv_short   = short_pkt_recv;
759     rzv_config.cb_recv         = first_pkt_recv_done;
760 #if (DCMF_VERSION_MAJOR >= 3)
761     rzv_config.network  = DCMF_DEFAULT_NETWORK;
762 #elif (DCMF_VERSION_MAJOR == 2)
763     rzv_config.network  = DCMF_DefaultNetwork;
764 #endif
765
766     DCMF_Send_register (&cmi_dcmf_short_registration, &short_config);
767     DCMF_Send_register (&cmi_dcmf_eager_registration, &eager_config);
768     DCMF_Send_register (&cmi_dcmf_rzv_registration,   &rzv_config);
769
770 #ifdef BGP_USE_AM_DIRECT
771     DCMF_Send_Configuration_t direct_config;
772     direct_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
773     direct_config.cb_recv_short = direct_short_pkt_recv;
774     direct_config.cb_recv       = direct_first_pkt_recv_done;
775 #if (DCMF_VERSION_MAJOR >= 3)
776     direct_config.network  = DCMF_DEFAULT_NETWORK;
777 #elif (DCMF_VERSION_MAJOR == 2)
778     direct_config.network  = DCMF_DefaultNetwork;
779 #endif
780     DCMF_Send_register (&cmi_dcmf_direct_registration,   &direct_config);
781     directcb.function=direct_send_done_cb;
782     directcb.clientdata=NULL;
783 #endif
784
785 #ifdef BGP_USE_RDMA_DIRECT
786     /* notification protocol */
787     DCMF_Send_Configuration_t direct_rdma_config;
788     direct_rdma_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
789     direct_rdma_config.cb_recv_short = direct_short_rdma_pkt_recv;
790     direct_rdma_config.cb_recv       = direct_first_rdma_pkt_recv_done;
791 #if (DCMF_VERSION_MAJOR >= 3)
792     direct_rdma_config.network  = DCMF_DEFAULT_NETWORK;
793 #elif (DCMF_VERSION_MAJOR == 2)
794     direct_rdma_config.network  = DCMF_DefaultNetwork;
795 #endif
796     DCMF_Send_register (&cmi_dcmf_direct_rdma_registration,   &direct_rdma_config);
797     directcb.function=direct_send_rdma_done_cb;
798     directcb.clientdata=NULL;
799     /* put protocol */
800     DCMF_Put_Configuration_t put_configuration = { DCMF_DEFAULT_PUT_PROTOCOL };
801     DCMF_Put_register (&cmi_dcmf_direct_put_registration, &put_configuration);
802     DCMF_Get_Configuration_t get_configuration = { DCMF_DEFAULT_GET_PROTOCOL };
803     DCMF_Get_register (&cmi_dcmf_direct_get_registration, &get_configuration);
804
805 #endif
806     //fprintf(stderr, "Initializing Eager Protocol\n");
807
808     *numNodes = DCMF_Messager_size();
809     *myNodeID = DCMF_Messager_rank();
810
811     CmiBarrier();
812     CmiBarrier();
813     CmiBarrier();
814
815     /* NOTE: the following codes requires #PEs, which is not available
816      * until this function finishes. And it allocate O(p) space */
817     int totalPEs = _Cmi_mynodesize * (*numNodes);
818     DCMF_Multicast_Configuration_t mconfig;
819     mconfig.protocol = DCMF_MEMFIFO_DMA_MSEND_PROTOCOL;
820     mconfig.cb_recv  = first_multi_pkt_recv_done;
821     mconfig.clientdata = NULL;
822     mconfig.connectionlist = (void **) malloc (totalPEs * sizeof(unsigned long));
823     mconfig.nconnections = totalPEs;
824     DCMF_Multicast_register(&cmi_dcmf_multicast_registration, &mconfig);
825
826     int actualNodeSize = _Cmi_mynodesize;
827 #if !CMK_SMP_NO_COMMTHD
828     actualNodeSize++; //considering the extra comm thread
829 #endif
830
831     procState = (ProcState *)CmiAlloc((actualNodeSize) * sizeof(ProcState));
832     for (int i=0; i<actualNodeSize; i++) {
833         /*    procState[i].sendMsgBuf = PCQueueCreate();   */
834         procState[i].recvLock = CmiCreateLock();
835         procState[i].bcastLock = CmiCreateLock();
836     }
837
838     /* checksum flag */
839     if (CmiGetArgFlag(argv,"+checksum")) {
840 #if CMK_ERROR_CHECKING
841         checksum_flag = 1;
842         if (*myNodeID == 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
843 #else
844         if (*myNodeID == 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
845 #endif
846     }
847
848 }
849
850 static void MachinePreCommonInitForDCMF(int everReturn) {
851     CpvInitialize(PCQueue, smsg_list_q);
852     CpvAccess(smsg_list_q) = PCQueueCreate();
853 }
854
855 static void MachinePostCommonInitForDCMF(int everReturn) {
856 #if !CMK_SMP || CMK_SMP_NO_COMMTHD
857     CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)CmiNotifyIdle,NULL);
858 #endif
859
860     CmiBarrier();
861 }
862 /* ######End of functions related with starting programs###### */
863
864 /***********************************************************************
865  *
866  * Abort function:
867  *
868  ************************************************************************/
869
870 void CmiAbort(const char *message) {
871     CmiError("------------- Processor %d Exiting: Called CmiAbort ------------\n"
872              "{snd:%d,rcv:%d} Reason: %s\n",CmiMyPe(),
873              msgQueueLen, outstanding_recvs, message);
874
875 #if 0
876     /* Since it's a abort, why bother to drain the resources? The system
877      * should clean it self
878      */
879     /* FIXME: what happens in the SMP mode??? */
880     DrainResourcesForDCMF();
881 #endif
882     assert(0);
883 }
884
885
886 /*********** Beginning of MULTICAST/VECTOR SENDING FUNCTIONS **************/
887 /*
888
 * Depending on which flags are set, other delivery functions may be needed.
890  */
891
892 #if !CMK_MULTICAST_LIST_USE_COMMON_CODE
893
/* Sends msg (size bytes) to the npes PEs listed in pes.
 * The message is first duplicated with CopyMsg and the duplicate is passed to
 * CmiFreeListSendFn; msg itself is not passed on. */
void CmiSyncListSendFn(int npes, int *pes, int size, char *msg) {
    CmiFreeListSendFn(npes, pes, size, CopyMsg(msg, size));
}
898
899 /* Currently disable optimized multicast for non-SMP as it fails
900  * for hybrid ldb in NAMD as reported by Gengbin --Chao Mei
901  */
902 #if !CMK_SMP
903 #define OPTIMIZED_MULTICAST  0
904 #else
905 #define OPTIMIZED_MULTICAST  1
906 #endif
907
908 #if OPTIMIZED_MULTICAST
909 #warning "Using Optimized Multicast"
910 #endif
911
/*
 * Sends msg (size bytes) to the npes PEs listed in pes.
 *
 * npes must be at least 1 (checked with CmiAssert). A single destination is
 * forwarded straight to CmiFreeSendFn.
 *
 * With OPTIMIZED_MULTICAST:
 *   - in SMP builds, destinations on this node are served individually with
 *     CmiSyncSend and only the remaining PEs are collected into a freshly
 *     malloc'd list; if nothing remains, that list and msg are released here;
 *   - the message header is stamped (broadcast root 0, size, and in
 *     error-checking builds the magic number and checksum), the quiescence
 *     counter is advanced by the number of remaining destinations, and the
 *     message is passed to machineMulticast.
 *
 * Without OPTIMIZED_MULTICAST the message is sent to each PE in turn; the
 * final destination always goes through CmiFreeSendFn.
 */
void CmiFreeListSendFn(int npes, int *pes, int size, char *msg) {
    CmiAssert(npes>=1);
    if (npes==1) {
        CmiFreeSendFn(pes[0], size, msg);
        return;
    }

    int i;
#if OPTIMIZED_MULTICAST
    int *newpelist = pes;
    int new_npes = npes;
#if CMK_SMP
    newpelist = (int *)malloc(sizeof(int)*npes);
    if (newpelist == NULL) {
        CmiAbort("CmiFreeListSendFn: out of memory while building the multicast PE list\n");
    }
    new_npes = 0;
    for (i=0; i<npes; i++) {
        if (CmiNodeOf(pes[i]) == CmiMyNode()) {
            /* same node: deliver directly, msg stays with us */
            CmiSyncSend(pes[i], size, msg);
        } else {
            newpelist[new_npes++] = pes[i];
        }
    }
    if (new_npes == 0) {
        /* every destination was node-local: nothing goes to the network, so
         * the scratch list must be released here or it would leak */
        free(newpelist);
        CmiFree(msg);
        return;
    }
#endif

    CMI_SET_BROADCAST_ROOT(msg,0);
    CMI_MSG_SIZE(msg) = size;
#if CMK_ERROR_CHECKING
    CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
    CMI_SET_CHECKSUM(msg, size);
#endif

    CQdCreate(CpvAccess(cQdState), new_npes);
    /* NOTE(review): newpelist is not freed after this call; presumably
     * machineMulticast keeps the list until the send completes - confirm. */
    machineMulticast (new_npes, newpelist, size, msg);
#else /* non-optimized multicast */

    for (i=0; i<npes-1; i++) {
#if !CMK_SMP
        CmiReference(msg);
        CmiFreeSendFn(pes[i], size, msg);
#else
        CmiSyncSend(pes[i], size, msg);
#endif
    }
    CmiFreeSendFn(pes[npes-1], size, msg);
#endif /* end of #if OPTIMIZED_MULTICAST */
}
964 #endif /* end of #if !CMK_MULTICAST_LIST_USE_COMMON_CODE */
965
966 /*********** End of MULTICAST/VECTOR SENDING FUNCTIONS **************/
967
968 /**************************  TIMER FUNCTIONS **************************/
969
970 /************Barrier Related Functions****************/
971
972
973 #include "manytomany.c"
974
975 /*********************************************************************************************
976 This section is for CmiDirect. This is a variant of the  persistent communication in which
977 the user can transfer data between processors without using Charm++ messages. This lets the user
978 send and receive data from the middle of his arrays without any copying on either send or receive
979 side
980 *********************************************************************************************/
981
982
983 #ifdef BGP_USE_AM_DIRECT
984
985 #include "cmidirect.h"
986
987 /* We can avoid a receiver side lookup by just sending the whole shebang.
988    DCMF header is in units of quad words (16 bytes), so we'd need less than a
989    quad word for the handle if we just sent that and did a lookup. Or exactly
990    2 quad words for the buffer pointer, callback pointer, callback
991    data pointer, and DCMF_Request_t pointer with no lookup.
992
993    Since CmiDirect is generally going to be used for messages which aren't
   tiny, the extra 16 bytes is not likely to impact performance noticeably and
995    not having to lookup handles in tables simplifies the code enormously.
996
997    EJB   2008/4/2
998 */
999
1000
1001 /**
1002  To be called on the receiver to create a handle and return its number
1003 **/
1004 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1005     /* with two-sided primitives we just bundle the buffer and callback info into the handle so the sender can remind us about it later. */
1006     struct infiDirectUserHandle userHandle;
1007     userHandle.handle=1; /* doesn't matter on BG/P*/
1008     userHandle.senderNode=senderNode;
1009     userHandle.recverNode=_Cmi_mynode;
1010     userHandle.recverBufSize=recvBufSize;
1011     userHandle.recverBuf=recvBuf;
1012     userHandle.initialValue=initialValue;
1013     userHandle.callbackFnPtr=callbackFnPtr;
1014     userHandle.callbackData=callbackData;
1015     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1016 #if CMI_DIRECT_DEBUG
1017     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1018 #endif
1019     return userHandle;
1020 }
1021
1022 /****
1023  To be called on the sender to attach the sender's buffer to this handle
1024 ******/
1025
1026 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1027
1028     /* one-sided primitives would require registration of memory */
1029
1030     /* with two-sided primitives we just record the sender buf in the handle */
1031     userHandle->senderBuf=sendBuf;
1032     CmiAssert(sendBufSize==userHandle->recverBufSize);
1033     userHandle->DCMF_rq_tsend = (DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1034 #if CMI_DIRECT_DEBUG
1035     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1036 #endif
1037
1038 }
1039
1040 /****
1041 To be called on the sender to do the actual data transfer
1042 ******/
1043 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1044     /** invoke a DCMF_Send with the direct callback */
1045     DCMF_Protocol_t *protocol = NULL;
1046     protocol = &cmi_dcmf_direct_registration;
1047     /* local copy */
1048     CmiAssert(userHandle->recverBuf!=NULL);
1049     CmiAssert(userHandle->senderBuf!=NULL);
1050     CmiAssert(userHandle->recverBufSize>0);
1051     if (userHandle->recverNode== _Cmi_mynode) {
1052 #if CMI_DIRECT_DEBUG
1053         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1054 #endif
1055
1056         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1057         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1058     } else {
1059         dcmfDirectMsgHeader msgHead;
1060         msgHead.recverBuf=userHandle->recverBuf;
1061         msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1062         msgHead.callbackData=userHandle->callbackData;
1063         msgHead.DCMF_rq_t=(DCMF_Request_t *) userHandle->DCMF_rq_trecv;
1064 #if CMK_SMP
1065         DCMF_CriticalSection_enter (0);
1066 #endif
1067 #if CMI_DIRECT_DEBUG
1068         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1069 #endif
1070         DCMF_Send (protocol,
1071                    (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1072                    directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1073                    userHandle->recverBufSize, userHandle->senderBuf,
1074                    (struct DCQuad *) &(msgHead), 2);
1075
1076 #if CMK_SMP
1077         DCMF_CriticalSection_exit (0);
1078 #endif
1079     }
1080 }
1081
/* Get is not available in the active-message (BGP_USE_AM_DIRECT) variant:
 * calling it aborts with a hint to build with BGP_USE_RDMA_DIRECT instead. */
void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
    const char *reason = "Not Implemented, switch to #define BGP_USE_RDMA_DIRECT";

    (void)userHandle;
    CmiAbort(reason);
}
1085
1086 /**** up to the user to safely call this */
1087 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1088     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1089 #if CMK_SMP
1090     DCMF_CriticalSection_enter (0);
1091 #endif
1092     CmiFree(userHandle->DCMF_rq_tsend);
1093 #if CMK_SMP
1094     DCMF_CriticalSection_exit (0);
1095 #endif
1096
1097 }
1098
1099 /**** up to the user to safely call this */
1100 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1101     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1102 #if CMK_SMP
1103     DCMF_CriticalSection_enter (0);
1104 #endif
1105     CmiFree(userHandle->DCMF_rq_trecv);
1106
1107 #if CMK_SMP
1108     DCMF_CriticalSection_exit (0);
1109 #endif
1110 }
1111
1112
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1117
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1122
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1127
1128 #endif /* BGP_USE_AM_DIRECT*/
1129
1130 #ifdef BGP_USE_RDMA_DIRECT
1131
1132 #include "cmidirect.h"
1133
1134 /*
1135    Notification protocol passes callback function and data in a single
1136    quadword.  This occurs in a message triggered by the sender side ack
1137    callback and therefore has higher latency than polling, but is guaranteed
1138    to be semantically correct.  The latency for a single packet that isn't
1139    hitting charm/converse should be pretty minimal, but you could run into
1140    sender side progress issues.  The alternative of polling on the out of band
1141    byte scheme creates correctness issues in that the data really has to be
1142    out of band and you rely on the buffer being written in order.  It also has
1143    annoying polling issues.  A third scheme could add a second put to a
1144    control region to poll upon and force sequential consistency between
   puts. It's not really clear that this would be faster or avoid the progress
1146    issue since you run into the same issues to enforce that sequential
1147    consistency.
1148
1149    EJB   2011/1/20
1150 */
1151
1152
1153 /* local function to use the ack as our signal to send a remote notify */
1154 static void CmiNotifyRemoteRDMA(void *handle, struct DCMF_Error_t *error) {
1155     struct infiDirectUserHandle *userHandle= (struct infiDirectUserHandle *) handle;
1156     dcmfDirectRDMAMsgHeader msgHead;
1157     msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1158     msgHead.callbackData=userHandle->callbackData;
1159 #if CMK_SMP
1160     DCMF_CriticalSection_enter (0);
1161 #endif
1162 #if CMI_DIRECT_DEBUG
1163     CmiPrintf("[%d] RDMA notify put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p \n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1164 #endif
1165     DCMF_Result res=DCMF_Send (&cmi_dcmf_direct_rdma_registration,
1166                                userHandle->DCMF_rq_tsend,
1167                                directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1168                                sizeof(dcmfDirectRDMAMsgHeader),
1169
1170                                userHandle->DCMF_notify_buf,
1171                                (struct DCQuad *) &(msgHead), 1);
1172 //    CmiAssert(res==DCMF_SUCCESS);
1173 #if CMK_SMP
1174     DCMF_CriticalSection_exit (0);
1175 #endif
1176 }
1177
1178 /**
1179  To be called on the receiver to create a handle and return its number
1180 **/
1181
1182
1183 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1184     /* one-sided primitives require registration of memory */
1185     struct infiDirectUserHandle userHandle;
1186     size_t numbytesRegistered=0;
1187     DCMF_Result regresult=DCMF_Memregion_create( &userHandle.DCMF_recverMemregion,
1188                           &numbytesRegistered,
1189                           recvBufSize,
1190                           recvBuf,
1191                           0);
1192     CmiAssert(numbytesRegistered==recvBufSize);
1193     CmiAssert(regresult==DCMF_SUCCESS);
1194
1195
1196     userHandle.handle=1; /* doesn't matter on BG/P*/
1197     userHandle.senderNode=senderNode;
1198     userHandle.recverNode=_Cmi_mynode;
1199     userHandle.recverBufSize=recvBufSize;
1200     userHandle.recverBuf=recvBuf;
1201     userHandle.initialValue=initialValue;
1202     userHandle.callbackFnPtr=callbackFnPtr;
1203     userHandle.callbackData=callbackData;
1204     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1205 #if CMI_DIRECT_DEBUG
1206     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1207 #endif
1208     return userHandle;
1209 }
1210
1211 /****
1212  To be called on the sender to attach the sender's buffer to this handle
1213 ******/
1214
1215 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1216     /* one-sided primitives would require registration of memory */
1217     userHandle->senderBuf=sendBuf;
1218     CmiAssert(sendBufSize==userHandle->recverBufSize);
1219     userHandle->DCMF_rq_tsend =(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1220     size_t numbytesRegistered=0;  // set as return value from create
1221     userHandle->DCMF_notify_buf=ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+32));
1222     userHandle->DCMF_notify_cb.function=CmiNotifyRemoteRDMA;
1223     userHandle->DCMF_notify_cb.clientdata=userHandle;
1224     DCMF_Result regresult=DCMF_Memregion_create( &userHandle->DCMF_senderMemregion,
1225                           &numbytesRegistered,
1226                           sendBufSize,
1227                           sendBuf,
1228                           0);
1229     CmiAssert(numbytesRegistered==sendBufSize);
1230     CmiAssert(regresult==DCMF_SUCCESS);
1231
1232 #if CMI_DIRECT_DEBUG
1233     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1234 #endif
1235
1236 }
1237
1238
1239 /****
1240 To be called on the sender to do the actual data transfer
1241 ******/
1242 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1243     /** invoke a DCMF_Put with the direct callback */
1244
1245     CmiAssert(userHandle->recverBuf!=NULL);
1246     CmiAssert(userHandle->senderBuf!=NULL);
1247     CmiAssert(userHandle->recverBufSize>0);
1248     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1249 #if CMI_DIRECT_DEBUG
1250         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1251 #endif
1252
1253         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1254         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1255     } else {
1256 #if CMK_SMP
1257         DCMF_CriticalSection_enter (0);
1258 #endif
1259 #if CMI_DIRECT_DEBUG
1260         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1261 #endif
1262         DCMF_Result
1263         Res= DCMF_Put(&cmi_dcmf_direct_put_registration,
1264                       userHandle->DCMF_rq_tsend,
1265                       directcb, DCMF_RELAXED_CONSISTENCY,
1266                       userHandle->recverNode,
1267                       userHandle->recverBufSize,
1268                       &userHandle->DCMF_senderMemregion,
1269                       &userHandle->DCMF_recverMemregion,
1270                       0, /* offsets are zero */
1271                       0,
1272                       userHandle->DCMF_notify_cb
1273                      );
1274         CmiAssert(Res==DCMF_SUCCESS);
1275 #if CMK_SMP
1276         DCMF_CriticalSection_exit (0);
1277 #endif
1278     }
1279 }
1280
1281 /****
1282 To be called on the receiver to initiate the actual data transfer
1283 ******/
1284 void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
1285     /** invoke a DCMF_Get with the direct callback */
1286
1287     CmiAssert(userHandle->recverBuf!=NULL);
1288     CmiAssert(userHandle->senderBuf!=NULL);
1289     CmiAssert(userHandle->recverBufSize>0);
1290     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1291 #if CMI_DIRECT_DEBUG
1292         CmiPrintf("[%d] RDMA local get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1293 #endif
1294
1295         CmiMemcpy(userHandle->senderBuf,userHandle->recverBuf,userHandle->recverBufSize);
1296         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1297     } else {
1298         struct DCMF_Callback_t done_cb;
1299         done_cb.function=userHandle->callbackFnPtr;
1300         done_cb.clientdata=userHandle->callbackData;
1301 #if CMK_SMP
1302         DCMF_CriticalSection_enter (0);
1303 #endif
1304 #if CMI_DIRECT_DEBUG
1305         CmiPrintf("[%d] RDMA get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1306 #endif
1307         DCMF_Result
1308         Res= DCMF_Get(&cmi_dcmf_direct_get_registration,
1309                       (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1310                       done_cb, DCMF_RELAXED_CONSISTENCY,
1311                       userHandle->recverNode,
1312                       userHandle->recverBufSize,
1313                       & userHandle->DCMF_recverMemregion,
1314                       & userHandle->DCMF_senderMemregion,
1315                       0, /* offsets are zero */
1316                       0
1317                      );
1318         CmiAssert(Res==DCMF_SUCCESS);
1319
1320
1321 #if CMK_SMP
1322         DCMF_CriticalSection_exit (0);
1323 #endif
1324     }
1325 }
1326
1327 /**** up to the user to safely call this */
1328 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1329     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1330 #if CMK_SMP
1331     DCMF_CriticalSection_enter (0);
1332 #endif
1333
1334     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_senderMemregion);
1335     CmiFree(userHandle->DCMF_notify_buf);
1336     CmiFree(userHandle->DCMF_rq_tsend);
1337 #if CMK_SMP
1338     DCMF_CriticalSection_exit (0);
1339 #endif
1340
1341 }
1342
1343 /**** up to the user to safely call this */
1344 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1345     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1346 #if CMK_SMP
1347     DCMF_CriticalSection_enter (0);
1348 #endif
1349
1350     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_recverMemregion);
1351     CmiFree(userHandle->DCMF_rq_trecv);
1352
1353 #if CMK_SMP
1354     DCMF_CriticalSection_exit (0);
1355 #endif
1356 }
1357
1358
1359
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1364
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1369
/**** Should not be called the first time.
 * Nothing needs to be done on BG/P, so the handle is left untouched. ****/
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void)userHandle; /* no op on BGP */
}
1374
1375 #endif /* BGP_USE_RDMA_DIRECT*/
1376
1377 /*@}*/
1378