added empty implementation for CmiBarrier and CmiBarrierZero as in the old one
[charm.git] / src / arch / bluegenep / machine.c
#include <assert.h>
#include <errno.h>
#include <malloc.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
9
10 #include "converse.h"
11 #include "machine.h"
12 #include "pcqueue.h"
13
14 #include <bpcore/ppc450_inlines.h>
15 #include "dcmf.h"
16 #include "dcmf_multisend.h"
17
18 /* =======Beginning of Definitions of Performance-Specific Macros =======*/
19 /* =======End of Definitions of Performance-Specific Macros =======*/
20
21 /* =======Beginning of Definitions of Msg Header Specific Macros =======*/
22 /* =======End of Definitions of Msg Header Specific Macros =======*/
23
24 /* =====Beginning of Definitions of Message-Corruption Related Macros=====*/
/*
 * Message-corruption helpers.
 *
 * CMI_MAGIC(msg)              lvalue for the magic byte in the basic header.
 * CMI_SET_CHECKSUM(msg, len)  when checksumming is enabled, zero the cksum
 *                             field and store computeCheckSum() of the message.
 * CMI_CHECK_CHECKSUM(msg,len) when checksumming is enabled, dump the message
 *                             and abort if computeCheckSum() is non-zero.
 *
 * The statement macros are wrapped in do { } while (0) so they behave as a
 * single statement (safe inside unbraced if/else), and every macro argument
 * is parenthesized so expressions such as `buf + off` expand correctly.
 */
#define CMI_MAGIC(msg)                   (((CmiMsgHeaderBasic *)(msg))->magic)
#define CHARM_MAGIC_NUMBER               126

#if CMK_ERROR_CHECKING
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

#define CMI_SET_CHECKSUM(msg, len)      \
        do {    \
          if (checksum_flag)  {   \
            ((CmiMsgHeaderBasic *)(msg))->cksum = 0;        \
            ((CmiMsgHeaderBasic *)(msg))->cksum = computeCheckSum((unsigned char*)(msg), (len));        \
          } \
        } while (0)

/* Bytes are dumped through unsigned char so that values >= 0x80 are not
 * sign-extended by the %x conversion. */
#define CMI_CHECK_CHECKSUM(msg, len)    \
        do { \
          if (checksum_flag && computeCheckSum((unsigned char*)(msg), (len)) != 0)  { \
            int count; \
            printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), (len)); \
            for(count = 0; count < (len); count++) { \
                printf("%2x", ((unsigned char *)(msg))[count]);                 \
            } \
            printf("------------------------------\n\n"); \
            CmiAbort("Fatal error: checksum doesn't agree!\n"); \
          } \
        } while (0)
#else
#define CMI_SET_CHECKSUM(msg, len)
#define CMI_CHECK_CHECKSUM(msg, len)
#endif
55 /* =====End of Definitions of Message-Corruption Related Macros=====*/
56
57
58 /* =====Beginning of Declarations of Machine Specific Variables===== */
59 typedef struct ProcState {
60     /* PCQueue      sendMsgBuf; */      /* per processor message sending queue */
61     CmiNodeLock  recvLock;              /* for cs->recv */
62     CmiNodeLock bcastLock;
63 } ProcState;
64
65 static ProcState  *procState;
66
67 volatile int msgQueueLen;
68 volatile int outstanding_recvs;
69
70 DCMF_Protocol_t  cmi_dcmf_short_registration __attribute__((__aligned__(16)));
71 DCMF_Protocol_t  cmi_dcmf_eager_registration __attribute__((__aligned__(16)));
72 DCMF_Protocol_t  cmi_dcmf_rzv_registration   __attribute__((__aligned__(16)));
73 DCMF_Protocol_t  cmi_dcmf_multicast_registration   __attribute__((__aligned__(16)));
74
75
76 typedef struct msg_list {
77     char              * msg;
78 //    int                 size;
79 //    int                 destpe;
80     int               * pelist;
81 //    DCMF_Callback_t     cb;
82 //    DCQuad              info __attribute__((__aligned__(16)));
83     DCMF_Request_t      send __attribute__((__aligned__(16)));
84 } SMSG_LIST __attribute__((__aligned__(16)));
85
86 #define MAX_NUM_SMSGS   64
87 CpvDeclare(PCQueue, smsg_list_q);
88 static SMSG_LIST * smsg_allocate();
89 static void smsg_free (SMSG_LIST *smsg);
90
91 /* =====End of Declarations of Machine Specific Variables===== */
92
93
94 /* =====Beginning of Declarations of Machine Specific Functions===== */
95 /* Utility functions */
/**
 * Round a pointer up to the next 16-byte boundary.
 *
 * The arithmetic is done in uintptr_t with a width-independent mask; the
 * previous 32-bit literal mask (0xfffffff0) cleared the upper half of the
 * address on platforms with 64-bit pointers.
 *
 * @param p  any byte pointer
 * @return   the smallest address >= p that is a multiple of 16
 */
char *ALIGN_16(char *p) {
    return (char *)(((uintptr_t)p + 0xf) & ~(uintptr_t)0xf);
}
99
/*
 * Approximate sleep: busy-waits until the DCMF timebase has advanced by
 * `cycles` ticks past the value read on entry.
 */
void mysleep (int cycles) {
    const unsigned long long begin    = DCMF_Timebase();
    const unsigned long long deadline = begin + cycles;
    unsigned long long now;

    for (now = begin; now < deadline; now = DCMF_Timebase()) {
        /* spin */
    }
}
static void SendMsgsUntil(int);
108
/* ######Beginning of Machine-specific RDMA related functions###### */
110 #define BGP_USE_AM_DIRECT 1
111 /* #define BGP_USE_RDMA_DIRECT 1 */
112 /* #define CMI_DIRECT_DEBUG 1 */
113 #if BGP_USE_AM_DIRECT
114
115 DCMF_Protocol_t  cmi_dcmf_direct_registration __attribute__((__aligned__(16)));
116 /** The receive side of a put implemented in DCMF_Send */
117
118 typedef struct {
119     void *recverBuf;
120     void (*callbackFnPtr)(void *);
121     void *callbackData;
122     DCMF_Request_t *DCMF_rq_t;
123 } dcmfDirectMsgHeader;
124
/*
 * Send-completion callback for the direct path.  No cleanup is required;
 * the body only emits a trace line when CMI_DIRECT_DEBUG is enabled.
 */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_done_cb(void*nothing, DCMF_Error_t *err)
#else
void direct_send_done_cb(void*nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    CmiPrintf("[%d] RDMA send_done_cb\n", CmiMyPe());
#endif
    return;
}
136
137 DCMF_Callback_t  directcb;
138
139 void     direct_short_pkt_recv (void             * clientdata,
140                                 const DCQuad     * info,
141                                 unsigned           count,
142                                 unsigned           senderrank,
143                                 const char       * buffer,
144                                 const unsigned     sndlen) {
145 #if CMI_DIRECT_DEBUG
146     CmiPrintf("[%d] RDMA direct_short_pkt_recv\n", CmiMyPe());
147 #endif
148     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
149     CmiMemcpy(msgHead->recverBuf, buffer, sndlen);
150     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
151 }
152
153
154 #if (DCMF_VERSION_MAJOR >= 2)
155 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
156 #else
157 typedef void (*cbhdlr) (void *);
158 #endif
159
160 DCMF_Request_t * direct_first_pkt_recv_done (void              * clientdata,
161         const DCQuad      * info,
162         unsigned            count,
163         unsigned            senderrank,
164         const unsigned      sndlen,
165         unsigned          * rcvlen,
166         char             ** buffer,
167         DCMF_Callback_t   * cb
168                                             ) {
169 #if CMI_DIRECT_DEBUG
170     CmiPrintf("[%d] RDMA direct_first_pkt_recv_done\n", CmiMyPe());
171 #endif
172     /* pull the data we need out of the header */
173     *rcvlen=sndlen;
174     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
175     cb->function= (cbhdlr)msgHead->callbackFnPtr;
176     cb->clientdata=msgHead->callbackData;
177     *buffer=msgHead->recverBuf;
178     return msgHead->DCMF_rq_t;
179 }
180 #endif /* end of #if BGP_USE_AM_DIRECT */
181
182 #ifdef BGP_USE_RDMA_DIRECT
183 static struct DCMF_Callback_t dcmf_rdma_cb_ack;
184
185 DCMF_Protocol_t  cmi_dcmf_direct_put_registration __attribute__((__aligned__(16)));
186 DCMF_Protocol_t  cmi_dcmf_direct_get_registration __attribute__((__aligned__(16)));
187 DCMF_Protocol_t  cmi_dcmf_direct_rdma_registration __attribute__((__aligned__(16)));
188 /** The receive side of a DCMF_Put notification implemented in DCMF_Send */
189
190 typedef struct {
191     void (*callbackFnPtr)(void *);
192     void *callbackData;
193 } dcmfDirectRDMAMsgHeader;
194
/*
 * Send-completion callback for the RDMA notification protocol.  Nothing
 * needs to be cleaned up; it only traces when CMI_DIRECT_DEBUG is enabled.
 *
 * The trace format previously contained a second "%d" with no matching
 * argument (undefined behavior in a printf-style call); it has been removed.
 */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_rdma_done_cb(void*nothing, DCMF_Error_t *err)
#else
void direct_send_rdma_done_cb(void*nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    CmiPrintf("[%d] RDMA send_rdma_done_cb\n", CmiMyPe());
#endif
}
207
208 DCMF_Callback_t  directcb;
209
210 void     direct_short_rdma_pkt_recv (void             * clientdata,
211                                      const DCQuad     * info,
212                                      unsigned           count,
213                                      unsigned           senderrank,
214                                      const char       * buffer,
215                                      const unsigned     sndlen) {
216 #if CMI_DIRECT_DEBUG
217     CmiPrintf("[%d] RDMA direct_short_rdma_pkt_recv\n", CmiMyPe());
218 #endif
219     dcmfDirectRDMAMsgHeader *msgHead=  (dcmfDirectRDMAMsgHeader *) info;
220     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
221 }
222
223 #if (DCMF_VERSION_MAJOR >= 2)
224 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
225 #else
226 typedef void (*cbhdlr) (void *);
227 #endif
228
229 DCMF_Request_t * direct_first_rdma_pkt_recv_done (void              * clientdata,
230         const DCQuad      * info,
231         unsigned            count,
232         unsigned            senderrank,
233         const unsigned      sndlen,
234         unsigned          * rcvlen,
235         char             ** buffer,
236         DCMF_Callback_t   * cb
237                                                  ) {
238     CmiAbort("direct_first_rdma_pkt_recv should not be called");
239 }
240 #endif /* end of #if BGP_USE_RDMA_DIRECT */
241 /* ######End of Machine-specific RDMA related functions###### */
242
243
244 /* ### Beginning of Communication-Op Related Functions ### */
245 /* The machine-specific send-related function */
246 #if (DCMF_VERSION_MAJOR >= 2)
247 static void send_done(void *data, DCMF_Error_t *err);
248 static void send_multi_done(void *data, DCMF_Error_t *err);
249 #else
250 static void send_done(void *data);
251 static void send_multi_done(void *data);
252 #endif
253 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode);
254 #define CmiMachineSpecificSendFunc MachineSpecificSendForDCMF
255
256 /* The machine-specific recv-related function (on the receiver side) */
257 #if (DCMF_VERSION_MAJOR >= 2)
258 static void recv_done(void *clientdata, DCMF_Error_t * err);
259 #else
260 static void recv_done(void *clientdata);
261 #endif
262 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
263         unsigned            count,
264         unsigned            senderrank,
265         const unsigned      sndlen,
266         unsigned            connid,
267         void              * clientdata,
268         unsigned          * rcvlen,
269         char             ** buffer,
270         unsigned          * pw,
271         DCMF_Callback_t   * cb
272                                            );
273 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
274                                       const DCQuad      * info,
275                                       unsigned            count,
276                                       unsigned            senderrank,
277                                       const unsigned      sndlen,
278                                       unsigned          * rcvlen,
279                                       char             ** buffer,
280                                       DCMF_Callback_t   * cb
281                                      );
282
283 /* ### End of Communication-Op Related Functions ### */
284
285 /* ### Beginning of Machine-startup Related Functions ### */
/* Startup hooks: each Machine* function is bound to its MachineSpecific* name. */
static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID);
static void MachinePreCommonInitForDCMF(int everReturn);
static void MachinePostCommonInitForDCMF(int everReturn);

#define MachineSpecificInit           MachineInitForDCMF
#define MachineSpecificPreCommonInit  MachinePreCommonInitForDCMF
#define MachineSpecificPostCommonInit MachinePostCommonInitForDCMF
293 /* ### End of Machine-startup Related Functions ### */
294
295 /* ### Beginning of Machine-running Related Functions ### */
/*
 * Run-time hooks.  Each MachineSpecific* name is bound to the function
 * that implements it.  The drain and exit hooks were previously both
 * bound to AdvanceCommunicationForDCMF, which left DrainResourcesForDCMF
 * and MachineExitForDCMF defined but never reachable through these names.
 */
static void AdvanceCommunicationForDCMF();
#define MachineSpecificAdvanceCommunication AdvanceCommunicationForDCMF

static void DrainResourcesForDCMF();
#define MachineSpecificDrainResources DrainResourcesForDCMF

static void MachineExitForDCMF();
#define MachineSpecificExit MachineExitForDCMF
304
305 /* ### End of Machine-running Related Functions ### */
306
307 /* ### Beginning of Idle-state Related Functions ### */
308
309 /* ### End of Idle-state Related Functions ### */
310
311 /* =====End of Declarations of Machine Specific Functions===== */
312
/**
 *  Macros that override settings in the common code, such as
 *  CMK_SMP_NO_COMMTHD, NETWORK_PROGRESS_PERIOD_DEFAULT,
 *  USE_COMMON_SYNC_P2P, CMK_HAS_SIZE_IN_MSGHDR,
 *  CMK_OFFLOAD_BCAST_PROCESS, etc.  They are defined here, before
 *  machine-common.c is included.
 */
319 #define CMK_OFFLOAD_BCAST_PROCESS 1
320 #include "machine-common.c"
321
322 /*######Beginning of functions related with Communication-Op functions ######*/
323
324 /* Utility functions */
325 static inline SMSG_LIST * smsg_allocate() {
326     SMSG_LIST *smsg = (SMSG_LIST *)PCQueuePop(CpvAccess(smsg_list_q));
327     if (smsg != NULL)
328         return smsg;
329
330     void * buf = malloc(sizeof(SMSG_LIST));
331     assert(buf!=NULL);
332     assert (((unsigned)buf & 0x0f) == 0);
333
334     return (SMSG_LIST *) buf;
335 }
336
337 static inline void smsg_free (SMSG_LIST *smsg) {
338     int size = PCQueueLength (CpvAccess(smsg_list_q));
339     if (size < MAX_NUM_SMSGS)
340         PCQueuePush (CpvAccess(smsg_list_q), (char *) smsg);
341     else
342         free (smsg);
343 }
344
345 static void SendMsgsUntil(int targetm) {
346     while (msgQueueLen>targetm) {
347 #if CMK_SMP
348         DCMF_CriticalSection_enter (0);
349 #endif
350
351         while (DCMF_Messager_advance()>0);
352
353 #if CMK_SMP
354         DCMF_CriticalSection_exit (0);
355 #endif
356     }
357 }
358
359 /* Send functions */
360 /* The callback on sender side */
361 #if (DCMF_VERSION_MAJOR >= 2)
362 static void send_done(void *data, DCMF_Error_t *err)
363 #else
364 static void send_done(void *data)
365 #endif
366 /* send done callback: sets the smsg entry to done */
367 {
368     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
369     CmiFree(msg_tmp->msg);
370     smsg_free (msg_tmp);
371     msgQueueLen--;
372 }
373
374 #if (DCMF_VERSION_MAJOR >= 2)
375 static void send_multi_done(void *data, DCMF_Error_t *err)
376 #else
377 static void send_multi_done(void *data)
378 #endif
379 /* send done callback: sets the smsg entry to done */
380 {
381     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
382     CmiFree(msg_tmp->msg);
383     free(msg_tmp->pelist);
384     smsg_free(msg_tmp);
385     msgQueueLen--;
386 }
387
388 /* The machine specific send function */
389 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode) {
390     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
391     //msg_tmp->destpe = destNode;
392     //msg_tmp->size = size;
393     msg_tmp->msg = msg;
394
395     DCMF_Callback_t cb;
396     DCQuad info;
397
398     cb.function = send_done;
399     cb.clientdata = msg_tmp;
400
401
402 #if CMK_ERROR_CHECKING
403     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
404     CMI_SET_CHECKSUM(msg, size);
405 #endif
406     CMI_MSG_SIZE(msg) = size;
407
408     //msg_tmp->cb.function = send_done;
409     //msg_tmp->cb.clientdata   =   msg_tmp;
410
411     DCMF_Protocol_t *protocol = NULL;
412
413     if (size < 224)
414         protocol = &cmi_dcmf_short_registration;
415     else if (size < 2048)
416         protocol = &cmi_dcmf_eager_registration;
417     else
418         protocol = &cmi_dcmf_rzv_registration;
419
420 #if CMK_SMP
421     DCMF_CriticalSection_enter (0);
422 #endif
423
424     msgQueueLen ++;
425     /*
426      * Original one:
427      *     DCMF_Send (protocol, &msg_tmp->send, msg_tmp->cb,
428                    DCMF_MATCH_CONSISTENCY, msg_tmp->destpe,
429                    msg_tmp->size, msg_tmp->msg, &msg_tmp->info, 1);
430            Ref:http://dcmf.anl-external.org/docs/mpi:dcmfd/group__SEND.html
431      */
432     DCMF_Send (protocol, &msg_tmp->send, cb, DCMF_MATCH_CONSISTENCY,
433                destNode, size, msg, &info, 0);
434
435 #if CMK_SMP
436     DCMF_CriticalSection_exit (0);
437 #endif
438
439     return 0;
440 }
441
442 #define MAX_MULTICAST 128
443 DCMF_Opcode_t  CmiOpcodeList [MAX_MULTICAST];
444
445 void  machineMulticast(int npes, int *pelist, int size, char* msg) {
446     CQdCreate(CpvAccess(cQdState), npes);
447
448     CmiAssert (npes < MAX_MULTICAST);
449
450 #if CMK_ERROR_CHECKING
451     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
452     CMI_SET_CHECKSUM(msg, size);
453 #endif
454
455     CMI_MSG_SIZE(msg) = size;
456     CMI_SET_BROADCAST_ROOT(msg,0);
457
458     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
459
460     //msg_tmp->destpe    = -1;      //multicast operation
461     //msg_tmp->size      = size * npes; //keep track of #bytes outstanding
462     msg_tmp->msg       = msg;
463     msg_tmp->pelist    = pelist;
464
465     DCMF_Multicast_t  mcast_info __attribute__((__aligned__(16)));
466     DCQuad info;
467
468     mcast_info.registration   = & cmi_dcmf_multicast_registration;
469     mcast_info.request        = & msg_tmp->send;
470     mcast_info.cb_done.function    =   send_multi_done;
471     mcast_info.cb_done.clientdata  =   msg_tmp;
472     mcast_info.consistency    =   DCMF_MATCH_CONSISTENCY;
473     mcast_info.connection_id  =   CmiMyPe();
474     mcast_info.bytes          =   size;
475     mcast_info.src            =   msg;
476     mcast_info.nranks         =   npes;
477     mcast_info.ranks          =   (unsigned *)pelist;
478     mcast_info.opcodes        =   CmiOpcodeList;   //static list of MAX_MULTICAST entires with 0 in them
479     mcast_info.flags          =   0;
480     mcast_info.msginfo        =   &info;
481     //mcast_info.count          =   1;
482     mcast_info.count          =   0;
483
484 #if CMK_SMP
485     DCMF_CriticalSection_enter (0);
486 #endif
487     msgQueueLen++;
488     DCMF_Multicast (&mcast_info);
489
490 #if CMK_SMP
491     DCMF_CriticalSection_exit (0);
492 #endif
493 }
494
495 /* Recv functions */
496 /* The callback on the recv side */
497 #if (DCMF_VERSION_MAJOR >= 2)
498 static void recv_done(void *clientdata, DCMF_Error_t * err)
499 #else
500 static void recv_done(void *clientdata)
501 #endif
502 /* recv done callback: push the recved msg to recv queue */
503 {
504
505     char *msg = (char *) clientdata;
506
507     /*printf ("NODE[%d] Recv message done with msg rank %d\n", CmiMyNode(), CMI_DEST_RANK(msg));*/
508     MACHSTATE3(2,"[%d] recv_done begin with msg %p size=%d { ", CmiMyNode(), msg, CMI_MSG_SIZE(msg));
509 #if CMK_ERROR_CHECKING
510     int sndlen = CMI_MSG_SIZE(msg);
511     CMI_CHECK_CHECKSUM(msg, sndlen);
512     if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) { /* received a non-charm msg */
513         CmiAbort("Charm++ Warning: Non Charm++ Message Received. \n");
514         return;
515     }
516 #endif
517
518     handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
519
520     outstanding_recvs--;
521     MACHSTATE(2,"} recv_done end ");
522     return;
523 }
524
525 void short_pkt_recv (void             * clientdata,
526                      const DCQuad     * info,
527                      unsigned           count,
528                      unsigned           senderrank,
529                      const char       * buffer,
530                      const unsigned     sndlen) {
531     outstanding_recvs ++;
532     int alloc_size = sndlen;
533
534     char * new_buffer = (char *)CmiAlloc(alloc_size);
535     CmiMemcpy (new_buffer, buffer, sndlen);
536
537 #if (DCMF_VERSION_MAJOR >= 2)
538     recv_done (new_buffer, NULL);
539 #else
540     recv_done (new_buffer);
541 #endif
542 }
543
544 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
545         unsigned            count,
546         unsigned            senderrank,
547         const unsigned      sndlen,
548         unsigned            connid,
549         void              * clientdata,
550         unsigned          * rcvlen,
551         char             ** buffer,
552         unsigned          * pw,
553         DCMF_Callback_t   * cb
554                                            ) {
555     outstanding_recvs ++;
556     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
557     /*printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
558     /* printf ("Receiving %d bytes\n", sndlen); */
559     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
560
561     *buffer = (char *)CmiAlloc(alloc_size);
562     cb->function = recv_done;
563     cb->clientdata = *buffer;
564
565     *pw  = 0x7fffffff;
566     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
567 }
568
569 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
570                                       const DCQuad      * info,
571                                       unsigned            count,
572                                       unsigned            senderrank,
573                                       const unsigned      sndlen,
574                                       unsigned          * rcvlen,
575                                       char             ** buffer,
576                                       DCMF_Callback_t   * cb
577                                      ) {
578     outstanding_recvs ++;
579     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
580     /* printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
581     /* printf ("Receiving %d bytes\n", sndlen); */
582     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
583
584     *buffer = (char *)CmiAlloc(alloc_size);
585     cb->function = recv_done;
586     cb->clientdata = *buffer;
587
588     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
589 }
590
#if 0
/* -----------------------------------------
 * Rectangular broadcast implementation (compiled out)
 * -----------------------------------------
 */
unsigned int *ranklist;
BGTsC_t        barrier;
#define MAX_COMM  256
/* Broadcast request per communicator id; NULL until initialized. */
static void * comm_table [MAX_COMM];

/* State of one asynchronous rectangular broadcast in flight. */
typedef struct rectbcast_msg {
    BGTsRC_t           request;
    DCMF_Callback_t    cb;
    char              *msg;
} RectBcastInfo;


/* Completion callback: releases the message and the bookkeeping record. */
static void bcast_done (void *data) {
    RectBcastInfo *done = (RectBcastInfo *) data;

    CmiFree (done->msg);
    free (done);
}

/* Look up the request registered for communicator `comm`. */
static  void *   getRectBcastRequest (unsigned comm) {
    return comm_table [comm];
}


/* Receive handler: one buffer holds the message plus an aligned BGTsRC_t. */
static  void *  bcast_recv     (unsigned               root,
                                unsigned               comm,
                                const unsigned         sndlen,
                                unsigned             * rcvlen,
                                char                ** rcvbuf,
                                DCMF_Callback_t      * const cb) {
    const int total = (int)(sndlen + sizeof(BGTsRC_t) + 16);
    char *storage;

    *rcvlen = sndlen;

    storage = (char *)CmiAlloc(total);
    *rcvbuf = storage;

    cb->function   = recv_done;
    cb->clientdata = storage;

    return (BGTsRC_t *) ALIGN_16 (storage + sndlen);
}


/* Start an asynchronous broadcast of `sndbuf` on communicator `commid`. */
extern void bgl_machine_RectBcast (unsigned                 commid,
                                   const char             * sndbuf,
                                   unsigned                 sndlen) {
    RectBcastInfo *pending = (RectBcastInfo *) malloc (sizeof(RectBcastInfo));

    /* NOTE(review): pending->msg is never assigned here, yet bcast_done
     * passes it to CmiFree -- confirm before re-enabling this code. */
    pending->cb.function   = bcast_done;
    pending->cb.clientdata = pending;

    BGTsRC_AsyncBcast_start (commid, &pending->request, &pending->cb, sndbuf, sndlen);
}

/* Create and register the broadcast request for communicator `commID`. */
extern void        bgl_machine_RectBcastInit  (unsigned               commID,
        const BGTsRC_Geometry_t* geometry) {
    BGTsRC_t *request;

    CmiAssert (commID < 256);
    CmiAssert (comm_table [commID] == NULL);

    request = (BGTsRC_t *) malloc (sizeof (BGTsRC_t));
    comm_table [commID] = request;

    BGTsRC_AsyncBcast_init  (request, commID,  geometry);
}

/*--------------------------------------------------------------
 *----- End Rectangular Broadcast Implementation ---------------
 *--------------------------------------------------------------*/
#endif
667
668
669 /*######End of functions related with Communication-Op functions ######*/
670
671
672 /* ######Beginning of functions related with communication progress ###### */
673 static INLINE_KEYWORD void AdvanceCommunicationForDCMF() {
674 #if CMK_SMP
675     DCMF_CriticalSection_enter (0);
676 #endif
677
678     while (DCMF_Messager_advance()>0);
679     //DCMF_Messager_advance();
680
681 #if CMK_SMP
682     DCMF_CriticalSection_exit (0);
683 #endif
684 }
685 /* ######End of functions related with communication progress ###### */
686
/* Network progress function: polls the network for messages.  On some
   implementations this also flushes the receive buffers. */
#if CMK_MACHINE_PROGRESS_DEFINED
void CmiMachineProgressImpl() {
    /* advance DCMF first, then run any immediate messages that arrived */
    AdvanceCommunicationForDCMF();

#if CMK_IMMEDIATE_MSG
    CmiHandleImmediate();
#endif
}
#endif
697
698 /* ######Beginning of functions related with exiting programs###### */
699 static void DrainResourcesForDCMF() {
700     while (msgQueueLen > 0 || outstanding_recvs > 0) {
701         AdvanceCommunicationForDCMF();
702     }
703 }
704
/* Shut the DCMF messager down and end the process with a success status. */
static void MachineExitForDCMF() {
    DCMF_Messager_finalize();

    exit(EXIT_SUCCESS);
}
709 /* ######End of functions related with exiting programs###### */
710
711
712 /* ######Beginning of functions related with starting programs###### */
713 /**
714  *  Obtain the number of nodes, my node id, and consuming machine layer
715  *  specific arguments
716  */
717 static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID) {
718
719     DCMF_Messager_initialize();
720
721 #if CMK_SMP
722     DCMF_Configure_t  config_in, config_out;
723     config_in.thread_level= DCMF_THREAD_MULTIPLE;
724     config_in.interrupts  = DCMF_INTERRUPTS_OFF;
725
726     DCMF_Messager_configure(&config_in, &config_out);
727     //assert (config_out.thread_level == DCMF_THREAD_MULTIPLE); //not supported in vn mode
728 #endif
729
730     DCMF_Send_Configuration_t short_config, eager_config, rzv_config;
731
732
733     short_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
734     short_config.cb_recv_short = short_pkt_recv;
735     short_config.cb_recv       = first_pkt_recv_done;
736
737 #if (DCMF_VERSION_MAJOR >= 3)
738     short_config.network  = DCMF_DEFAULT_NETWORK;
739 #elif (DCMF_VERSION_MAJOR == 2)
740     short_config.network  = DCMF_DefaultNetwork;
741 #endif
742
743     eager_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
744     eager_config.cb_recv_short = short_pkt_recv;
745     eager_config.cb_recv       = first_pkt_recv_done;
746 #if (DCMF_VERSION_MAJOR >= 3)
747     eager_config.network  = DCMF_DEFAULT_NETWORK;
748 #elif (DCMF_VERSION_MAJOR == 2)
749     eager_config.network  = DCMF_DefaultNetwork;
750 #endif
751
752 #ifdef  OPT_RZV
753 #warning "Enabling Optimize Rzv"
754     rzv_config.protocol        = DCMF_RZV_SEND_PROTOCOL;
755 #else
756     rzv_config.protocol        = DCMF_DEFAULT_SEND_PROTOCOL;
757 #endif
758     rzv_config.cb_recv_short   = short_pkt_recv;
759     rzv_config.cb_recv         = first_pkt_recv_done;
760 #if (DCMF_VERSION_MAJOR >= 3)
761     rzv_config.network  = DCMF_DEFAULT_NETWORK;
762 #elif (DCMF_VERSION_MAJOR == 2)
763     rzv_config.network  = DCMF_DefaultNetwork;
764 #endif
765
766     DCMF_Send_register (&cmi_dcmf_short_registration, &short_config);
767     DCMF_Send_register (&cmi_dcmf_eager_registration, &eager_config);
768     DCMF_Send_register (&cmi_dcmf_rzv_registration,   &rzv_config);
769
770 #ifdef BGP_USE_AM_DIRECT
771     DCMF_Send_Configuration_t direct_config;
772     direct_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
773     direct_config.cb_recv_short = direct_short_pkt_recv;
774     direct_config.cb_recv       = direct_first_pkt_recv_done;
775 #if (DCMF_VERSION_MAJOR >= 3)
776     direct_config.network  = DCMF_DEFAULT_NETWORK;
777 #elif (DCMF_VERSION_MAJOR == 2)
778     direct_config.network  = DCMF_DefaultNetwork;
779 #endif
780     DCMF_Send_register (&cmi_dcmf_direct_registration,   &direct_config);
781     directcb.function=direct_send_done_cb;
782     directcb.clientdata=NULL;
783 #endif
784
785 #ifdef BGP_USE_RDMA_DIRECT
786     /* notification protocol */
787     DCMF_Send_Configuration_t direct_rdma_config;
788     direct_rdma_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
789     direct_rdma_config.cb_recv_short = direct_short_rdma_pkt_recv;
790     direct_rdma_config.cb_recv       = direct_first_rdma_pkt_recv_done;
791 #if (DCMF_VERSION_MAJOR >= 3)
792     direct_rdma_config.network  = DCMF_DEFAULT_NETWORK;
793 #elif (DCMF_VERSION_MAJOR == 2)
794     direct_rdma_config.network  = DCMF_DefaultNetwork;
795 #endif
796     DCMF_Send_register (&cmi_dcmf_direct_rdma_registration,   &direct_rdma_config);
797     directcb.function=direct_send_rdma_done_cb;
798     directcb.clientdata=NULL;
799     /* put protocol */
800     DCMF_Put_Configuration_t put_configuration = { DCMF_DEFAULT_PUT_PROTOCOL };
801     DCMF_Put_register (&cmi_dcmf_direct_put_registration, &put_configuration);
802     DCMF_Get_Configuration_t get_configuration = { DCMF_DEFAULT_GET_PROTOCOL };
803     DCMF_Get_register (&cmi_dcmf_direct_get_registration, &get_configuration);
804
805 #endif
806     //fprintf(stderr, "Initializing Eager Protocol\n");
807
808     *numNodes = DCMF_Messager_size();
809     *myNodeID = DCMF_Messager_rank();
810
811     CmiBarrier();
812     CmiBarrier();
813     CmiBarrier();
814
815     /* NOTE: the following codes requires #PEs, which is not available
816      * until this function finishes. And it allocate O(p) space */
817     int totalPEs = _Cmi_mynodesize * (*numNodes);
818     DCMF_Multicast_Configuration_t mconfig;
819     mconfig.protocol = DCMF_MEMFIFO_DMA_MSEND_PROTOCOL;
820     mconfig.cb_recv  = first_multi_pkt_recv_done;
821     mconfig.clientdata = NULL;
822     mconfig.connectionlist = (void **) malloc (totalPEs * sizeof(unsigned long));
823     mconfig.nconnections = totalPEs;
824     DCMF_Multicast_register(&cmi_dcmf_multicast_registration, &mconfig);
825
826     int actualNodeSize = _Cmi_mynodesize;
827 #if !CMK_SMP_NO_COMMTHD
828     actualNodeSize++; //considering the extra comm thread
829 #endif
830
831     procState = (ProcState *)CmiAlloc((actualNodeSize) * sizeof(ProcState));
832     for (int i=0; i<actualNodeSize; i++) {
833         /*    procState[i].sendMsgBuf = PCQueueCreate();   */
834         procState[i].recvLock = CmiCreateLock();
835         procState[i].bcastLock = CmiCreateLock();
836     }
837
838     /* checksum flag */
839     if (CmiGetArgFlag(argv,"+checksum")) {
840 #if CMK_ERROR_CHECKING
841         checksum_flag = 1;
842         if (*myNodeID == 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
843 #else
844         if (*myNodeID == 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
845 #endif
846     }
847
848 }
849
850 static void MachinePreCommonInitForDCMF(int everReturn) {
851     CpvInitialize(PCQueue, smsg_list_q);
852     CpvAccess(smsg_list_q) = PCQueueCreate();
853 }
854
855 static void MachinePostCommonInitForDCMF(int everReturn) {
856 #if !CMK_SMP || CMK_SMP_NO_COMMTHD
857     CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)CmiNotifyIdle,NULL);
858 #endif
859
860     CmiBarrier();
861 }
862 /* ######End of functions related with starting programs###### */
863
864 /***********************************************************************
865  *
866  * Abort function:
867  *
868  ************************************************************************/
869
870 void CmiAbort(const char *message) {
871     CmiError("------------- Processor %d Exiting: Called CmiAbort ------------\n"
872              "{snd:%d,rcv:%d} Reason: %s\n",CmiMyPe(),
873              msgQueueLen, outstanding_recvs, message);
874
875 #if 0
876     /* Since it's a abort, why bother to drain the resources? The system
877      * should clean it self
878      */
879     /* FIXME: what happens in the SMP mode??? */
880     DrainResourcesForDCMF();
881 #endif
882     assert(0);
883 }
884
885
886 /*********** Beginning of MULTICAST/VECTOR SENDING FUNCTIONS **************/
887 /*
888
 * Depending on some flags, other delivery functions may also be needed.
890  */
891
892 #if !CMK_MULTICAST_LIST_USE_COMMON_CODE
893
/**
 * List send that leaves the caller's message untouched: a copy of msg made
 * with CopyMsg is handed to CmiFreeListSendFn together with the same
 * destination list and size.
 *
 * @param npes  number of entries in pes
 * @param pes   destination list, passed through unchanged
 * @param size  value passed to CopyMsg and to CmiFreeListSendFn
 * @param msg   message to duplicate
 */
void CmiSyncListSendFn(int npes, int *pes, int size, char *msg) {
    CmiFreeListSendFn(npes, pes, size, CopyMsg(msg, size));
}
898
/* OPTIMIZED_MULTICAST is 1 for SMP builds and 0 otherwise.  It is currently
 * kept off for non-SMP builds because the optimized multicast fails for
 * hybrid ldb in NAMD, as reported by Gengbin. --Chao Mei
 */
#if CMK_SMP
#define OPTIMIZED_MULTICAST  1
#else
#define OPTIMIZED_MULTICAST  0
#endif

/* Make the choice visible in the build output. */
#if OPTIMIZED_MULTICAST
#warning "Using Optimized Multicast"
#endif
911
/**
 * Send msg to every PE listed in pes[0..npes-1]; the caller gives up msg.
 *
 * A single destination is forwarded straight to CmiFreeSendFn.  Otherwise:
 *  - with OPTIMIZED_MULTICAST, destinations on the calling node (SMP builds)
 *    are served with CmiSyncSend, and the remaining ones are passed to
 *    machineMulticast in one call;
 *  - without it, the message is sent to each destination in turn, the last
 *    one through CmiFreeSendFn.
 *
 * @param npes  number of destinations, asserted to be >= 1
 * @param pes   destination PE numbers
 * @param size  message size passed to the send routines
 * @param msg   message buffer
 */
void CmiFreeListSendFn(int npes, int *pes, int size, char *msg) {
    CmiAssert(npes>=1);
    if (npes==1) {
        CmiFreeSendFn(pes[0], size, msg);
        return;
    }

    int i;
#if OPTIMIZED_MULTICAST
    int *newpelist = pes;
    int new_npes = npes;
#if CMK_SMP
    /* Split the list: same-node PEs are served right away with CmiSyncSend,
     * everything else is collected into a freshly allocated list. */
    newpelist = (int *)malloc(sizeof(int)*npes);
    new_npes = 0;
    for (i=0; i<npes; i++) {
        if (CmiNodeOf(pes[i]) == CmiMyNode()) {
            CmiSyncSend(pes[i], size, msg);
        } else {
            newpelist[new_npes++] = pes[i];
        }
    }
    if (new_npes == 0) {
        /* Every destination was local: the temporary list was never handed
         * to anyone, so release it here instead of leaking it. */
        free(newpelist);
        CmiFree(msg);
        return;
    }
#endif

    CMI_SET_BROADCAST_ROOT(msg,0);
    CMI_MSG_SIZE(msg) = size;
#if CMK_ERROR_CHECKING
    CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
    CMI_SET_CHECKSUM(msg, size);
#endif

    CQdCreate(CpvAccess(cQdState), new_npes);
    /* NOTE(review): newpelist is not freed after this call; whether
     * machineMulticast takes ownership of the list is not visible from
     * this function -- confirm. */
    machineMulticast (new_npes, newpelist, size, msg);
#else /* non-optimized multicast */

    for (i=0; i<npes-1; i++) {
#if !CMK_SMP
        /* take an extra reference before each send that frees the message */
        CmiReference(msg);
        CmiFreeSendFn(pes[i], size, msg);
#else
        CmiSyncSend(pes[i], size, msg);
#endif
    }
    CmiFreeSendFn(pes[npes-1], size, msg);
#endif /* end of #if OPTIMIZED_MULTICAST */
}
964 #endif /* end of #if !CMK_MULTICAST_LIST_USE_COMMON_CODE */
965
966 /*********** End of MULTICAST/VECTOR SENDING FUNCTIONS **************/
967
968 /**************************  TIMER FUNCTIONS **************************/
969
970 /************Barrier Related Functions****************/
971 /* Barrier related functions */
/* TODO: does DCMF provide any barrier-related functions? --Chao Mei */
973 /* Barrier needs to be implemented!!! -Chao Mei */
974 /* These two barriers are only needed by CmiTimerInit to synchronize all the
975    threads. They do not need to provide a general barrier. */
/* Placeholder barrier: performs no synchronization and always reports 0. */
int CmiBarrier() {
    const int status = 0;
    return status;
}
/* Placeholder barrier: performs no synchronization and always reports 0. */
int CmiBarrierZero() {
    const int status = 0;
    return status;
}
982
983 #include "manytomany.c"
984
985 /*********************************************************************************************
986 This section is for CmiDirect. This is a variant of the  persistent communication in which
987 the user can transfer data between processors without using Charm++ messages. This lets the user
988 send and receive data from the middle of his arrays without any copying on either send or receive
989 side
990 *********************************************************************************************/
991
992
993 #ifdef BGP_USE_AM_DIRECT
994
995 #include "cmidirect.h"
996
997 /* We can avoid a receiver side lookup by just sending the whole shebang.
998    DCMF header is in units of quad words (16 bytes), so we'd need less than a
999    quad word for the handle if we just sent that and did a lookup. Or exactly
1000    2 quad words for the buffer pointer, callback pointer, callback
1001    data pointer, and DCMF_Request_t pointer with no lookup.
1002
1003    Since CmiDirect is generally going to be used for messages which aren't
   tiny, the extra 16 bytes is not likely to impact performance noticeably and
1005    not having to lookup handles in tables simplifies the code enormously.
1006
1007    EJB   2008/4/2
1008 */
1009
1010
1011 /**
1012  To be called on the receiver to create a handle and return its number
1013 **/
1014 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1015     /* with two-sided primitives we just bundle the buffer and callback info into the handle so the sender can remind us about it later. */
1016     struct infiDirectUserHandle userHandle;
1017     userHandle.handle=1; /* doesn't matter on BG/P*/
1018     userHandle.senderNode=senderNode;
1019     userHandle.recverNode=_Cmi_mynode;
1020     userHandle.recverBufSize=recvBufSize;
1021     userHandle.recverBuf=recvBuf;
1022     userHandle.initialValue=initialValue;
1023     userHandle.callbackFnPtr=callbackFnPtr;
1024     userHandle.callbackData=callbackData;
1025     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1026 #if CMI_DIRECT_DEBUG
1027     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1028 #endif
1029     return userHandle;
1030 }
1031
1032 /****
1033  To be called on the sender to attach the sender's buffer to this handle
1034 ******/
1035
1036 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1037
1038     /* one-sided primitives would require registration of memory */
1039
1040     /* with two-sided primitives we just record the sender buf in the handle */
1041     userHandle->senderBuf=sendBuf;
1042     CmiAssert(sendBufSize==userHandle->recverBufSize);
1043     userHandle->DCMF_rq_tsend = (DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1044 #if CMI_DIRECT_DEBUG
1045     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1046 #endif
1047
1048 }
1049
1050 /****
1051 To be called on the sender to do the actual data transfer
1052 ******/
1053 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1054     /** invoke a DCMF_Send with the direct callback */
1055     DCMF_Protocol_t *protocol = NULL;
1056     protocol = &cmi_dcmf_direct_registration;
1057     /* local copy */
1058     CmiAssert(userHandle->recverBuf!=NULL);
1059     CmiAssert(userHandle->senderBuf!=NULL);
1060     CmiAssert(userHandle->recverBufSize>0);
1061     if (userHandle->recverNode== _Cmi_mynode) {
1062 #if CMI_DIRECT_DEBUG
1063         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1064 #endif
1065
1066         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1067         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1068     } else {
1069         dcmfDirectMsgHeader msgHead;
1070         msgHead.recverBuf=userHandle->recverBuf;
1071         msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1072         msgHead.callbackData=userHandle->callbackData;
1073         msgHead.DCMF_rq_t=(DCMF_Request_t *) userHandle->DCMF_rq_trecv;
1074 #if CMK_SMP
1075         DCMF_CriticalSection_enter (0);
1076 #endif
1077 #if CMI_DIRECT_DEBUG
1078         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1079 #endif
1080         DCMF_Send (protocol,
1081                    (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1082                    directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1083                    userHandle->recverBufSize, userHandle->senderBuf,
1084                    (struct DCQuad *) &(msgHead), 2);
1085
1086 #if CMK_SMP
1087         DCMF_CriticalSection_exit (0);
1088 #endif
1089     }
1090 }
1091
/* Get is not available in the BGP_USE_AM_DIRECT variant: any call ends in
 * CmiAbort, and the handle is never examined. */
void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
    CmiAbort("Not Implemented, switch to #define BGP_USE_RDMA_DIRECT");
}
1095
1096 /**** up to the user to safely call this */
1097 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1098     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1099 #if CMK_SMP
1100     DCMF_CriticalSection_enter (0);
1101 #endif
1102     CmiFree(userHandle->DCMF_rq_tsend);
1103 #if CMK_SMP
1104     DCMF_CriticalSection_exit (0);
1105 #endif
1106
1107 }
1108
1109 /**** up to the user to safely call this */
1110 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1111     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1112 #if CMK_SMP
1113     DCMF_CriticalSection_enter (0);
1114 #endif
1115     CmiFree(userHandle->DCMF_rq_trecv);
1116
1117 #if CMK_SMP
1118     DCMF_CriticalSection_exit (0);
1119 #endif
1120 }
1121
1122
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1127
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1132
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1137
1138 #endif /* BGP_USE_AM_DIRECT*/
1139
1140 #ifdef BGP_USE_RDMA_DIRECT
1141
1142 #include "cmidirect.h"
1143
1144 /*
1145    Notification protocol passes callback function and data in a single
1146    quadword.  This occurs in a message triggered by the sender side ack
1147    callback and therefore has higher latency than polling, but is guaranteed
1148    to be semantically correct.  The latency for a single packet that isn't
1149    hitting charm/converse should be pretty minimal, but you could run into
1150    sender side progress issues.  The alternative of polling on the out of band
1151    byte scheme creates correctness issues in that the data really has to be
1152    out of band and you rely on the buffer being written in order.  It also has
1153    annoying polling issues.  A third scheme could add a second put to a
1154    control region to poll upon and force sequential consistency between
   puts. It's not really clear that this would be faster or avoid the progress
1156    issue since you run into the same issues to enforce that sequential
1157    consistency.
1158
1159    EJB   2011/1/20
1160 */
1161
1162
1163 /* local function to use the ack as our signal to send a remote notify */
1164 static void CmiNotifyRemoteRDMA(void *handle, struct DCMF_Error_t *error) {
1165     struct infiDirectUserHandle *userHandle= (struct infiDirectUserHandle *) handle;
1166     dcmfDirectRDMAMsgHeader msgHead;
1167     msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1168     msgHead.callbackData=userHandle->callbackData;
1169 #if CMK_SMP
1170     DCMF_CriticalSection_enter (0);
1171 #endif
1172 #if CMI_DIRECT_DEBUG
1173     CmiPrintf("[%d] RDMA notify put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p \n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1174 #endif
1175     DCMF_Result res=DCMF_Send (&cmi_dcmf_direct_rdma_registration,
1176                                userHandle->DCMF_rq_tsend,
1177                                directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1178                                sizeof(dcmfDirectRDMAMsgHeader),
1179
1180                                userHandle->DCMF_notify_buf,
1181                                (struct DCQuad *) &(msgHead), 1);
1182 //    CmiAssert(res==DCMF_SUCCESS);
1183 #if CMK_SMP
1184     DCMF_CriticalSection_exit (0);
1185 #endif
1186 }
1187
1188 /**
1189  To be called on the receiver to create a handle and return its number
1190 **/
1191
1192
1193 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1194     /* one-sided primitives require registration of memory */
1195     struct infiDirectUserHandle userHandle;
1196     size_t numbytesRegistered=0;
1197     DCMF_Result regresult=DCMF_Memregion_create( &userHandle.DCMF_recverMemregion,
1198                           &numbytesRegistered,
1199                           recvBufSize,
1200                           recvBuf,
1201                           0);
1202     CmiAssert(numbytesRegistered==recvBufSize);
1203     CmiAssert(regresult==DCMF_SUCCESS);
1204
1205
1206     userHandle.handle=1; /* doesn't matter on BG/P*/
1207     userHandle.senderNode=senderNode;
1208     userHandle.recverNode=_Cmi_mynode;
1209     userHandle.recverBufSize=recvBufSize;
1210     userHandle.recverBuf=recvBuf;
1211     userHandle.initialValue=initialValue;
1212     userHandle.callbackFnPtr=callbackFnPtr;
1213     userHandle.callbackData=callbackData;
1214     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1215 #if CMI_DIRECT_DEBUG
1216     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1217 #endif
1218     return userHandle;
1219 }
1220
1221 /****
1222  To be called on the sender to attach the sender's buffer to this handle
1223 ******/
1224
1225 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1226     /* one-sided primitives would require registration of memory */
1227     userHandle->senderBuf=sendBuf;
1228     CmiAssert(sendBufSize==userHandle->recverBufSize);
1229     userHandle->DCMF_rq_tsend =(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1230     size_t numbytesRegistered=0;  // set as return value from create
1231     userHandle->DCMF_notify_buf=ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+32));
1232     userHandle->DCMF_notify_cb.function=CmiNotifyRemoteRDMA;
1233     userHandle->DCMF_notify_cb.clientdata=userHandle;
1234     DCMF_Result regresult=DCMF_Memregion_create( &userHandle->DCMF_senderMemregion,
1235                           &numbytesRegistered,
1236                           sendBufSize,
1237                           sendBuf,
1238                           0);
1239     CmiAssert(numbytesRegistered==sendBufSize);
1240     CmiAssert(regresult==DCMF_SUCCESS);
1241
1242 #if CMI_DIRECT_DEBUG
1243     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1244 #endif
1245
1246 }
1247
1248
1249 /****
1250 To be called on the sender to do the actual data transfer
1251 ******/
1252 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1253     /** invoke a DCMF_Put with the direct callback */
1254
1255     CmiAssert(userHandle->recverBuf!=NULL);
1256     CmiAssert(userHandle->senderBuf!=NULL);
1257     CmiAssert(userHandle->recverBufSize>0);
1258     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1259 #if CMI_DIRECT_DEBUG
1260         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1261 #endif
1262
1263         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1264         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1265     } else {
1266 #if CMK_SMP
1267         DCMF_CriticalSection_enter (0);
1268 #endif
1269 #if CMI_DIRECT_DEBUG
1270         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1271 #endif
1272         DCMF_Result
1273         Res= DCMF_Put(&cmi_dcmf_direct_put_registration,
1274                       userHandle->DCMF_rq_tsend,
1275                       directcb, DCMF_RELAXED_CONSISTENCY,
1276                       userHandle->recverNode,
1277                       userHandle->recverBufSize,
1278                       &userHandle->DCMF_senderMemregion,
1279                       &userHandle->DCMF_recverMemregion,
1280                       0, /* offsets are zero */
1281                       0,
1282                       userHandle->DCMF_notify_cb
1283                      );
1284         CmiAssert(Res==DCMF_SUCCESS);
1285 #if CMK_SMP
1286         DCMF_CriticalSection_exit (0);
1287 #endif
1288     }
1289 }
1290
1291 /****
1292 To be called on the receiver to initiate the actual data transfer
1293 ******/
1294 void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
1295     /** invoke a DCMF_Get with the direct callback */
1296
1297     CmiAssert(userHandle->recverBuf!=NULL);
1298     CmiAssert(userHandle->senderBuf!=NULL);
1299     CmiAssert(userHandle->recverBufSize>0);
1300     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1301 #if CMI_DIRECT_DEBUG
1302         CmiPrintf("[%d] RDMA local get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1303 #endif
1304
1305         CmiMemcpy(userHandle->senderBuf,userHandle->recverBuf,userHandle->recverBufSize);
1306         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1307     } else {
1308         struct DCMF_Callback_t done_cb;
1309         done_cb.function=userHandle->callbackFnPtr;
1310         done_cb.clientdata=userHandle->callbackData;
1311 #if CMK_SMP
1312         DCMF_CriticalSection_enter (0);
1313 #endif
1314 #if CMI_DIRECT_DEBUG
1315         CmiPrintf("[%d] RDMA get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1316 #endif
1317         DCMF_Result
1318         Res= DCMF_Get(&cmi_dcmf_direct_get_registration,
1319                       (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1320                       done_cb, DCMF_RELAXED_CONSISTENCY,
1321                       userHandle->recverNode,
1322                       userHandle->recverBufSize,
1323                       & userHandle->DCMF_recverMemregion,
1324                       & userHandle->DCMF_senderMemregion,
1325                       0, /* offsets are zero */
1326                       0
1327                      );
1328         CmiAssert(Res==DCMF_SUCCESS);
1329
1330
1331 #if CMK_SMP
1332         DCMF_CriticalSection_exit (0);
1333 #endif
1334     }
1335 }
1336
1337 /**** up to the user to safely call this */
1338 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1339     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1340 #if CMK_SMP
1341     DCMF_CriticalSection_enter (0);
1342 #endif
1343
1344     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_senderMemregion);
1345     CmiFree(userHandle->DCMF_notify_buf);
1346     CmiFree(userHandle->DCMF_rq_tsend);
1347 #if CMK_SMP
1348     DCMF_CriticalSection_exit (0);
1349 #endif
1350
1351 }
1352
1353 /**** up to the user to safely call this */
1354 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1355     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1356 #if CMK_SMP
1357     DCMF_CriticalSection_enter (0);
1358 #endif
1359
1360     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_recverMemregion);
1361     CmiFree(userHandle->DCMF_rq_trecv);
1362
1363 #if CMK_SMP
1364     DCMF_CriticalSection_exit (0);
1365 #endif
1366 }
1367
1368
1369
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1374
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1379
/* Should not be called the first time.  On BGP this does nothing: the handle
 * is ignored. */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;
}
1384
1385 #endif /* BGP_USE_RDMA_DIRECT*/
1386
1387 /*@}*/
1388