650285cf0682ed1b59586be99de7d1b97c731dde
[charm.git] / src / arch / bluegenep / machine.c
1 #include <stdio.h>
2 #include <errno.h>
3 #include <stdlib.h>
4 #include <unistd.h>
5 #include <math.h>
6 #include <string.h>
7 #include <malloc.h>
8 #include <assert.h>
9
10 #include "converse.h"
11 #include "machine.h"
12 #include "pcqueue.h"
13
14 #include <bpcore/ppc450_inlines.h>
15 #include "dcmf.h"
16 #include "dcmf_multisend.h"
17
18 /* =======Beginning of Definitions of Performance-Specific Macros =======*/
19 /* =======End of Definitions of Performance-Specific Macros =======*/
20
21 /* =======Beginning of Definitions of Msg Header Specific Macros =======*/
22 /* =======End of Definitions of Msg Header Specific Macros =======*/
23
24 /* =====Beginning of Definitions of Message-Corruption Related Macros=====*/
/* Accessor (usable as an lvalue) for the magic field of a message's
 * basic header. */
#define CMI_MAGIC(msg)                   (((CmiMsgHeaderBasic *)(msg))->magic)
/* Value stamped into CMI_MAGIC by the senders and verified in recv_done. */
#define CHARM_MAGIC_NUMBER               126

#if CMK_ERROR_CHECKING
/* Set to 1 by the "+checksum" command line flag (see MachineInitForDCMF). */
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

/*
 * Both macros are wrapped in do { } while (0) so that they expand to a
 * single statement and stay safe inside an unbraced if/else.  They must be
 * invoked with a trailing semicolon.
 */

/* Zero the cksum header field, then store the checksum of the first `len`
 * bytes of `msg` into it.  Does nothing unless checksum_flag is set. */
#define CMI_SET_CHECKSUM(msg, len)      \
        do {    \
          if (checksum_flag)  {   \
            ((CmiMsgHeaderBasic *)(msg))->cksum = 0;        \
            ((CmiMsgHeaderBasic *)(msg))->cksum = computeCheckSum((unsigned char*)(msg), (len));        \
          } \
        } while (0)

/* When checksum_flag is set, recompute the checksum over `len` bytes of
 * `msg`; a non-zero result dumps the message as hex and aborts.  Bytes are
 * read as unsigned char so values >= 0x80 are not sign-extended, and are
 * printed zero-padded so the dump is unambiguous. */
#define CMI_CHECK_CHECKSUM(msg, len)    \
        do { \
          if (checksum_flag && computeCheckSum((unsigned char*)(msg), (len)) != 0)  { \
            int cmi_cksum_idx_; \
            printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), (len)); \
            for (cmi_cksum_idx_ = 0; cmi_cksum_idx_ < (len); cmi_cksum_idx_++) { \
                printf("%02x", ((unsigned char *)(msg))[cmi_cksum_idx_]); \
            } \
            printf("------------------------------\n\n"); \
            CmiAbort("Fatal error: checksum doesn't agree!\n"); \
          } \
        } while (0)
#else
#define CMI_SET_CHECKSUM(msg, len)      do { } while (0)
#define CMI_CHECK_CHECKSUM(msg, len)    do { } while (0)
#endif
55 /* =====End of Definitions of Message-Corruption Related Macros=====*/
56
57
58 /* =====Beginning of Declarations of Machine Specific Variables===== */
59 typedef struct ProcState {
60     /* PCQueue      sendMsgBuf; */      /* per processor message sending queue */
61     CmiNodeLock  recvLock;              /* for cs->recv */
62     CmiNodeLock bcastLock;
63 } ProcState;
64
65 static ProcState  *procState;
66
67 volatile int msgQueueLen;
68 volatile int outstanding_recvs;
69
70 DCMF_Protocol_t  cmi_dcmf_short_registration __attribute__((__aligned__(16)));
71 DCMF_Protocol_t  cmi_dcmf_eager_registration __attribute__((__aligned__(16)));
72 DCMF_Protocol_t  cmi_dcmf_rzv_registration   __attribute__((__aligned__(16)));
73 DCMF_Protocol_t  cmi_dcmf_multicast_registration   __attribute__((__aligned__(16)));
74
75
76 typedef struct msg_list {
77     char              * msg;
78 //    int                 size;
79 //    int                 destpe;
80     int               * pelist;
81 //    DCMF_Callback_t     cb;
82 //    DCQuad              info __attribute__((__aligned__(16)));
83     DCMF_Request_t      send __attribute__((__aligned__(16)));
84 } SMSG_LIST __attribute__((__aligned__(16)));
85
86 #define MAX_NUM_SMSGS   64
87 CpvDeclare(PCQueue, smsg_list_q);
88 static SMSG_LIST * smsg_allocate();
89 static void smsg_free (SMSG_LIST *smsg);
90
91 /* =====End of Declarations of Machine Specific Variables===== */
92
93
94 /* =====Beginning of Declarations of Machine Specific Functions===== */
95 /* Utility functions */
/**
 * Round a pointer up to the next 16-byte boundary.
 *
 * @param p  any address
 * @return   the smallest address >= p whose low four bits are zero
 *           (p itself when it is already aligned)
 */
char *ALIGN_16(char *p) {
    /* Build the mask from ~0xfUL instead of the literal 0xfffffff0 so the
     * upper address bits are preserved when unsigned long is wider than
     * 32 bits. */
    return (char *)(((unsigned long)p + 0xfUL) & ~0xfUL);
}
99
/* Approximate sleep: spin until the DCMF timebase counter has advanced by
 * `cycles` ticks from the value read on entry. */
void mysleep (int cycles) {
    const unsigned long long begin    = DCMF_Timebase();
    const unsigned long long deadline = begin + cycles;
    unsigned long long now;

    for (now = begin; now < deadline; now = DCMF_Timebase())
        ;   /* busy-wait */
}

/* Defined with the other communication helpers further down. */
static void SendMsgsUntil(int);
108
109 /* ######Begining of Machine-specific RDMA related functions###### */
110 #define BGP_USE_AM_DIRECT 1
111 /* #define BGP_USE_RDMA_DIRECT 1 */
112 /* #define CMI_DIRECT_DEBUG 1 */
113 #if BGP_USE_AM_DIRECT
114
115 DCMF_Protocol_t  cmi_dcmf_direct_registration __attribute__((__aligned__(16)));
116 /** The receive side of a put implemented in DCMF_Send */
117
118 typedef struct {
119     void *recverBuf;
120     void (*callbackFnPtr)(void *);
121     void *callbackData;
122     DCMF_Request_t *DCMF_rq_t;
123 } dcmfDirectMsgHeader;
124
125 /* nothing for us to do here */
126 #if (DCMF_VERSION_MAJOR >= 2)
127 void direct_send_done_cb(void*nothing, DCMF_Error_t *err)
128 #else
129 void direct_send_done_cb(void*nothing)
130 #endif
131 {
132 #if CMI_DIRECT_DEBUG
133     CmiPrintf("[%d] RDMA send_done_cb\n", CmiMyPe());
134 #endif
135 }
136
137 DCMF_Callback_t  directcb;
138
139 void     direct_short_pkt_recv (void             * clientdata,
140                                 const DCQuad     * info,
141                                 unsigned           count,
142                                 unsigned           senderrank,
143                                 const char       * buffer,
144                                 const unsigned     sndlen) {
145 #if CMI_DIRECT_DEBUG
146     CmiPrintf("[%d] RDMA direct_short_pkt_recv\n", CmiMyPe());
147 #endif
148     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
149     CmiMemcpy(msgHead->recverBuf, buffer, sndlen);
150     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
151 }
152
153
154 #if (DCMF_VERSION_MAJOR >= 2)
155 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
156 #else
157 typedef void (*cbhdlr) (void *);
158 #endif
159
160 DCMF_Request_t * direct_first_pkt_recv_done (void              * clientdata,
161         const DCQuad      * info,
162         unsigned            count,
163         unsigned            senderrank,
164         const unsigned      sndlen,
165         unsigned          * rcvlen,
166         char             ** buffer,
167         DCMF_Callback_t   * cb
168                                             ) {
169 #if CMI_DIRECT_DEBUG
170     CmiPrintf("[%d] RDMA direct_first_pkt_recv_done\n", CmiMyPe());
171 #endif
172     /* pull the data we need out of the header */
173     *rcvlen=sndlen;
174     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
175     cb->function= (cbhdlr)msgHead->callbackFnPtr;
176     cb->clientdata=msgHead->callbackData;
177     *buffer=msgHead->recverBuf;
178     return msgHead->DCMF_rq_t;
179 }
180 #endif /* end of #if BGP_USE_AM_DIRECT */
181
182 #ifdef BGP_USE_RDMA_DIRECT
183 static struct DCMF_Callback_t dcmf_rdma_cb_ack;
184
185 DCMF_Protocol_t  cmi_dcmf_direct_put_registration __attribute__((__aligned__(16)));
186 DCMF_Protocol_t  cmi_dcmf_direct_get_registration __attribute__((__aligned__(16)));
187 DCMF_Protocol_t  cmi_dcmf_direct_rdma_registration __attribute__((__aligned__(16)));
188 /** The receive side of a DCMF_Put notification implemented in DCMF_Send */
189
190 typedef struct {
191     void (*callbackFnPtr)(void *);
192     void *callbackData;
193 } dcmfDirectRDMAMsgHeader;
194
/* Sender-side completion callback for the RDMA notification protocol.
 * There is nothing to do besides optional tracing. */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_rdma_done_cb(void*nothing, DCMF_Error_t *err)
#else
void direct_send_rdma_done_cb(void*nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    /* The old format string had a second "%d" with no matching argument,
     * which is undefined behaviour; only the PE number is printed now. */
    CmiPrintf("[%d] RDMA send_rdma_done_cb\n", CmiMyPe());
#endif
}
207
208 DCMF_Callback_t  directcb;
209
210 void     direct_short_rdma_pkt_recv (void             * clientdata,
211                                      const DCQuad     * info,
212                                      unsigned           count,
213                                      unsigned           senderrank,
214                                      const char       * buffer,
215                                      const unsigned     sndlen) {
216 #if CMI_DIRECT_DEBUG
217     CmiPrintf("[%d] RDMA direct_short_rdma_pkt_recv\n", CmiMyPe());
218 #endif
219     dcmfDirectRDMAMsgHeader *msgHead=  (dcmfDirectRDMAMsgHeader *) info;
220     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
221 }
222
223 #if (DCMF_VERSION_MAJOR >= 2)
224 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
225 #else
226 typedef void (*cbhdlr) (void *);
227 #endif
228
229 DCMF_Request_t * direct_first_rdma_pkt_recv_done (void              * clientdata,
230         const DCQuad      * info,
231         unsigned            count,
232         unsigned            senderrank,
233         const unsigned      sndlen,
234         unsigned          * rcvlen,
235         char             ** buffer,
236         DCMF_Callback_t   * cb
237                                                  ) {
238     CmiAbort("direct_first_rdma_pkt_recv should not be called");
239 }
240 #endif /* end of #if BGP_USE_RDMA_DIRECT */
241 /* ######End of Machine-specific RDMA related functions###### */
242
243
244 /* ### Beginning of Communication-Op Related Functions ### */
245 /* The machine-specific send-related function */
246 #if (DCMF_VERSION_MAJOR >= 2)
247 static void send_done(void *data, DCMF_Error_t *err);
248 static void send_multi_done(void *data, DCMF_Error_t *err);
249 #else
250 static void send_done(void *data);
251 static void send_multi_done(void *data);
252 #endif
253 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode);
254 #define LrtsSendFunc MachineSpecificSendForDCMF
255
256 /* The machine-specific recv-related function (on the receiver side) */
257 #if (DCMF_VERSION_MAJOR >= 2)
258 static void recv_done(void *clientdata, DCMF_Error_t * err);
259 #else
260 static void recv_done(void *clientdata);
261 #endif
262 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
263         unsigned            count,
264         unsigned            senderrank,
265         const unsigned      sndlen,
266         unsigned            connid,
267         void              * clientdata,
268         unsigned          * rcvlen,
269         char             ** buffer,
270         unsigned          * pw,
271         DCMF_Callback_t   * cb
272                                            );
273 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
274                                       const DCQuad      * info,
275                                       unsigned            count,
276                                       unsigned            senderrank,
277                                       const unsigned      sndlen,
278                                       unsigned          * rcvlen,
279                                       char             ** buffer,
280                                       DCMF_Callback_t   * cb
281                                      );
282
283 /* ### End of Communication-Op Related Functions ### */
284
285 /* ### Beginning of Machine-startup Related Functions ### */
/* Startup hooks.  Each Lrts* name is an alias for the DCMF implementation
 * defined later in this file. */
static void MachineInitForDCMF(int *argc, char ***argv, int *numNodes, int *myNodeID);
#define LrtsInit MachineInitForDCMF

static void MachinePreCommonInitForDCMF(int everReturn);
static void MachinePostCommonInitForDCMF(int everReturn);
#define LrtsPreCommonInit MachinePreCommonInitForDCMF
#define LrtsPostCommonInit MachinePostCommonInitForDCMF
/* ### End of Machine-startup Related Functions ### */

/* ### Beginning of Machine-running Related Functions ### */
/* Polls DCMF for progress. */
static void AdvanceCommunicationForDCMF();
#define LrtsAdvanceCommunication AdvanceCommunicationForDCMF

/* Polls until no sends or receives are outstanding. */
static void DrainResourcesForDCMF();
#define LrtsDrainResources DrainResourcesForDCMF

/* Finalizes DCMF and terminates the process. */
static void MachineExitForDCMF();
#define LrtsExit MachineExitForDCMF

/* ### End of Machine-running Related Functions ### */

/* ### Beginning of Idle-state Related Functions ### */

/* ### End of Idle-state Related Functions ### */

/* Empty hook on this machine layer. */
static void MachinePostNonLocalForDCMF();
#define LrtsPostNonLocal MachinePostNonLocalForDCMF
313
314 /* =====End of Declarations of Machine Specific Functions===== */
315
/**
 *  Macros that override the defaults in the common code, such as
 *  CMK_SMP_NO_COMMTHD, NETWORK_PROGRESS_PERIOD_DEFAULT,
 *  USE_COMMON_SYNC_P2P, CMK_HAS_SIZE_IN_MSGHDR,
 *  CMK_OFFLOAD_BCAST_PROCESS, etc.
 */
322 #define CMK_OFFLOAD_BCAST_PROCESS 1
323 #include "machine-common.h"
324 #include "machine-common.c"
325
326 /*######Beginning of functions related with Communication-Op functions ######*/
327
328 /* Utility functions */
329 static inline SMSG_LIST * smsg_allocate() {
330     SMSG_LIST *smsg = (SMSG_LIST *)PCQueuePop(CpvAccess(smsg_list_q));
331     if (smsg != NULL)
332         return smsg;
333
334     void * buf = malloc(sizeof(SMSG_LIST));
335     assert(buf!=NULL);
336     assert (((unsigned)buf & 0x0f) == 0);
337
338     return (SMSG_LIST *) buf;
339 }
340
341 static inline void smsg_free (SMSG_LIST *smsg) {
342     int size = PCQueueLength (CpvAccess(smsg_list_q));
343     if (size < MAX_NUM_SMSGS)
344         PCQueuePush (CpvAccess(smsg_list_q), (char *) smsg);
345     else
346         free (smsg);
347 }
348
349 static void SendMsgsUntil(int targetm) {
350     while (msgQueueLen>targetm) {
351 #if CMK_SMP
352         DCMF_CriticalSection_enter (0);
353 #endif
354
355         while (DCMF_Messager_advance()>0);
356
357 #if CMK_SMP
358         DCMF_CriticalSection_exit (0);
359 #endif
360     }
361 }
362
363 /* Send functions */
364 /* The callback on sender side */
365 #if (DCMF_VERSION_MAJOR >= 2)
366 static void send_done(void *data, DCMF_Error_t *err)
367 #else
368 static void send_done(void *data)
369 #endif
370 /* send done callback: sets the smsg entry to done */
371 {
372     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
373     CmiFree(msg_tmp->msg);
374     smsg_free (msg_tmp);
375     msgQueueLen--;
376 }
377
378 #if (DCMF_VERSION_MAJOR >= 2)
379 static void send_multi_done(void *data, DCMF_Error_t *err)
380 #else
381 static void send_multi_done(void *data)
382 #endif
383 /* send done callback: sets the smsg entry to done */
384 {
385     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
386     CmiFree(msg_tmp->msg);
387     free(msg_tmp->pelist);
388     smsg_free(msg_tmp);
389     msgQueueLen--;
390 }
391
392 /* The machine specific send function */
393 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode) {
394     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
395     //msg_tmp->destpe = destNode;
396     //msg_tmp->size = size;
397     msg_tmp->msg = msg;
398
399     DCMF_Callback_t cb;
400     DCQuad info;
401
402     cb.function = send_done;
403     cb.clientdata = msg_tmp;
404
405
406 #if CMK_ERROR_CHECKING
407     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
408     CMI_SET_CHECKSUM(msg, size);
409 #endif
410     CMI_MSG_SIZE(msg) = size;
411
412     //msg_tmp->cb.function = send_done;
413     //msg_tmp->cb.clientdata   =   msg_tmp;
414
415     DCMF_Protocol_t *protocol = NULL;
416
417     if (size < 224)
418         protocol = &cmi_dcmf_short_registration;
419     else if (size < 2048)
420         protocol = &cmi_dcmf_eager_registration;
421     else
422         protocol = &cmi_dcmf_rzv_registration;
423
424 #if CMK_SMP
425     DCMF_CriticalSection_enter (0);
426 #endif
427
428     msgQueueLen ++;
429     /*
430      * Original one:
431      *     DCMF_Send (protocol, &msg_tmp->send, msg_tmp->cb,
432                    DCMF_MATCH_CONSISTENCY, msg_tmp->destpe,
433                    msg_tmp->size, msg_tmp->msg, &msg_tmp->info, 1);
434            Ref:http://dcmf.anl-external.org/docs/mpi:dcmfd/group__SEND.html
435      */
436     DCMF_Send (protocol, &msg_tmp->send, cb, DCMF_MATCH_CONSISTENCY,
437                destNode, size, msg, &info, 0);
438
439 #if CMK_SMP
440     DCMF_CriticalSection_exit (0);
441 #endif
442
443     return 0;
444 }
445
446 #define MAX_MULTICAST 128
447 DCMF_Opcode_t  CmiOpcodeList [MAX_MULTICAST];
448
449 void  machineMulticast(int npes, int *pelist, int size, char* msg) {
450     CQdCreate(CpvAccess(cQdState), npes);
451
452     CmiAssert (npes < MAX_MULTICAST);
453
454 #if CMK_ERROR_CHECKING
455     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
456     CMI_SET_CHECKSUM(msg, size);
457 #endif
458
459     CMI_MSG_SIZE(msg) = size;
460
461     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
462
463     //msg_tmp->destpe    = -1;      //multicast operation
464     //msg_tmp->size      = size * npes; //keep track of #bytes outstanding
465     msg_tmp->msg       = msg;
466     msg_tmp->pelist    = pelist;
467
468     DCMF_Multicast_t  mcast_info __attribute__((__aligned__(16)));
469     DCQuad info;
470
471     mcast_info.registration   = & cmi_dcmf_multicast_registration;
472     mcast_info.request        = & msg_tmp->send;
473     mcast_info.cb_done.function    =   send_multi_done;
474     mcast_info.cb_done.clientdata  =   msg_tmp;
475     mcast_info.consistency    =   DCMF_MATCH_CONSISTENCY;
476     mcast_info.connection_id  =   CmiMyPe();
477     mcast_info.bytes          =   size;
478     mcast_info.src            =   msg;
479     mcast_info.nranks         =   npes;
480     mcast_info.ranks          =   (unsigned *)pelist;
481     mcast_info.opcodes        =   CmiOpcodeList;   //static list of MAX_MULTICAST entires with 0 in them
482     mcast_info.flags          =   0;
483     mcast_info.msginfo        =   &info;
484     //mcast_info.count          =   1;
485     mcast_info.count          =   0;
486
487 #if CMK_SMP
488     DCMF_CriticalSection_enter (0);
489 #endif
490     msgQueueLen++;
491     DCMF_Multicast (&mcast_info);
492
493 #if CMK_SMP
494     DCMF_CriticalSection_exit (0);
495 #endif
496 }
497
498 /* Recv functions */
499 /* The callback on the recv side */
500 #if (DCMF_VERSION_MAJOR >= 2)
501 static void recv_done(void *clientdata, DCMF_Error_t * err)
502 #else
503 static void recv_done(void *clientdata)
504 #endif
505 /* recv done callback: push the recved msg to recv queue */
506 {
507
508     char *msg = (char *) clientdata;
509
510     /*printf ("NODE[%d] Recv message done with msg rank %d\n", CmiMyNode(), CMI_DEST_RANK(msg));*/
511     MACHSTATE3(2,"[%d] recv_done begin with msg %p size=%d { ", CmiMyNode(), msg, CMI_MSG_SIZE(msg));
512 #if CMK_ERROR_CHECKING
513     int sndlen = CMI_MSG_SIZE(msg);
514     CMI_CHECK_CHECKSUM(msg, sndlen);
515     if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) { /* received a non-charm msg */
516         CmiAbort("Charm++ Warning: Non Charm++ Message Received. \n");
517         return;
518     }
519 #endif
520
521     handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
522
523     outstanding_recvs--;
524     MACHSTATE(2,"} recv_done end ");
525     return;
526 }
527
528 void short_pkt_recv (void             * clientdata,
529                      const DCQuad     * info,
530                      unsigned           count,
531                      unsigned           senderrank,
532                      const char       * buffer,
533                      const unsigned     sndlen) {
534     outstanding_recvs ++;
535     int alloc_size = sndlen;
536
537     char * new_buffer = (char *)CmiAlloc(alloc_size);
538     CmiMemcpy (new_buffer, buffer, sndlen);
539
540 #if (DCMF_VERSION_MAJOR >= 2)
541     recv_done (new_buffer, NULL);
542 #else
543     recv_done (new_buffer);
544 #endif
545 }
546
547 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
548         unsigned            count,
549         unsigned            senderrank,
550         const unsigned      sndlen,
551         unsigned            connid,
552         void              * clientdata,
553         unsigned          * rcvlen,
554         char             ** buffer,
555         unsigned          * pw,
556         DCMF_Callback_t   * cb
557                                            ) {
558     outstanding_recvs ++;
559     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
560     /*printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
561     /* printf ("Receiving %d bytes\n", sndlen); */
562     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
563
564     *buffer = (char *)CmiAlloc(alloc_size);
565     cb->function = recv_done;
566     cb->clientdata = *buffer;
567
568     *pw  = 0x7fffffff;
569     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
570 }
571
572 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
573                                       const DCQuad      * info,
574                                       unsigned            count,
575                                       unsigned            senderrank,
576                                       const unsigned      sndlen,
577                                       unsigned          * rcvlen,
578                                       char             ** buffer,
579                                       DCMF_Callback_t   * cb
580                                      ) {
581     outstanding_recvs ++;
582     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
583     /* printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
584     /* printf ("Receiving %d bytes\n", sndlen); */
585     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
586
587     *buffer = (char *)CmiAlloc(alloc_size);
588     cb->function = recv_done;
589     cb->clientdata = *buffer;
590
591     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
592 }
593
#if 0
/* -----------------------------------------
 * Rectangular broadcast implementation
 * (disabled: kept for reference only)
 * -----------------------------------------
 */
unsigned int *ranklist;
BGTsC_t        barrier;
#define MAX_COMM  256
/* One broadcast request per communicator id, filled in by
 * bgl_machine_RectBcastInit. */
static void * comm_table [MAX_COMM];

/* State kept alive for the duration of one asynchronous broadcast. */
typedef struct rectbcast_msg {
    BGTsRC_t           request;
    DCMF_Callback_t    cb;
    char              *msg;
} RectBcastInfo;


/* Completion callback: release the broadcast message and its state. */
static void bcast_done (void *data) {
    RectBcastInfo *rinfo = (RectBcastInfo *) data;
    CmiFree (rinfo->msg);
    free (rinfo);
}

/* Look up the request registered for communicator `comm`. */
static  void *   getRectBcastRequest (unsigned comm) {
    return comm_table [comm];
}


/* Receive handler: allocate room for the payload plus a 16-byte aligned
 * BGTsRC_t after it and deliver through recv_done. */
static  void *  bcast_recv     (unsigned               root,
                                unsigned               comm,
                                const unsigned         sndlen,
                                unsigned             * rcvlen,
                                char                ** rcvbuf,
                                DCMF_Callback_t      * const cb) {

    int alloc_size = sndlen + sizeof(BGTsRC_t) + 16;

    *rcvlen = sndlen;  /* receive the whole message */

    *rcvbuf       =  (char *)CmiAlloc(alloc_size);
    cb->function  =   recv_done;
    cb->clientdata = *rcvbuf;

    return (BGTsRC_t *) ALIGN_16 (*rcvbuf + sndlen);

}


/* Start an asynchronous rectangular broadcast of `sndbuf`. */
extern void bgl_machine_RectBcast (unsigned                 commid,
                                   const char             * sndbuf,
                                   unsigned                 sndlen) {
    RectBcastInfo *rinfo  =   (RectBcastInfo *) malloc (sizeof(RectBcastInfo));
    rinfo->cb.function    =   bcast_done;
    rinfo->cb.clientdata  =   rinfo;
    /* bcast_done frees rinfo->msg, which used to be left uninitialised.
     * NOTE(review): assumes the broadcast takes ownership of sndbuf, as
     * the CmiFree in bcast_done implies - confirm before re-enabling. */
    rinfo->msg            =   (char *) sndbuf;

    BGTsRC_AsyncBcast_start (commid, &rinfo->request, &rinfo->cb, sndbuf, sndlen);

}

/* Register the broadcast request for communicator `commID`. */
extern void        bgl_machine_RectBcastInit  (unsigned               commID,
        const BGTsRC_Geometry_t* geometry) {

    CmiAssert (commID < 256);
    CmiAssert (comm_table [commID] == NULL);

    BGTsRC_t *request =  (BGTsRC_t *) malloc (sizeof (BGTsRC_t));
    comm_table [commID] = request;

    BGTsRC_AsyncBcast_init  (request, commID,  geometry);
}

/*--------------------------------------------------------------
 *----- End Rectangular Broadcast Implementation ---------------
 *--------------------------------------------------------------*/
#endif
670
671
672 /*######End of functions related with Communication-Op functions ######*/
673
674
675 /* ######Beginning of functions related with communication progress ###### */
676 static INLINE_KEYWORD void AdvanceCommunicationForDCMF() {
677 #if CMK_SMP
678     DCMF_CriticalSection_enter (0);
679 #endif
680
681     while (DCMF_Messager_advance()>0);
682     //DCMF_Messager_advance();
683
684 #if CMK_SMP
685     DCMF_CriticalSection_exit (0);
686 #endif
687 }
688 /* ######End of functions related with communication progress ###### */
689
/* LrtsPostNonLocal hook: this machine layer has nothing to do here. */
static void MachinePostNonLocalForDCMF() {
}
693
/* The network progress function polls the network for incoming
   messages. This flushes receive buffers on some implementations. */
#if CMK_MACHINE_PROGRESS_DEFINED
/* Network progress entry point: poll DCMF, then (when immediate messages
 * are compiled in) run CmiHandleImmediate. */
void CmiMachineProgressImpl() {
    AdvanceCommunicationForDCMF();
#if CMK_IMMEDIATE_MSG
    CmiHandleImmediate();
#endif
}
#endif
704
705 /* ######Beginning of functions related with exiting programs###### */
706 static void DrainResourcesForDCMF() {
707     while (msgQueueLen > 0 || outstanding_recvs > 0) {
708         AdvanceCommunicationForDCMF();
709     }
710 }
711
/* Shut the messager down and terminate the process with a success status.
 * Does not return. */
static void MachineExitForDCMF() {
    DCMF_Messager_finalize();
    exit(EXIT_SUCCESS);
}
716 /* ######End of functions related with exiting programs###### */
717
718
719 /* ######Beginning of functions related with starting programs###### */
720 /**
721  *  Obtain the number of nodes, my node id, and consuming machine layer
722  *  specific arguments
723  */
724 static void MachineInitForDCMF(int *argc, char ***argv, int *numNodes, int *myNodeID) {
725
726     DCMF_Messager_initialize();
727
728 #if CMK_SMP
729     DCMF_Configure_t  config_in, config_out;
730     config_in.thread_level= DCMF_THREAD_MULTIPLE;
731     config_in.interrupts  = DCMF_INTERRUPTS_OFF;
732
733     DCMF_Messager_configure(&config_in, &config_out);
734     //assert (config_out.thread_level == DCMF_THREAD_MULTIPLE); //not supported in vn mode
735 #endif
736
737     DCMF_Send_Configuration_t short_config, eager_config, rzv_config;
738
739
740     short_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
741     short_config.cb_recv_short = short_pkt_recv;
742     short_config.cb_recv       = first_pkt_recv_done;
743
744 #if (DCMF_VERSION_MAJOR >= 3)
745     short_config.network  = DCMF_DEFAULT_NETWORK;
746 #elif (DCMF_VERSION_MAJOR == 2)
747     short_config.network  = DCMF_DefaultNetwork;
748 #endif
749
750     eager_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
751     eager_config.cb_recv_short = short_pkt_recv;
752     eager_config.cb_recv       = first_pkt_recv_done;
753 #if (DCMF_VERSION_MAJOR >= 3)
754     eager_config.network  = DCMF_DEFAULT_NETWORK;
755 #elif (DCMF_VERSION_MAJOR == 2)
756     eager_config.network  = DCMF_DefaultNetwork;
757 #endif
758
759 #ifdef  OPT_RZV
760 #warning "Enabling Optimize Rzv"
761     rzv_config.protocol        = DCMF_RZV_SEND_PROTOCOL;
762 #else
763     rzv_config.protocol        = DCMF_DEFAULT_SEND_PROTOCOL;
764 #endif
765     rzv_config.cb_recv_short   = short_pkt_recv;
766     rzv_config.cb_recv         = first_pkt_recv_done;
767 #if (DCMF_VERSION_MAJOR >= 3)
768     rzv_config.network  = DCMF_DEFAULT_NETWORK;
769 #elif (DCMF_VERSION_MAJOR == 2)
770     rzv_config.network  = DCMF_DefaultNetwork;
771 #endif
772
773     DCMF_Send_register (&cmi_dcmf_short_registration, &short_config);
774     DCMF_Send_register (&cmi_dcmf_eager_registration, &eager_config);
775     DCMF_Send_register (&cmi_dcmf_rzv_registration,   &rzv_config);
776
777 #ifdef BGP_USE_AM_DIRECT
778     DCMF_Send_Configuration_t direct_config;
779     direct_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
780     direct_config.cb_recv_short = direct_short_pkt_recv;
781     direct_config.cb_recv       = direct_first_pkt_recv_done;
782 #if (DCMF_VERSION_MAJOR >= 3)
783     direct_config.network  = DCMF_DEFAULT_NETWORK;
784 #elif (DCMF_VERSION_MAJOR == 2)
785     direct_config.network  = DCMF_DefaultNetwork;
786 #endif
787     DCMF_Send_register (&cmi_dcmf_direct_registration,   &direct_config);
788     directcb.function=direct_send_done_cb;
789     directcb.clientdata=NULL;
790 #endif
791
792 #ifdef BGP_USE_RDMA_DIRECT
793     /* notification protocol */
794     DCMF_Send_Configuration_t direct_rdma_config;
795     direct_rdma_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
796     direct_rdma_config.cb_recv_short = direct_short_rdma_pkt_recv;
797     direct_rdma_config.cb_recv       = direct_first_rdma_pkt_recv_done;
798 #if (DCMF_VERSION_MAJOR >= 3)
799     direct_rdma_config.network  = DCMF_DEFAULT_NETWORK;
800 #elif (DCMF_VERSION_MAJOR == 2)
801     direct_rdma_config.network  = DCMF_DefaultNetwork;
802 #endif
803     DCMF_Send_register (&cmi_dcmf_direct_rdma_registration,   &direct_rdma_config);
804     directcb.function=direct_send_rdma_done_cb;
805     directcb.clientdata=NULL;
806     /* put protocol */
807     DCMF_Put_Configuration_t put_configuration = { DCMF_DEFAULT_PUT_PROTOCOL };
808     DCMF_Put_register (&cmi_dcmf_direct_put_registration, &put_configuration);
809     DCMF_Get_Configuration_t get_configuration = { DCMF_DEFAULT_GET_PROTOCOL };
810     DCMF_Get_register (&cmi_dcmf_direct_get_registration, &get_configuration);
811
812 #endif
813     //fprintf(stderr, "Initializing Eager Protocol\n");
814
815     *numNodes = DCMF_Messager_size();
816     *myNodeID = DCMF_Messager_rank();
817
818     CmiBarrier();
819     CmiBarrier();
820     CmiBarrier();
821
822     /* NOTE: the following codes requires #PEs, which is not available
823      * until this function finishes. And it allocate O(p) space */
824     int totalPEs = _Cmi_mynodesize * (*numNodes);
825     DCMF_Multicast_Configuration_t mconfig;
826     mconfig.protocol = DCMF_MEMFIFO_DMA_MSEND_PROTOCOL;
827     mconfig.cb_recv  = first_multi_pkt_recv_done;
828     mconfig.clientdata = NULL;
829     mconfig.connectionlist = (void **) malloc (totalPEs * sizeof(unsigned long));
830     mconfig.nconnections = totalPEs;
831     DCMF_Multicast_register(&cmi_dcmf_multicast_registration, &mconfig);
832
833     int actualNodeSize = _Cmi_mynodesize;
834 #if !CMK_SMP_NO_COMMTHD
835     actualNodeSize++; //considering the extra comm thread
836 #endif
837     int i;
838     procState = (ProcState *)CmiAlloc((actualNodeSize) * sizeof(ProcState));
839     for (i=0; i<actualNodeSize; i++) {
840         /*    procState[i].sendMsgBuf = PCQueueCreate();   */
841         procState[i].recvLock = CmiCreateLock();
842         procState[i].bcastLock = CmiCreateLock();
843     }
844
845     /* checksum flag */
846     if (CmiGetArgFlag(*argv,"+checksum")) {
847 #if CMK_ERROR_CHECKING
848         checksum_flag = 1;
849         if (*myNodeID == 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
850 #else
851         if (*myNodeID == 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
852 #endif
853     }
854
855 }
856
857 static void MachinePreCommonInitForDCMF(int everReturn) {
858     CpvInitialize(PCQueue, smsg_list_q);
859     CpvAccess(smsg_list_q) = PCQueueCreate();
860 }
861
862 static void MachinePostCommonInitForDCMF(int everReturn) {
863 #if !CMK_SMP || CMK_SMP_NO_COMMTHD
864     CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)CmiNotifyIdle,NULL);
865 #endif
866
867     CmiBarrier();
868 }
869 /* ######End of functions related with starting programs###### */
870
871 /***********************************************************************
872  *
873  * Abort function:
874  *
875  ************************************************************************/
876
877 void CmiAbort(const char *message) {
878     CmiError("------------- Processor %d Exiting: Called CmiAbort ------------\n"
879              "{snd:%d,rcv:%d} Reason: %s\n",CmiMyPe(),
880              msgQueueLen, outstanding_recvs, message);
881
882 #if 0
883     /* Since it's a abort, why bother to drain the resources? The system
884      * should clean it self
885      */
886     /* FIXME: what happens in the SMP mode??? */
887     DrainResourcesForDCMF();
888 #endif
889     assert(0);
890 }
891
892
893 /*********** Beginning of MULTICAST/VECTOR SENDING FUNCTIONS **************/
/*
 * Depending on some configuration flags, other delivery functions may be
 * needed.
 */
898
899 #if !CMK_MULTICAST_LIST_USE_COMMON_CODE
900
/* Synchronous list send: the caller keeps ownership of msg.  A copy of the
 * message is made with CopyMsg and that copy is handed to CmiFreeListSendFn
 * together with the unchanged destination list. */
void CmiSyncListSendFn(int npes, int *pes, int size, char *msg) {
    CmiFreeListSendFn(npes, pes, size, CopyMsg(msg, size));
}
905
/* According to Sameer Kumar, this optimized multicast only helps NAMD when
 * #atoms/CPU is less than 10, so it is off by default.
 */
910 #define OPTIMIZED_MULTICAST  0
911
912 #if OPTIMIZED_MULTICAST
913 #warning "Using Optimized Multicast"
914 #endif
915
/*
 * Send msg (size bytes) to each of the npes processors listed in pes and
 * give up ownership of msg: every path below either hands msg to
 * CmiFreeSendFn / machineMulticast or releases it with CmiFree.
 *
 * npes: number of entries in pes; expected to be >= 1.
 * pes:  destination processor numbers.
 * size: message length in bytes.
 * msg:  message buffer; must not be used by the caller afterwards.
 *
 * With OPTIMIZED_MULTICAST disabled (the default) the message is sent to
 * the first npes-1 destinations one at a time and the final destination
 * receives the original buffer through CmiFreeSendFn.
 */
void CmiFreeListSendFn(int npes, int *pes, int size, char *msg) {
    CmiAssert(npes>=1);
    if (npes < 1) {
        /* If CmiAssert is compiled out (as assertion macros usually are in
         * optimized builds), an empty list would otherwise reach
         * pes[npes-1] below.  This function owns msg, so release it when
         * there is no destination. */
        CmiFree(msg);
        return;
    }
    if (npes==1) {
        CmiFreeSendFn(pes[0], size, msg);
        return;
    }

    int i;
#if OPTIMIZED_MULTICAST
    int *newpelist = (int *)malloc(sizeof(int)*npes);
    if (newpelist == NULL) {
        CmiAbort("CmiFreeListSendFn: out of memory allocating the PE list\n");
    }
    int new_npes = npes;
    memcpy(newpelist, pes, sizeof(int)*npes);
#if CMK_SMP
    /* Deliver to PEs on this node directly; keep only remote PEs in the
     * list that is passed to the multicast. */
    new_npes = 0;
    for (i=0; i<npes; i++) {
        if (CmiNodeOf(pes[i]) == CmiMyNode()) {
            CmiSyncSend(pes[i], size, msg);
        } else {
            newpelist[new_npes++] = pes[i];
        }
    }
    if (new_npes == 0) {
        /* Nothing left to multicast: release the scratch list as well as
         * the message. */
        free(newpelist);
        CmiFree(msg);
        return;
    }
#endif

    CMI_SET_BROADCAST_ROOT(msg,0);
#if !CMK_SMP
    CMI_DEST_RANK(msg) = 0;
#else
#error optimized multicast should not be enabled in SMP mode
#endif

    CQdCreate(CpvAccess(cQdState), new_npes);
    /* NOTE(review): newpelist is not freed here; whether machineMulticast
     * keeps a reference to it after returning is not visible from this
     * function -- confirm before adding a free(). */
    machineMulticast (new_npes, newpelist, size, msg);
#else /* non-optimized multicast */

    for (i=0; i<npes-1; i++) {
#if !CMK_SMP
        /* Take an extra reference so the CmiFreeSendFn below does not
         * release the buffer that later iterations still need. */
        CmiReference(msg);
        CmiFreeSendFn(pes[i], size, msg);
#else
        CmiSyncSend(pes[i], size, msg);
#endif
    }
    /* The last destination consumes the caller's buffer. */
    CmiFreeSendFn(pes[npes-1], size, msg);
#endif /* end of #if OPTIMIZED_MULTICAST */
}
968 #endif /* end of #if !CMK_MULTICAST_LIST_USE_COMMON_CODE */
969
970 /*********** End of MULTICAST/VECTOR SENDING FUNCTIONS **************/
971
972 /**************************  TIMER FUNCTIONS **************************/
973
974 /************Barrier Related Functions****************/
975 /* Barrier related functions */
/* TODO: does DCMF provide any barrier-related functions? --Chao Mei */
977 /* Barrier needs to be implemented!!! -Chao Mei */
978 /* These two barriers are only needed by CmiTimerInit to synchronize all the
979    threads. They do not need to provide a general barrier. */
/* Placeholder barrier: performs no synchronization and always returns 0. */
int CmiBarrier() {
    const int status = 0;

    return status;
}
/* Placeholder barrier: performs no synchronization and always returns 0. */
int CmiBarrierZero() {
    const int status = 0;

    return status;
}
986
987 #include "manytomany.c"
988
989 /*********************************************************************************************
990 This section is for CmiDirect. This is a variant of the  persistent communication in which
991 the user can transfer data between processors without using Charm++ messages. This lets the user
992 send and receive data from the middle of his arrays without any copying on either send or receive
993 side
994 *********************************************************************************************/
995
996
997 #ifdef BGP_USE_AM_DIRECT
998
999 #include "cmidirect.h"
1000
1001 /* We can avoid a receiver side lookup by just sending the whole shebang.
1002    DCMF header is in units of quad words (16 bytes), so we'd need less than a
1003    quad word for the handle if we just sent that and did a lookup. Or exactly
1004    2 quad words for the buffer pointer, callback pointer, callback
1005    data pointer, and DCMF_Request_t pointer with no lookup.
1006
   Since CmiDirect is generally going to be used for messages which aren't
   tiny, the extra 16 bytes is not likely to impact performance noticeably and
   not having to look up handles in tables simplifies the code enormously.
1010
1011    EJB   2008/4/2
1012 */
1013
1014
1015 /**
1016  To be called on the receiver to create a handle and return its number
1017 **/
1018 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1019     /* with two-sided primitives we just bundle the buffer and callback info into the handle so the sender can remind us about it later. */
1020     struct infiDirectUserHandle userHandle;
1021     userHandle.handle=1; /* doesn't matter on BG/P*/
1022     userHandle.senderNode=senderNode;
1023     userHandle.recverNode=_Cmi_mynode;
1024     userHandle.recverBufSize=recvBufSize;
1025     userHandle.recverBuf=recvBuf;
1026     userHandle.initialValue=initialValue;
1027     userHandle.callbackFnPtr=callbackFnPtr;
1028     userHandle.callbackData=callbackData;
1029     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1030 #if CMI_DIRECT_DEBUG
1031     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1032 #endif
1033     return userHandle;
1034 }
1035
1036 /****
1037  To be called on the sender to attach the sender's buffer to this handle
1038 ******/
1039
1040 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1041
1042     /* one-sided primitives would require registration of memory */
1043
1044     /* with two-sided primitives we just record the sender buf in the handle */
1045     userHandle->senderBuf=sendBuf;
1046     CmiAssert(sendBufSize==userHandle->recverBufSize);
1047     userHandle->DCMF_rq_tsend = (DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1048 #if CMI_DIRECT_DEBUG
1049     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1050 #endif
1051
1052 }
1053
1054 /****
1055 To be called on the sender to do the actual data transfer
1056 ******/
1057 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1058     /** invoke a DCMF_Send with the direct callback */
1059     DCMF_Protocol_t *protocol = NULL;
1060     protocol = &cmi_dcmf_direct_registration;
1061     /* local copy */
1062     CmiAssert(userHandle->recverBuf!=NULL);
1063     CmiAssert(userHandle->senderBuf!=NULL);
1064     CmiAssert(userHandle->recverBufSize>0);
1065     if (userHandle->recverNode== _Cmi_mynode) {
1066 #if CMI_DIRECT_DEBUG
1067         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1068 #endif
1069
1070         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1071         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1072     } else {
1073         dcmfDirectMsgHeader msgHead;
1074         msgHead.recverBuf=userHandle->recverBuf;
1075         msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1076         msgHead.callbackData=userHandle->callbackData;
1077         msgHead.DCMF_rq_t=(DCMF_Request_t *) userHandle->DCMF_rq_trecv;
1078 #if CMK_SMP
1079         DCMF_CriticalSection_enter (0);
1080 #endif
1081 #if CMI_DIRECT_DEBUG
1082         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1083 #endif
1084         DCMF_Send (protocol,
1085                    (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1086                    directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1087                    userHandle->recverBufSize, userHandle->senderBuf,
1088                    (struct DCQuad *) &(msgHead), 2);
1089
1090 #if CMK_SMP
1091         DCMF_CriticalSection_exit (0);
1092 #endif
1093     }
1094 }
1095
/* Get is not available in the BGP_USE_AM_DIRECT variant: calling this
 * aborts with a message pointing at BGP_USE_RDMA_DIRECT. */
void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
    static const char notImplemented[] =
        "Not Implemented, switch to #define BGP_USE_RDMA_DIRECT";

    CmiAbort(notImplemented);
}
1099
1100 /**** up to the user to safely call this */
1101 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1102     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1103 #if CMK_SMP
1104     DCMF_CriticalSection_enter (0);
1105 #endif
1106     CmiFree(userHandle->DCMF_rq_tsend);
1107 #if CMK_SMP
1108     DCMF_CriticalSection_exit (0);
1109 #endif
1110
1111 }
1112
1113 /**** up to the user to safely call this */
1114 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1115     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1116 #if CMK_SMP
1117     DCMF_CriticalSection_enter (0);
1118 #endif
1119     CmiFree(userHandle->DCMF_rq_trecv);
1120
1121 #if CMK_SMP
1122     DCMF_CriticalSection_exit (0);
1123 #endif
1124 }
1125
1126
1127 /**** Should not be called the first time *********/
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1131
1132 /**** Should not be called the first time *********/
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1136
1137 /**** Should not be called the first time *********/
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1141
1142 #endif /* BGP_USE_AM_DIRECT*/
1143
1144 #ifdef BGP_USE_RDMA_DIRECT
1145
1146 #include "cmidirect.h"
1147
1148 /*
1149    Notification protocol passes callback function and data in a single
1150    quadword.  This occurs in a message triggered by the sender side ack
1151    callback and therefore has higher latency than polling, but is guaranteed
1152    to be semantically correct.  The latency for a single packet that isn't
1153    hitting charm/converse should be pretty minimal, but you could run into
1154    sender side progress issues.  The alternative of polling on the out of band
1155    byte scheme creates correctness issues in that the data really has to be
1156    out of band and you rely on the buffer being written in order.  It also has
1157    annoying polling issues.  A third scheme could add a second put to a
1158    control region to poll upon and force sequential consistency between
   puts. It's not really clear that this would be faster or avoid the progress
   issue since you run into the same issues to enforce that sequential
   consistency.
1162
1163    EJB   2011/1/20
1164 */
1165
1166
1167 /* local function to use the ack as our signal to send a remote notify */
1168 static void CmiNotifyRemoteRDMA(void *handle, struct DCMF_Error_t *error) {
1169     struct infiDirectUserHandle *userHandle= (struct infiDirectUserHandle *) handle;
1170     dcmfDirectRDMAMsgHeader msgHead;
1171     msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1172     msgHead.callbackData=userHandle->callbackData;
1173 #if CMK_SMP
1174     DCMF_CriticalSection_enter (0);
1175 #endif
1176 #if CMI_DIRECT_DEBUG
1177     CmiPrintf("[%d] RDMA notify put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p \n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1178 #endif
1179     DCMF_Result res=DCMF_Send (&cmi_dcmf_direct_rdma_registration,
1180                                userHandle->DCMF_rq_tsend,
1181                                directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1182                                sizeof(dcmfDirectRDMAMsgHeader),
1183
1184                                userHandle->DCMF_notify_buf,
1185                                (struct DCQuad *) &(msgHead), 1);
1186 //    CmiAssert(res==DCMF_SUCCESS);
1187 #if CMK_SMP
1188     DCMF_CriticalSection_exit (0);
1189 #endif
1190 }
1191
1192 /**
1193  To be called on the receiver to create a handle and return its number
1194 **/
1195
1196
1197 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1198     /* one-sided primitives require registration of memory */
1199     struct infiDirectUserHandle userHandle;
1200     size_t numbytesRegistered=0;
1201     DCMF_Result regresult=DCMF_Memregion_create( &userHandle.DCMF_recverMemregion,
1202                           &numbytesRegistered,
1203                           recvBufSize,
1204                           recvBuf,
1205                           0);
1206     CmiAssert(numbytesRegistered==recvBufSize);
1207     CmiAssert(regresult==DCMF_SUCCESS);
1208
1209
1210     userHandle.handle=1; /* doesn't matter on BG/P*/
1211     userHandle.senderNode=senderNode;
1212     userHandle.recverNode=_Cmi_mynode;
1213     userHandle.recverBufSize=recvBufSize;
1214     userHandle.recverBuf=recvBuf;
1215     userHandle.initialValue=initialValue;
1216     userHandle.callbackFnPtr=callbackFnPtr;
1217     userHandle.callbackData=callbackData;
1218     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1219 #if CMI_DIRECT_DEBUG
1220     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1221 #endif
1222     return userHandle;
1223 }
1224
1225 /****
1226  To be called on the sender to attach the sender's buffer to this handle
1227 ******/
1228
1229 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1230     /* one-sided primitives would require registration of memory */
1231     userHandle->senderBuf=sendBuf;
1232     CmiAssert(sendBufSize==userHandle->recverBufSize);
1233     userHandle->DCMF_rq_tsend =(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1234     size_t numbytesRegistered=0;  // set as return value from create
1235     userHandle->DCMF_notify_buf=ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+32));
1236     userHandle->DCMF_notify_cb.function=CmiNotifyRemoteRDMA;
1237     userHandle->DCMF_notify_cb.clientdata=userHandle;
1238     DCMF_Result regresult=DCMF_Memregion_create( &userHandle->DCMF_senderMemregion,
1239                           &numbytesRegistered,
1240                           sendBufSize,
1241                           sendBuf,
1242                           0);
1243     CmiAssert(numbytesRegistered==sendBufSize);
1244     CmiAssert(regresult==DCMF_SUCCESS);
1245
1246 #if CMI_DIRECT_DEBUG
1247     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1248 #endif
1249
1250 }
1251
1252
1253 /****
1254 To be called on the sender to do the actual data transfer
1255 ******/
1256 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1257     /** invoke a DCMF_Put with the direct callback */
1258
1259     CmiAssert(userHandle->recverBuf!=NULL);
1260     CmiAssert(userHandle->senderBuf!=NULL);
1261     CmiAssert(userHandle->recverBufSize>0);
1262     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1263 #if CMI_DIRECT_DEBUG
1264         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1265 #endif
1266
1267         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1268         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1269     } else {
1270 #if CMK_SMP
1271         DCMF_CriticalSection_enter (0);
1272 #endif
1273 #if CMI_DIRECT_DEBUG
1274         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1275 #endif
1276         DCMF_Result
1277         Res= DCMF_Put(&cmi_dcmf_direct_put_registration,
1278                       userHandle->DCMF_rq_tsend,
1279                       directcb, DCMF_RELAXED_CONSISTENCY,
1280                       userHandle->recverNode,
1281                       userHandle->recverBufSize,
1282                       &userHandle->DCMF_senderMemregion,
1283                       &userHandle->DCMF_recverMemregion,
1284                       0, /* offsets are zero */
1285                       0,
1286                       userHandle->DCMF_notify_cb
1287                      );
1288         CmiAssert(Res==DCMF_SUCCESS);
1289 #if CMK_SMP
1290         DCMF_CriticalSection_exit (0);
1291 #endif
1292     }
1293 }
1294
1295 /****
1296 To be called on the receiver to initiate the actual data transfer
1297 ******/
1298 void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
1299     /** invoke a DCMF_Get with the direct callback */
1300
1301     CmiAssert(userHandle->recverBuf!=NULL);
1302     CmiAssert(userHandle->senderBuf!=NULL);
1303     CmiAssert(userHandle->recverBufSize>0);
1304     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1305 #if CMI_DIRECT_DEBUG
1306         CmiPrintf("[%d] RDMA local get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1307 #endif
1308
1309         CmiMemcpy(userHandle->senderBuf,userHandle->recverBuf,userHandle->recverBufSize);
1310         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1311     } else {
1312         struct DCMF_Callback_t done_cb;
1313         done_cb.function=userHandle->callbackFnPtr;
1314         done_cb.clientdata=userHandle->callbackData;
1315 #if CMK_SMP
1316         DCMF_CriticalSection_enter (0);
1317 #endif
1318 #if CMI_DIRECT_DEBUG
1319         CmiPrintf("[%d] RDMA get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1320 #endif
1321         DCMF_Result
1322         Res= DCMF_Get(&cmi_dcmf_direct_get_registration,
1323                       (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1324                       done_cb, DCMF_RELAXED_CONSISTENCY,
1325                       userHandle->recverNode,
1326                       userHandle->recverBufSize,
1327                       & userHandle->DCMF_recverMemregion,
1328                       & userHandle->DCMF_senderMemregion,
1329                       0, /* offsets are zero */
1330                       0
1331                      );
1332         CmiAssert(Res==DCMF_SUCCESS);
1333
1334
1335 #if CMK_SMP
1336         DCMF_CriticalSection_exit (0);
1337 #endif
1338     }
1339 }
1340
1341 /**** up to the user to safely call this */
1342 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1343     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1344 #if CMK_SMP
1345     DCMF_CriticalSection_enter (0);
1346 #endif
1347
1348     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_senderMemregion);
1349     CmiFree(userHandle->DCMF_notify_buf);
1350     CmiFree(userHandle->DCMF_rq_tsend);
1351 #if CMK_SMP
1352     DCMF_CriticalSection_exit (0);
1353 #endif
1354
1355 }
1356
1357 /**** up to the user to safely call this */
1358 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1359     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1360 #if CMK_SMP
1361     DCMF_CriticalSection_enter (0);
1362 #endif
1363
1364     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_recverMemregion);
1365     CmiFree(userHandle->DCMF_rq_trecv);
1366
1367 #if CMK_SMP
1368     DCMF_CriticalSection_exit (0);
1369 #endif
1370 }
1371
1372
1373
1374 /**** Should not be called the first time *********/
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1378
1379 /**** Should not be called the first time *********/
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1383
1384 /**** Should not be called the first time *********/
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    /* no op on BGP: the handle is intentionally left untouched */
    (void)userHandle;
}
1388
1389 #endif /* BGP_USE_RDMA_DIRECT*/
1390
1391 /*@}*/
1392