fixed typos in re-defining macros
[charm.git] / src / arch / bluegenep / machine.c
1 #include <stdio.h>
2 #include <errno.h>
3 #include <stdlib.h>
4 #include <unistd.h>
5 #include <math.h>
6 #include <string.h>
7 #include <malloc.h>
8 #include <assert.h>
9
10 #include "converse.h"
11 #include "machine.h"
12 #include "pcqueue.h"
13
14 #include <bpcore/ppc450_inlines.h>
15 #include "dcmf.h"
16 #include "dcmf_multisend.h"
17
18 /* =======Beginning of Definitions of Performance-Specific Macros =======*/
19 /* =======End of Definitions of Performance-Specific Macros =======*/
20
21 /* =======Beginning of Definitions of Msg Header Specific Macros =======*/
22 /* =======End of Definitions of Msg Header Specific Macros =======*/
23
24 /* =====Beginning of Definitions of Message-Corruption Related Macros=====*/
25 #define CMI_MAGIC(msg)                   ((CmiMsgHeaderBasic *)msg)->magic
26 #define CHARM_MAGIC_NUMBER               126
27
#if CMK_ERROR_CHECKING
/* Non-zero enables the checksum macros below (set from the "+checksum" flag). */
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

/*
 * Store a checksum over the first len bytes of msg in its header.
 * The cksum field is cleared first so it does not feed into its own value.
 * Wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define CMI_SET_CHECKSUM(msg, len)      \
        do {    \
          if (checksum_flag)  {   \
            ((CmiMsgHeaderBasic *)(msg))->cksum = 0;        \
            ((CmiMsgHeaderBasic *)(msg))->cksum = computeCheckSum((unsigned char*)(msg), (len));        \
          }     \
        } while (0)

/*
 * Verify the checksum of a received message; on mismatch dump the message
 * bytes in hex and abort.  Bytes are read as unsigned char so values above
 * 0x7f are not sign-extended in the dump.
 */
#define CMI_CHECK_CHECKSUM(msg, len)    \
        do {    \
          if (checksum_flag && computeCheckSum((unsigned char*)(msg), (len)) != 0)  { \
            int count; \
            printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), (len)); \
            for(count = 0; count < (len); count++) { \
                printf("%2x", ((unsigned char *)(msg))[count]);                 \
            } \
            printf("------------------------------\n\n"); \
            CmiAbort("Fatal error: checksum doesn't agree!\n"); \
          } \
        } while (0)
#else
/* Checksumming is compiled out when error checking is disabled. */
#define CMI_SET_CHECKSUM(msg, len)
#define CMI_CHECK_CHECKSUM(msg, len)
#endif
55 /* =====End of Definitions of Message-Corruption Related Macros=====*/
56
57
58 /* =====Beginning of Declarations of Machine Specific Variables===== */
59 typedef struct ProcState {
60     /* PCQueue      sendMsgBuf; */      /* per processor message sending queue */
61     CmiNodeLock  recvLock;              /* for cs->recv */
62     CmiNodeLock bcastLock;
63 } ProcState;
64
65 static ProcState  *procState;
66
67 volatile int msgQueueLen;
68 volatile int outstanding_recvs;
69
70 DCMF_Protocol_t  cmi_dcmf_short_registration __attribute__((__aligned__(16)));
71 DCMF_Protocol_t  cmi_dcmf_eager_registration __attribute__((__aligned__(16)));
72 DCMF_Protocol_t  cmi_dcmf_rzv_registration   __attribute__((__aligned__(16)));
73 DCMF_Protocol_t  cmi_dcmf_multicast_registration   __attribute__((__aligned__(16)));
74
75
76 typedef struct msg_list {
77     char              * msg;
78 //    int                 size;
79 //    int                 destpe;
80     int               * pelist;
81 //    DCMF_Callback_t     cb;
82 //    DCQuad              info __attribute__((__aligned__(16)));
83     DCMF_Request_t      send __attribute__((__aligned__(16)));
84 } SMSG_LIST __attribute__((__aligned__(16)));
85
86 #define MAX_NUM_SMSGS   64
87 CpvDeclare(PCQueue, smsg_list_q);
88 static SMSG_LIST * smsg_allocate();
89 static void smsg_free (SMSG_LIST *smsg);
90
91 /* =====End of Declarations of Machine Specific Variables===== */
92
93
94 /* =====Beginning of Declarations of Machine Specific Functions===== */
95 /* Utility functions */
/*
 * Round p up to the next 16-byte boundary (p is returned unchanged when it is
 * already aligned).  The mask is built at the full width of unsigned long so
 * the upper address bits are preserved when pointers are wider than 32 bits.
 */
char *ALIGN_16(char *p) {
    return (char *)(((unsigned long)p + 0xfUL) & ~0xfUL);
}
99
/* Approximate sleep: spin until the DCMF timebase has advanced by `cycles`. */
void mysleep (int cycles) {
    unsigned long long now = DCMF_Timebase();
    const unsigned long long deadline = now + cycles;

    for (; now < deadline; now = DCMF_Timebase()) {
        /* busy-wait */
    }
}
107 static void SendMsgsUntil(int);
108
109 /* ######Beginning of Machine-specific RDMA related functions###### */
110 #define BGP_USE_AM_DIRECT 1
111 /* #define BGP_USE_RDMA_DIRECT 1 */
112 /* #define CMI_DIRECT_DEBUG 1 */
113 #if BGP_USE_AM_DIRECT
114
115 DCMF_Protocol_t  cmi_dcmf_direct_registration __attribute__((__aligned__(16)));
116 /** The receive side of a put implemented in DCMF_Send */
117
118 typedef struct {
119     void *recverBuf;
120     void (*callbackFnPtr)(void *);
121     void *callbackData;
122     DCMF_Request_t *DCMF_rq_t;
123 } dcmfDirectMsgHeader;
124
125 /* nothing for us to do here */
/* Sender-side completion callback for direct sends: traces in debug builds only. */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_done_cb(void *nothing, DCMF_Error_t *err)
#else
void direct_send_done_cb(void *nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    CmiPrintf("[%d] RDMA send_done_cb\n", CmiMyPe());
#endif
}
136
137 DCMF_Callback_t  directcb;
138
139 void     direct_short_pkt_recv (void             * clientdata,
140                                 const DCQuad     * info,
141                                 unsigned           count,
142                                 unsigned           senderrank,
143                                 const char       * buffer,
144                                 const unsigned     sndlen) {
145 #if CMI_DIRECT_DEBUG
146     CmiPrintf("[%d] RDMA direct_short_pkt_recv\n", CmiMyPe());
147 #endif
148     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
149     CmiMemcpy(msgHead->recverBuf, buffer, sndlen);
150     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
151 }
152
153
154 #if (DCMF_VERSION_MAJOR >= 2)
155 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
156 #else
157 typedef void (*cbhdlr) (void *);
158 #endif
159
160 DCMF_Request_t * direct_first_pkt_recv_done (void              * clientdata,
161         const DCQuad      * info,
162         unsigned            count,
163         unsigned            senderrank,
164         const unsigned      sndlen,
165         unsigned          * rcvlen,
166         char             ** buffer,
167         DCMF_Callback_t   * cb
168                                             ) {
169 #if CMI_DIRECT_DEBUG
170     CmiPrintf("[%d] RDMA direct_first_pkt_recv_done\n", CmiMyPe());
171 #endif
172     /* pull the data we need out of the header */
173     *rcvlen=sndlen;
174     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
175     cb->function= (cbhdlr)msgHead->callbackFnPtr;
176     cb->clientdata=msgHead->callbackData;
177     *buffer=msgHead->recverBuf;
178     return msgHead->DCMF_rq_t;
179 }
180 #endif /* end of #if BGP_USE_AM_DIRECT */
181
182 #ifdef BGP_USE_RDMA_DIRECT
183 static struct DCMF_Callback_t dcmf_rdma_cb_ack;
184
185 DCMF_Protocol_t  cmi_dcmf_direct_put_registration __attribute__((__aligned__(16)));
186 DCMF_Protocol_t  cmi_dcmf_direct_get_registration __attribute__((__aligned__(16)));
187 DCMF_Protocol_t  cmi_dcmf_direct_rdma_registration __attribute__((__aligned__(16)));
188 /** The receive side of a DCMF_Put notification implemented in DCMF_Send */
189
/* Header of the notification message that accompanies a direct RDMA transfer. */
typedef struct dcmfDirectRDMAMsgHeader {
    void (*callbackFnPtr)(void *);  /* called on the receiver when notified */
    void  *callbackData;            /* argument passed to callbackFnPtr */
} dcmfDirectRDMAMsgHeader;
194
/*
 * Sender-side completion callback for the RDMA notification send.
 * Nothing to do beyond optional tracing.  The trace format previously had a
 * second "%d" with no matching argument; it now prints only the PE number.
 */
#if (DCMF_VERSION_MAJOR >= 2)
void direct_send_rdma_done_cb(void *nothing, DCMF_Error_t *err)
#else
void direct_send_rdma_done_cb(void *nothing)
#endif
{
#if CMI_DIRECT_DEBUG
    CmiPrintf("[%d] RDMA send_rdma_done_cb\n", CmiMyPe());
#endif
}
207
208 DCMF_Callback_t  directcb;
209
210 void     direct_short_rdma_pkt_recv (void             * clientdata,
211                                      const DCQuad     * info,
212                                      unsigned           count,
213                                      unsigned           senderrank,
214                                      const char       * buffer,
215                                      const unsigned     sndlen) {
216 #if CMI_DIRECT_DEBUG
217     CmiPrintf("[%d] RDMA direct_short_rdma_pkt_recv\n", CmiMyPe());
218 #endif
219     dcmfDirectRDMAMsgHeader *msgHead=  (dcmfDirectRDMAMsgHeader *) info;
220     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
221 }
222
223 #if (DCMF_VERSION_MAJOR >= 2)
224 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
225 #else
226 typedef void (*cbhdlr) (void *);
227 #endif
228
229 DCMF_Request_t * direct_first_rdma_pkt_recv_done (void              * clientdata,
230         const DCQuad      * info,
231         unsigned            count,
232         unsigned            senderrank,
233         const unsigned      sndlen,
234         unsigned          * rcvlen,
235         char             ** buffer,
236         DCMF_Callback_t   * cb
237                                                  ) {
238     CmiAbort("direct_first_rdma_pkt_recv should not be called");
239 }
240 #endif /* end of #if BGP_USE_RDMA_DIRECT */
241 /* ######End of Machine-specific RDMA related functions###### */
242
243
244 /* ### Beginning of Communication-Op Related Functions ### */
245 /* The machine-specific send-related function */
246 #if (DCMF_VERSION_MAJOR >= 2)
247 static void send_done(void *data, DCMF_Error_t *err);
248 static void send_multi_done(void *data, DCMF_Error_t *err);
249 #else
250 static void send_done(void *data);
251 static void send_multi_done(void *data);
252 #endif
253 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode);
254 #define CmiMachineSpecificSendFunc MachineSpecificSendForDCMF
255
256 /* The machine-specific recv-related function (on the receiver side) */
257 #if (DCMF_VERSION_MAJOR >= 2)
258 static void recv_done(void *clientdata, DCMF_Error_t * err);
259 #else
260 static void recv_done(void *clientdata);
261 #endif
262 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
263         unsigned            count,
264         unsigned            senderrank,
265         const unsigned      sndlen,
266         unsigned            connid,
267         void              * clientdata,
268         unsigned          * rcvlen,
269         char             ** buffer,
270         unsigned          * pw,
271         DCMF_Callback_t   * cb
272                                            );
273 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
274                                       const DCQuad      * info,
275                                       unsigned            count,
276                                       unsigned            senderrank,
277                                       const unsigned      sndlen,
278                                       unsigned          * rcvlen,
279                                       char             ** buffer,
280                                       DCMF_Callback_t   * cb
281                                      );
282
283 /* ### End of Communication-Op Related Functions ### */
284
285 /* ### Beginning of Machine-startup Related Functions ### */
286 static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID);
287 #define MachineSpecificInit MachineInitForDCMF
288
289 static void MachinePreCommonInitForDCMF(int everReturn);
290 static void MachinePostCommonInitForDCMF(int everReturn);
291 #define MachineSpecificPreCommonInit MachinePreCommonInitForDCMF
292 #define MachineSpecificPostCommonInit MachinePostCommonInitForDCMF
293 /* ### End of Machine-startup Related Functions ### */
294
295 /* ### Beginning of Machine-running Related Functions ### */
296 static void AdvanceCommunicationForDCMF();
297 #define MachineSpecificAdvanceCommunication AdvanceCommunicationForDCMF
298
299 static void DrainResourcesForDCMF();
300 #define MachineSpecificDrainResources DrainResourcesForDCMF
301
302 static void MachineExitForDCMF();
303 #define MachineSpecificExit MachineExitForDCMF
304
305 /* ### End of Machine-running Related Functions ### */
306
307 /* ### Beginning of Idle-state Related Functions ### */
308
309 /* ### End of Idle-state Related Functions ### */
310
311 void MachinePostNonLocalForDCMF();
312 #define MachineSpecificPostNonLocal MachinePostNonLocalForDCMF
313
314 /* =====End of Declarations of Machine Specific Functions===== */
315
316 /**
317  *  Macros that override settings in the common code, such as
318  *  CMK_SMP_NO_COMMTHD, NETWORK_PROGRESS_PERIOD_DEFAULT,
319  *  USE_COMMON_SYNC_P2P, CMK_HAS_SIZE_IN_MSGHDR,
320  *  CMK_OFFLOAD_BCAST_PROCESS etc.
321  */
322 #define CMK_OFFLOAD_BCAST_PROCESS 1
323 #include "machine-common.c"
324
325 /*######Beginning of functions related with Communication-Op functions ######*/
326
327 /* Utility functions */
328 static inline SMSG_LIST * smsg_allocate() {
329     SMSG_LIST *smsg = (SMSG_LIST *)PCQueuePop(CpvAccess(smsg_list_q));
330     if (smsg != NULL)
331         return smsg;
332
333     void * buf = malloc(sizeof(SMSG_LIST));
334     assert(buf!=NULL);
335     assert (((unsigned)buf & 0x0f) == 0);
336
337     return (SMSG_LIST *) buf;
338 }
339
340 static inline void smsg_free (SMSG_LIST *smsg) {
341     int size = PCQueueLength (CpvAccess(smsg_list_q));
342     if (size < MAX_NUM_SMSGS)
343         PCQueuePush (CpvAccess(smsg_list_q), (char *) smsg);
344     else
345         free (smsg);
346 }
347
348 static void SendMsgsUntil(int targetm) {
349     while (msgQueueLen>targetm) {
350 #if CMK_SMP
351         DCMF_CriticalSection_enter (0);
352 #endif
353
354         while (DCMF_Messager_advance()>0);
355
356 #if CMK_SMP
357         DCMF_CriticalSection_exit (0);
358 #endif
359     }
360 }
361
362 /* Send functions */
363 /* The callback on sender side */
364 #if (DCMF_VERSION_MAJOR >= 2)
365 static void send_done(void *data, DCMF_Error_t *err)
366 #else
367 static void send_done(void *data)
368 #endif
369 /* send done callback: sets the smsg entry to done */
370 {
371     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
372     CmiFree(msg_tmp->msg);
373     smsg_free (msg_tmp);
374     msgQueueLen--;
375 }
376
377 #if (DCMF_VERSION_MAJOR >= 2)
378 static void send_multi_done(void *data, DCMF_Error_t *err)
379 #else
380 static void send_multi_done(void *data)
381 #endif
382 /* send done callback: sets the smsg entry to done */
383 {
384     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
385     CmiFree(msg_tmp->msg);
386     free(msg_tmp->pelist);
387     smsg_free(msg_tmp);
388     msgQueueLen--;
389 }
390
391 /* The machine specific send function */
392 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode) {
393     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
394     //msg_tmp->destpe = destNode;
395     //msg_tmp->size = size;
396     msg_tmp->msg = msg;
397
398     DCMF_Callback_t cb;
399     DCQuad info;
400
401     cb.function = send_done;
402     cb.clientdata = msg_tmp;
403
404
405 #if CMK_ERROR_CHECKING
406     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
407     CMI_SET_CHECKSUM(msg, size);
408 #endif
409     CMI_MSG_SIZE(msg) = size;
410
411     //msg_tmp->cb.function = send_done;
412     //msg_tmp->cb.clientdata   =   msg_tmp;
413
414     DCMF_Protocol_t *protocol = NULL;
415
416     if (size < 224)
417         protocol = &cmi_dcmf_short_registration;
418     else if (size < 2048)
419         protocol = &cmi_dcmf_eager_registration;
420     else
421         protocol = &cmi_dcmf_rzv_registration;
422
423 #if CMK_SMP
424     DCMF_CriticalSection_enter (0);
425 #endif
426
427     msgQueueLen ++;
428     /*
429      * Original one:
430      *     DCMF_Send (protocol, &msg_tmp->send, msg_tmp->cb,
431                    DCMF_MATCH_CONSISTENCY, msg_tmp->destpe,
432                    msg_tmp->size, msg_tmp->msg, &msg_tmp->info, 1);
433            Ref:http://dcmf.anl-external.org/docs/mpi:dcmfd/group__SEND.html
434      */
435     DCMF_Send (protocol, &msg_tmp->send, cb, DCMF_MATCH_CONSISTENCY,
436                destNode, size, msg, &info, 0);
437
438 #if CMK_SMP
439     DCMF_CriticalSection_exit (0);
440 #endif
441
442     return 0;
443 }
444
445 #define MAX_MULTICAST 128
446 DCMF_Opcode_t  CmiOpcodeList [MAX_MULTICAST];
447
448 void  machineMulticast(int npes, int *pelist, int size, char* msg) {
449     CQdCreate(CpvAccess(cQdState), npes);
450
451     CmiAssert (npes < MAX_MULTICAST);
452
453 #if CMK_ERROR_CHECKING
454     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
455     CMI_SET_CHECKSUM(msg, size);
456 #endif
457
458     CMI_MSG_SIZE(msg) = size;
459     CMI_SET_BROADCAST_ROOT(msg,0);
460
461     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
462
463     //msg_tmp->destpe    = -1;      //multicast operation
464     //msg_tmp->size      = size * npes; //keep track of #bytes outstanding
465     msg_tmp->msg       = msg;
466     msg_tmp->pelist    = pelist;
467
468     DCMF_Multicast_t  mcast_info __attribute__((__aligned__(16)));
469     DCQuad info;
470
471     mcast_info.registration   = & cmi_dcmf_multicast_registration;
472     mcast_info.request        = & msg_tmp->send;
473     mcast_info.cb_done.function    =   send_multi_done;
474     mcast_info.cb_done.clientdata  =   msg_tmp;
475     mcast_info.consistency    =   DCMF_MATCH_CONSISTENCY;
476     mcast_info.connection_id  =   CmiMyPe();
477     mcast_info.bytes          =   size;
478     mcast_info.src            =   msg;
479     mcast_info.nranks         =   npes;
480     mcast_info.ranks          =   (unsigned *)pelist;
481     mcast_info.opcodes        =   CmiOpcodeList;   //static list of MAX_MULTICAST entires with 0 in them
482     mcast_info.flags          =   0;
483     mcast_info.msginfo        =   &info;
484     //mcast_info.count          =   1;
485     mcast_info.count          =   0;
486
487 #if CMK_SMP
488     DCMF_CriticalSection_enter (0);
489 #endif
490     msgQueueLen++;
491     DCMF_Multicast (&mcast_info);
492
493 #if CMK_SMP
494     DCMF_CriticalSection_exit (0);
495 #endif
496 }
497
498 /* Recv functions */
499 /* The callback on the recv side */
500 #if (DCMF_VERSION_MAJOR >= 2)
501 static void recv_done(void *clientdata, DCMF_Error_t * err)
502 #else
503 static void recv_done(void *clientdata)
504 #endif
505 /* recv done callback: push the recved msg to recv queue */
506 {
507
508     char *msg = (char *) clientdata;
509
510     /*printf ("NODE[%d] Recv message done with msg rank %d\n", CmiMyNode(), CMI_DEST_RANK(msg));*/
511     MACHSTATE3(2,"[%d] recv_done begin with msg %p size=%d { ", CmiMyNode(), msg, CMI_MSG_SIZE(msg));
512 #if CMK_ERROR_CHECKING
513     int sndlen = CMI_MSG_SIZE(msg);
514     CMI_CHECK_CHECKSUM(msg, sndlen);
515     if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) { /* received a non-charm msg */
516         CmiAbort("Charm++ Warning: Non Charm++ Message Received. \n");
517         return;
518     }
519 #endif
520
521     handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
522
523     outstanding_recvs--;
524     MACHSTATE(2,"} recv_done end ");
525     return;
526 }
527
528 void short_pkt_recv (void             * clientdata,
529                      const DCQuad     * info,
530                      unsigned           count,
531                      unsigned           senderrank,
532                      const char       * buffer,
533                      const unsigned     sndlen) {
534     outstanding_recvs ++;
535     int alloc_size = sndlen;
536
537     char * new_buffer = (char *)CmiAlloc(alloc_size);
538     CmiMemcpy (new_buffer, buffer, sndlen);
539
540 #if (DCMF_VERSION_MAJOR >= 2)
541     recv_done (new_buffer, NULL);
542 #else
543     recv_done (new_buffer);
544 #endif
545 }
546
547 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
548         unsigned            count,
549         unsigned            senderrank,
550         const unsigned      sndlen,
551         unsigned            connid,
552         void              * clientdata,
553         unsigned          * rcvlen,
554         char             ** buffer,
555         unsigned          * pw,
556         DCMF_Callback_t   * cb
557                                            ) {
558     outstanding_recvs ++;
559     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
560     /*printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
561     /* printf ("Receiving %d bytes\n", sndlen); */
562     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
563
564     *buffer = (char *)CmiAlloc(alloc_size);
565     cb->function = recv_done;
566     cb->clientdata = *buffer;
567
568     *pw  = 0x7fffffff;
569     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
570 }
571
572 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
573                                       const DCQuad      * info,
574                                       unsigned            count,
575                                       unsigned            senderrank,
576                                       const unsigned      sndlen,
577                                       unsigned          * rcvlen,
578                                       char             ** buffer,
579                                       DCMF_Callback_t   * cb
580                                      ) {
581     outstanding_recvs ++;
582     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
583     /* printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
584     /* printf ("Receiving %d bytes\n", sndlen); */
585     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
586
587     *buffer = (char *)CmiAlloc(alloc_size);
588     cb->function = recv_done;
589     cb->clientdata = *buffer;
590
591     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
592 }
593
594 #if 0
595 /* -----------------------------------------
596  * Rectangular broadcast implementation
597  * -----------------------------------------
598  */
599 unsigned int *ranklist;
600 BGTsC_t        barrier;
601 #define MAX_COMM  256
602 static void * comm_table [MAX_COMM];
603
604 typedef struct rectbcast_msg {
605     BGTsRC_t           request;
606     DCMF_Callback_t    cb;
607     char              *msg;
608 } RectBcastInfo;
609
610
611 static void bcast_done (void *data) {
612     RectBcastInfo *rinfo = (RectBcastInfo *) data;
613     CmiFree (rinfo->msg);
614     free (rinfo);
615 }
616
617 static  void *   getRectBcastRequest (unsigned comm) {
618     return comm_table [comm];
619 }
620
621
622 static  void *  bcast_recv     (unsigned               root,
623                                 unsigned               comm,
624                                 const unsigned         sndlen,
625                                 unsigned             * rcvlen,
626                                 char                ** rcvbuf,
627                                 DCMF_Callback_t      * const cb) {
628
629     int alloc_size = sndlen + sizeof(BGTsRC_t) + 16;
630
631     *rcvlen = sndlen;  /* to avoid malloc(0) which might
632                                    return NULL */
633
634     *rcvbuf       =  (char *)CmiAlloc(alloc_size);
635     cb->function  =   recv_done;
636     cb->clientdata = *rcvbuf;
637
638     return (BGTsRC_t *) ALIGN_16 (*rcvbuf + sndlen);
639
640 }
641
642
643 extern void bgl_machine_RectBcast (unsigned                 commid,
644                                    const char             * sndbuf,
645                                    unsigned                 sndlen) {
646     RectBcastInfo *rinfo  =   (RectBcastInfo *) malloc (sizeof(RectBcastInfo));
647     rinfo->cb.function    =   bcast_done;
648     rinfo->cb.clientdata  =   rinfo;
649
650     BGTsRC_AsyncBcast_start (commid, &rinfo->request, &rinfo->cb, sndbuf, sndlen);
651
652 }
653
654 extern void        bgl_machine_RectBcastInit  (unsigned               commID,
655         const BGTsRC_Geometry_t* geometry) {
656
657     CmiAssert (commID < 256);
658     CmiAssert (comm_table [commID] == NULL);
659
660     BGTsRC_t *request =  (BGTsRC_t *) malloc (sizeof (BGTsRC_t));
661     comm_table [commID] = request;
662
663     BGTsRC_AsyncBcast_init  (request, commID,  geometry);
664 }
665
666 /*--------------------------------------------------------------
667  *----- End Rectangular Broadcast Implementation ---------------
668  *--------------------------------------------------------------*/
669 #endif
670
671
672 /*######End of functions related with Communication-Op functions ######*/
673
674
675 /* ######Beginning of functions related with communication progress ###### */
676 static INLINE_KEYWORD void AdvanceCommunicationForDCMF() {
677 #if CMK_SMP
678     DCMF_CriticalSection_enter (0);
679 #endif
680
681     while (DCMF_Messager_advance()>0);
682     //DCMF_Messager_advance();
683
684 #if CMK_SMP
685     DCMF_CriticalSection_exit (0);
686 #endif
687 }
688 /* ######End of functions related with communication progress ###### */
689
/* Post-non-local hook required by the common machine code; intentionally empty. */
void MachinePostNonLocalForDCMF() {
}
693
694 /* The network progress function is used to poll the network for
695    messages. This flushes receive buffers on some implementations. */
#if CMK_MACHINE_PROGRESS_DEFINED
/* Advance the network and, when enabled, run pending immediate messages. */
void CmiMachineProgressImpl() {
    AdvanceCommunicationForDCMF();
#if CMK_IMMEDIATE_MSG
    CmiHandleImmediate();
#endif
}
#endif
704
705 /* ######Beginning of functions related with exiting programs###### */
706 static void DrainResourcesForDCMF() {
707     while (msgQueueLen > 0 || outstanding_recvs > 0) {
708         AdvanceCommunicationForDCMF();
709     }
710 }
711
/* Shut down the DCMF messager and terminate the process with a success status. */
static void MachineExitForDCMF() {
    DCMF_Messager_finalize();

    exit(EXIT_SUCCESS);
}
716 /* ######End of functions related with exiting programs###### */
717
718
719 /* ######Beginning of functions related with starting programs###### */
720 /**
721  *  Obtain the number of nodes, my node id, and consuming machine layer
722  *  specific arguments
723  */
724 static void MachineInitForDCMF(int argc, char **argv, int *numNodes, int *myNodeID) {
725
726     DCMF_Messager_initialize();
727
728 #if CMK_SMP
729     DCMF_Configure_t  config_in, config_out;
730     config_in.thread_level= DCMF_THREAD_MULTIPLE;
731     config_in.interrupts  = DCMF_INTERRUPTS_OFF;
732
733     DCMF_Messager_configure(&config_in, &config_out);
734     //assert (config_out.thread_level == DCMF_THREAD_MULTIPLE); //not supported in vn mode
735 #endif
736
737     DCMF_Send_Configuration_t short_config, eager_config, rzv_config;
738
739
740     short_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
741     short_config.cb_recv_short = short_pkt_recv;
742     short_config.cb_recv       = first_pkt_recv_done;
743
744 #if (DCMF_VERSION_MAJOR >= 3)
745     short_config.network  = DCMF_DEFAULT_NETWORK;
746 #elif (DCMF_VERSION_MAJOR == 2)
747     short_config.network  = DCMF_DefaultNetwork;
748 #endif
749
750     eager_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
751     eager_config.cb_recv_short = short_pkt_recv;
752     eager_config.cb_recv       = first_pkt_recv_done;
753 #if (DCMF_VERSION_MAJOR >= 3)
754     eager_config.network  = DCMF_DEFAULT_NETWORK;
755 #elif (DCMF_VERSION_MAJOR == 2)
756     eager_config.network  = DCMF_DefaultNetwork;
757 #endif
758
759 #ifdef  OPT_RZV
760 #warning "Enabling Optimize Rzv"
761     rzv_config.protocol        = DCMF_RZV_SEND_PROTOCOL;
762 #else
763     rzv_config.protocol        = DCMF_DEFAULT_SEND_PROTOCOL;
764 #endif
765     rzv_config.cb_recv_short   = short_pkt_recv;
766     rzv_config.cb_recv         = first_pkt_recv_done;
767 #if (DCMF_VERSION_MAJOR >= 3)
768     rzv_config.network  = DCMF_DEFAULT_NETWORK;
769 #elif (DCMF_VERSION_MAJOR == 2)
770     rzv_config.network  = DCMF_DefaultNetwork;
771 #endif
772
773     DCMF_Send_register (&cmi_dcmf_short_registration, &short_config);
774     DCMF_Send_register (&cmi_dcmf_eager_registration, &eager_config);
775     DCMF_Send_register (&cmi_dcmf_rzv_registration,   &rzv_config);
776
777 #ifdef BGP_USE_AM_DIRECT
778     DCMF_Send_Configuration_t direct_config;
779     direct_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
780     direct_config.cb_recv_short = direct_short_pkt_recv;
781     direct_config.cb_recv       = direct_first_pkt_recv_done;
782 #if (DCMF_VERSION_MAJOR >= 3)
783     direct_config.network  = DCMF_DEFAULT_NETWORK;
784 #elif (DCMF_VERSION_MAJOR == 2)
785     direct_config.network  = DCMF_DefaultNetwork;
786 #endif
787     DCMF_Send_register (&cmi_dcmf_direct_registration,   &direct_config);
788     directcb.function=direct_send_done_cb;
789     directcb.clientdata=NULL;
790 #endif
791
792 #ifdef BGP_USE_RDMA_DIRECT
793     /* notification protocol */
794     DCMF_Send_Configuration_t direct_rdma_config;
795     direct_rdma_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
796     direct_rdma_config.cb_recv_short = direct_short_rdma_pkt_recv;
797     direct_rdma_config.cb_recv       = direct_first_rdma_pkt_recv_done;
798 #if (DCMF_VERSION_MAJOR >= 3)
799     direct_rdma_config.network  = DCMF_DEFAULT_NETWORK;
800 #elif (DCMF_VERSION_MAJOR == 2)
801     direct_rdma_config.network  = DCMF_DefaultNetwork;
802 #endif
803     DCMF_Send_register (&cmi_dcmf_direct_rdma_registration,   &direct_rdma_config);
804     directcb.function=direct_send_rdma_done_cb;
805     directcb.clientdata=NULL;
806     /* put protocol */
807     DCMF_Put_Configuration_t put_configuration = { DCMF_DEFAULT_PUT_PROTOCOL };
808     DCMF_Put_register (&cmi_dcmf_direct_put_registration, &put_configuration);
809     DCMF_Get_Configuration_t get_configuration = { DCMF_DEFAULT_GET_PROTOCOL };
810     DCMF_Get_register (&cmi_dcmf_direct_get_registration, &get_configuration);
811
812 #endif
813     //fprintf(stderr, "Initializing Eager Protocol\n");
814
815     *numNodes = DCMF_Messager_size();
816     *myNodeID = DCMF_Messager_rank();
817
818     CmiBarrier();
819     CmiBarrier();
820     CmiBarrier();
821
822     /* NOTE: the following codes requires #PEs, which is not available
823      * until this function finishes. And it allocate O(p) space */
824     int totalPEs = _Cmi_mynodesize * (*numNodes);
825     DCMF_Multicast_Configuration_t mconfig;
826     mconfig.protocol = DCMF_MEMFIFO_DMA_MSEND_PROTOCOL;
827     mconfig.cb_recv  = first_multi_pkt_recv_done;
828     mconfig.clientdata = NULL;
829     mconfig.connectionlist = (void **) malloc (totalPEs * sizeof(unsigned long));
830     mconfig.nconnections = totalPEs;
831     DCMF_Multicast_register(&cmi_dcmf_multicast_registration, &mconfig);
832
833     int actualNodeSize = _Cmi_mynodesize;
834 #if !CMK_SMP_NO_COMMTHD
835     actualNodeSize++; //considering the extra comm thread
836 #endif
837
838     procState = (ProcState *)CmiAlloc((actualNodeSize) * sizeof(ProcState));
839     for (int i=0; i<actualNodeSize; i++) {
840         /*    procState[i].sendMsgBuf = PCQueueCreate();   */
841         procState[i].recvLock = CmiCreateLock();
842         procState[i].bcastLock = CmiCreateLock();
843     }
844
845     /* checksum flag */
846     if (CmiGetArgFlag(argv,"+checksum")) {
847 #if CMK_ERROR_CHECKING
848         checksum_flag = 1;
849         if (*myNodeID == 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
850 #else
851         if (*myNodeID == 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
852 #endif
853     }
854
855 }
856
857 static void MachinePreCommonInitForDCMF(int everReturn) {
858     CpvInitialize(PCQueue, smsg_list_q);
859     CpvAccess(smsg_list_q) = PCQueueCreate();
860 }
861
862 static void MachinePostCommonInitForDCMF(int everReturn) {
863 #if !CMK_SMP || CMK_SMP_NO_COMMTHD
864     CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)CmiNotifyIdle,NULL);
865 #endif
866
867     CmiBarrier();
868 }
869 /* ######End of functions related with starting programs###### */
870
871 /***********************************************************************
872  *
873  * Abort function:
874  *
875  ************************************************************************/
876
877 void CmiAbort(const char *message) {
878     CmiError("------------- Processor %d Exiting: Called CmiAbort ------------\n"
879              "{snd:%d,rcv:%d} Reason: %s\n",CmiMyPe(),
880              msgQueueLen, outstanding_recvs, message);
881
882 #if 0
883     /* Since it's a abort, why bother to drain the resources? The system
884      * should clean it self
885      */
886     /* FIXME: what happens in the SMP mode??? */
887     DrainResourcesForDCMF();
888 #endif
889     assert(0);
890 }
891
892
893 /*********** Beginning of MULTICAST/VECTOR SENDING FUNCTIONS **************/
894 /*
895
896  * In relations to some flags, some other delivery functions may be needed.
897  */
898
899 #if !CMK_MULTICAST_LIST_USE_COMMON_CODE
900
/*
 * Synchronous list send: the caller keeps msg.  A duplicate produced by
 * CopyMsg(msg, size) is forwarded to CmiFreeListSendFn together with the
 * unchanged destination list (npes entries in pes).
 */
void CmiSyncListSendFn(int npes, int *pes, int size, char *msg) {
    CmiFreeListSendFn(npes, pes, size, CopyMsg(msg, size));
}
905
/* The optimized multicast path is only enabled for SMP builds.  It is
 * currently disabled for non-SMP because it fails for hybrid ldb in NAMD,
 * as reported by Gengbin --Chao Mei
 */
#if CMK_SMP
#define OPTIMIZED_MULTICAST  1
#warning "Using Optimized Multicast"
#else
#define OPTIMIZED_MULTICAST  0
#endif
918
/**
 * Send msg to every PE listed in pes[0..npes-1].
 *
 * In every path msg is either released here with CmiFree or handed on to
 * CmiFreeSendFn / machineMulticast, so the caller must not touch it
 * afterwards.
 *
 * @param npes  number of destinations; must be at least 1
 * @param pes   destination PE numbers
 * @param size  message size in bytes
 * @param msg   message buffer to send
 *
 * With OPTIMIZED_MULTICAST in an SMP build, destinations on the local node
 * receive a copy via CmiSyncSend and only the remaining PEs are passed to
 * machineMulticast.  Otherwise the message is sent to each PE in turn.
 */
void CmiFreeListSendFn(int npes, int *pes, int size, char *msg) {
    CmiAssert(npes>=1);
    if (npes==1) {
        CmiFreeSendFn(pes[0], size, msg);
        return;
    }

    int i;
#if OPTIMIZED_MULTICAST
    int *newpelist = pes;
    int new_npes = npes;
#if CMK_SMP
    /* Build a private list holding only the off-node destinations. */
    newpelist = (int *)malloc(sizeof(int)*npes);
    if (newpelist == NULL) {
        CmiAbort("CmiFreeListSendFn: out of memory for the multicast PE list\n");
    }
    new_npes = 0;
    for (i=0; i<npes; i++) {
        if (CmiNodeOf(pes[i]) == CmiMyNode()) {
            CmiSyncSend(pes[i], size, msg);
        } else {
            newpelist[new_npes++] = pes[i];
        }
    }
    if (new_npes == 0) {
        /* Every destination was node-local: nothing else will ever see the
         * scratch list, so release it here instead of leaking it. */
        free(newpelist);
        CmiFree(msg);
        return;
    }
#endif

    CMI_SET_BROADCAST_ROOT(msg,0);
    CMI_MSG_SIZE(msg) = size;
#if CMK_ERROR_CHECKING
    CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
    CMI_SET_CHECKSUM(msg, size);
#endif

    CQdCreate(CpvAccess(cQdState), new_npes);
    /* NOTE(review): newpelist is handed to machineMulticast and not freed
     * here; confirm that machineMulticast (or its completion path) releases
     * it in the SMP case. */
    machineMulticast (new_npes, newpelist, size, msg);
#else /* non-optimized multicast */

    /* All but the last destination get their own send; the last one
     * consumes msg through CmiFreeSendFn. */
    for (i=0; i<npes-1; i++) {
#if !CMK_SMP
        CmiReference(msg);
        CmiFreeSendFn(pes[i], size, msg);
#else
        CmiSyncSend(pes[i], size, msg);
#endif
    }
    CmiFreeSendFn(pes[npes-1], size, msg);
#endif /* end of #if OPTIMIZED_MULTICAST */
}
971 #endif /* end of #if !CMK_MULTICAST_LIST_USE_COMMON_CODE */
972
973 /*********** End of MULTICAST/VECTOR SENDING FUNCTIONS **************/
974
975 /**************************  TIMER FUNCTIONS **************************/
976
977 /************Barrier Related Functions****************/
978 /* Barrier related functions */
979 /*TODO: does DCMF provide any Barrier related functions ??? --Chao Mei */
980 /* Barrier needs to be implemented!!! -Chao Mei */
981 /* These two barriers are only needed by CmiTimerInit to synchronize all the
982    threads. They do not need to provide a general barrier. */
/*
 * Placeholder barrier: performs no synchronization at all and always
 * returns 0.
 */
int CmiBarrier() {
    const int status = 0;   /* there is no work that could fail */

    return status;
}
/*
 * Placeholder barrier: performs no synchronization at all and always
 * returns 0.
 */
int CmiBarrierZero() {
    const int status = 0;   /* there is no work that could fail */

    return status;
}
989
990 #include "manytomany.c"
991
992 /*********************************************************************************************
993 This section is for CmiDirect. This is a variant of the  persistent communication in which
994 the user can transfer data between processors without using Charm++ messages. This lets the user
995 send and receive data from the middle of his arrays without any copying on either send or receive
996 side
997 *********************************************************************************************/
998
999
1000 #ifdef BGP_USE_AM_DIRECT
1001
1002 #include "cmidirect.h"
1003
1004 /* We can avoid a receiver side lookup by just sending the whole shebang.
1005    DCMF header is in units of quad words (16 bytes), so we'd need less than a
1006    quad word for the handle if we just sent that and did a lookup. Or exactly
1007    2 quad words for the buffer pointer, callback pointer, callback
1008    data pointer, and DCMF_Request_t pointer with no lookup.
1009
1010    Since CmiDirect is generally going to be used for messages which aren't
1011    tiny, the extra 16 bytes is not likely to impact performance noticeably and
1012    not having to lookup handles in tables simplifies the code enormously.
1013
1014    EJB   2008/4/2
1015 */
1016
1017
1018 /**
1019  To be called on the receiver to create a handle and return its number
1020 **/
1021 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1022     /* with two-sided primitives we just bundle the buffer and callback info into the handle so the sender can remind us about it later. */
1023     struct infiDirectUserHandle userHandle;
1024     userHandle.handle=1; /* doesn't matter on BG/P*/
1025     userHandle.senderNode=senderNode;
1026     userHandle.recverNode=_Cmi_mynode;
1027     userHandle.recverBufSize=recvBufSize;
1028     userHandle.recverBuf=recvBuf;
1029     userHandle.initialValue=initialValue;
1030     userHandle.callbackFnPtr=callbackFnPtr;
1031     userHandle.callbackData=callbackData;
1032     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1033 #if CMI_DIRECT_DEBUG
1034     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1035 #endif
1036     return userHandle;
1037 }
1038
1039 /****
1040  To be called on the sender to attach the sender's buffer to this handle
1041 ******/
1042
1043 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1044
1045     /* one-sided primitives would require registration of memory */
1046
1047     /* with two-sided primitives we just record the sender buf in the handle */
1048     userHandle->senderBuf=sendBuf;
1049     CmiAssert(sendBufSize==userHandle->recverBufSize);
1050     userHandle->DCMF_rq_tsend = (DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1051 #if CMI_DIRECT_DEBUG
1052     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1053 #endif
1054
1055 }
1056
1057 /****
1058 To be called on the sender to do the actual data transfer
1059 ******/
1060 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1061     /** invoke a DCMF_Send with the direct callback */
1062     DCMF_Protocol_t *protocol = NULL;
1063     protocol = &cmi_dcmf_direct_registration;
1064     /* local copy */
1065     CmiAssert(userHandle->recverBuf!=NULL);
1066     CmiAssert(userHandle->senderBuf!=NULL);
1067     CmiAssert(userHandle->recverBufSize>0);
1068     if (userHandle->recverNode== _Cmi_mynode) {
1069 #if CMI_DIRECT_DEBUG
1070         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1071 #endif
1072
1073         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1074         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1075     } else {
1076         dcmfDirectMsgHeader msgHead;
1077         msgHead.recverBuf=userHandle->recverBuf;
1078         msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1079         msgHead.callbackData=userHandle->callbackData;
1080         msgHead.DCMF_rq_t=(DCMF_Request_t *) userHandle->DCMF_rq_trecv;
1081 #if CMK_SMP
1082         DCMF_CriticalSection_enter (0);
1083 #endif
1084 #if CMI_DIRECT_DEBUG
1085         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1086 #endif
1087         DCMF_Send (protocol,
1088                    (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1089                    directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1090                    userHandle->recverBufSize, userHandle->senderBuf,
1091                    (struct DCQuad *) &(msgHead), 2);
1092
1093 #if CMK_SMP
1094         DCMF_CriticalSection_exit (0);
1095 #endif
1096     }
1097 }
1098
/*
 * Get is not available in the active-message flavour: the call
 * unconditionally ends in CmiAbort.
 */
void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
    const char *const reason =
        "Not Implemented, switch to #define BGP_USE_RDMA_DIRECT";

    CmiAbort(reason);
}
1102
1103 /**** up to the user to safely call this */
1104 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1105     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1106 #if CMK_SMP
1107     DCMF_CriticalSection_enter (0);
1108 #endif
1109     CmiFree(userHandle->DCMF_rq_tsend);
1110 #if CMK_SMP
1111     DCMF_CriticalSection_exit (0);
1112 #endif
1113
1114 }
1115
1116 /**** up to the user to safely call this */
1117 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1118     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1119 #if CMK_SMP
1120     DCMF_CriticalSection_enter (0);
1121 #endif
1122     CmiFree(userHandle->DCMF_rq_trecv);
1123
1124 #if CMK_SMP
1125     DCMF_CriticalSection_exit (0);
1126 #endif
1127 }
1128
1129
1130 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1134
1135 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1139
1140 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1144
1145 #endif /* BGP_USE_AM_DIRECT*/
1146
1147 #ifdef BGP_USE_RDMA_DIRECT
1148
1149 #include "cmidirect.h"
1150
1151 /*
1152    Notification protocol passes callback function and data in a single
1153    quadword.  This occurs in a message triggered by the sender side ack
1154    callback and therefore has higher latency than polling, but is guaranteed
1155    to be semantically correct.  The latency for a single packet that isn't
1156    hitting charm/converse should be pretty minimal, but you could run into
1157    sender side progress issues.  The alternative of polling on the out of band
1158    byte scheme creates correctness issues in that the data really has to be
1159    out of band and you rely on the buffer being written in order.  It also has
1160    annoying polling issues.  A third scheme could add a second put to a
1161    control region to poll upon and force sequential consistency between
1162    puts. It's not really clear that this would be faster or avoid the progress
1163    issue since you run into the same issues to enforce that sequential
1164    consistency.
1165
1166    EJB   2011/1/20
1167 */
1168
1169
1170 /* local function to use the ack as our signal to send a remote notify */
1171 static void CmiNotifyRemoteRDMA(void *handle, struct DCMF_Error_t *error) {
1172     struct infiDirectUserHandle *userHandle= (struct infiDirectUserHandle *) handle;
1173     dcmfDirectRDMAMsgHeader msgHead;
1174     msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1175     msgHead.callbackData=userHandle->callbackData;
1176 #if CMK_SMP
1177     DCMF_CriticalSection_enter (0);
1178 #endif
1179 #if CMI_DIRECT_DEBUG
1180     CmiPrintf("[%d] RDMA notify put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p \n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1181 #endif
1182     DCMF_Result res=DCMF_Send (&cmi_dcmf_direct_rdma_registration,
1183                                userHandle->DCMF_rq_tsend,
1184                                directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1185                                sizeof(dcmfDirectRDMAMsgHeader),
1186
1187                                userHandle->DCMF_notify_buf,
1188                                (struct DCQuad *) &(msgHead), 1);
1189 //    CmiAssert(res==DCMF_SUCCESS);
1190 #if CMK_SMP
1191     DCMF_CriticalSection_exit (0);
1192 #endif
1193 }
1194
1195 /**
1196  To be called on the receiver to create a handle and return its number
1197 **/
1198
1199
1200 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1201     /* one-sided primitives require registration of memory */
1202     struct infiDirectUserHandle userHandle;
1203     size_t numbytesRegistered=0;
1204     DCMF_Result regresult=DCMF_Memregion_create( &userHandle.DCMF_recverMemregion,
1205                           &numbytesRegistered,
1206                           recvBufSize,
1207                           recvBuf,
1208                           0);
1209     CmiAssert(numbytesRegistered==recvBufSize);
1210     CmiAssert(regresult==DCMF_SUCCESS);
1211
1212
1213     userHandle.handle=1; /* doesn't matter on BG/P*/
1214     userHandle.senderNode=senderNode;
1215     userHandle.recverNode=_Cmi_mynode;
1216     userHandle.recverBufSize=recvBufSize;
1217     userHandle.recverBuf=recvBuf;
1218     userHandle.initialValue=initialValue;
1219     userHandle.callbackFnPtr=callbackFnPtr;
1220     userHandle.callbackData=callbackData;
1221     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1222 #if CMI_DIRECT_DEBUG
1223     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1224 #endif
1225     return userHandle;
1226 }
1227
1228 /****
1229  To be called on the sender to attach the sender's buffer to this handle
1230 ******/
1231
1232 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1233     /* one-sided primitives would require registration of memory */
1234     userHandle->senderBuf=sendBuf;
1235     CmiAssert(sendBufSize==userHandle->recverBufSize);
1236     userHandle->DCMF_rq_tsend =(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1237     size_t numbytesRegistered=0;  // set as return value from create
1238     userHandle->DCMF_notify_buf=ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+32));
1239     userHandle->DCMF_notify_cb.function=CmiNotifyRemoteRDMA;
1240     userHandle->DCMF_notify_cb.clientdata=userHandle;
1241     DCMF_Result regresult=DCMF_Memregion_create( &userHandle->DCMF_senderMemregion,
1242                           &numbytesRegistered,
1243                           sendBufSize,
1244                           sendBuf,
1245                           0);
1246     CmiAssert(numbytesRegistered==sendBufSize);
1247     CmiAssert(regresult==DCMF_SUCCESS);
1248
1249 #if CMI_DIRECT_DEBUG
1250     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1251 #endif
1252
1253 }
1254
1255
1256 /****
1257 To be called on the sender to do the actual data transfer
1258 ******/
1259 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1260     /** invoke a DCMF_Put with the direct callback */
1261
1262     CmiAssert(userHandle->recverBuf!=NULL);
1263     CmiAssert(userHandle->senderBuf!=NULL);
1264     CmiAssert(userHandle->recverBufSize>0);
1265     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1266 #if CMI_DIRECT_DEBUG
1267         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1268 #endif
1269
1270         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1271         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1272     } else {
1273 #if CMK_SMP
1274         DCMF_CriticalSection_enter (0);
1275 #endif
1276 #if CMI_DIRECT_DEBUG
1277         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1278 #endif
1279         DCMF_Result
1280         Res= DCMF_Put(&cmi_dcmf_direct_put_registration,
1281                       userHandle->DCMF_rq_tsend,
1282                       directcb, DCMF_RELAXED_CONSISTENCY,
1283                       userHandle->recverNode,
1284                       userHandle->recverBufSize,
1285                       &userHandle->DCMF_senderMemregion,
1286                       &userHandle->DCMF_recverMemregion,
1287                       0, /* offsets are zero */
1288                       0,
1289                       userHandle->DCMF_notify_cb
1290                      );
1291         CmiAssert(Res==DCMF_SUCCESS);
1292 #if CMK_SMP
1293         DCMF_CriticalSection_exit (0);
1294 #endif
1295     }
1296 }
1297
1298 /****
1299 To be called on the receiver to initiate the actual data transfer
1300 ******/
1301 void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
1302     /** invoke a DCMF_Get with the direct callback */
1303
1304     CmiAssert(userHandle->recverBuf!=NULL);
1305     CmiAssert(userHandle->senderBuf!=NULL);
1306     CmiAssert(userHandle->recverBufSize>0);
1307     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1308 #if CMI_DIRECT_DEBUG
1309         CmiPrintf("[%d] RDMA local get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1310 #endif
1311
1312         CmiMemcpy(userHandle->senderBuf,userHandle->recverBuf,userHandle->recverBufSize);
1313         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1314     } else {
1315         struct DCMF_Callback_t done_cb;
1316         done_cb.function=userHandle->callbackFnPtr;
1317         done_cb.clientdata=userHandle->callbackData;
1318 #if CMK_SMP
1319         DCMF_CriticalSection_enter (0);
1320 #endif
1321 #if CMI_DIRECT_DEBUG
1322         CmiPrintf("[%d] RDMA get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1323 #endif
1324         DCMF_Result
1325         Res= DCMF_Get(&cmi_dcmf_direct_get_registration,
1326                       (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1327                       done_cb, DCMF_RELAXED_CONSISTENCY,
1328                       userHandle->recverNode,
1329                       userHandle->recverBufSize,
1330                       & userHandle->DCMF_recverMemregion,
1331                       & userHandle->DCMF_senderMemregion,
1332                       0, /* offsets are zero */
1333                       0
1334                      );
1335         CmiAssert(Res==DCMF_SUCCESS);
1336
1337
1338 #if CMK_SMP
1339         DCMF_CriticalSection_exit (0);
1340 #endif
1341     }
1342 }
1343
1344 /**** up to the user to safely call this */
1345 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1346     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1347 #if CMK_SMP
1348     DCMF_CriticalSection_enter (0);
1349 #endif
1350
1351     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_senderMemregion);
1352     CmiFree(userHandle->DCMF_notify_buf);
1353     CmiFree(userHandle->DCMF_rq_tsend);
1354 #if CMK_SMP
1355     DCMF_CriticalSection_exit (0);
1356 #endif
1357
1358 }
1359
1360 /**** up to the user to safely call this */
1361 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1362     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1363 #if CMK_SMP
1364     DCMF_CriticalSection_enter (0);
1365 #endif
1366
1367     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_recverMemregion);
1368     CmiFree(userHandle->DCMF_rq_trecv);
1369
1370 #if CMK_SMP
1371     DCMF_CriticalSection_exit (0);
1372 #endif
1373 }
1374
1375
1376
1377 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1381
1382 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1386
1387 /**** Should not be called the first time *********/
/* Deliberately empty: this call is a no-op on BG/P. */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void) userHandle;   /* handle is neither read nor modified */
}
1391
1392 #endif /* BGP_USE_RDMA_DIRECT*/
1393
1394 /*@}*/
1395