Fixed a bug in passing argv for machine-specific initialization procedures.
[charm.git] / src / arch / bluegenep / machine.c
1 #include <stdio.h>
2 #include <errno.h>
3 #include <stdlib.h>
4 #include <unistd.h>
5 #include <math.h>
6 #include <string.h>
7 #include <malloc.h>
8 #include <assert.h>
9
10 #include "converse.h"
11 #include "machine.h"
12 #include "pcqueue.h"
13
14 #include <bpcore/ppc450_inlines.h>
15 #include "dcmf.h"
16 #include "dcmf_multisend.h"
17
18 /* =======Beginning of Definitions of Performance-Specific Macros =======*/
19 /* =======End of Definitions of Performance-Specific Macros =======*/
20
21 /* =======Beginning of Definitions of Msg Header Specific Macros =======*/
22 /* =======End of Definitions of Msg Header Specific Macros =======*/
23
24 /* =====Beginning of Definitions of Message-Corruption Related Macros=====*/
/* Accesses the `magic` field of the basic Converse header at the start of
 * `msg`. The argument is parenthesized so that pointer expressions such as
 * `buf + off` are cast as a whole. */
#define CMI_MAGIC(msg)                   (((CmiMsgHeaderBasic *)(msg))->magic)
/* Value the senders in this file store in CMI_MAGIC() and the receiver
 * checks for. */
#define CHARM_MAGIC_NUMBER               126

#if CMK_ERROR_CHECKING
static int checksum_flag = 0;
extern unsigned char computeCheckSum(unsigned char *data, int len);

/*
 * Both macros expand to a single statement (do { } while (0)) so they can be
 * used safely as the body of an unbraced if/else.
 *
 * CMI_SET_CHECKSUM: when checksum_flag is set, clears the header's cksum
 * field and then stores computeCheckSum() over the first `len` bytes.
 */
#define CMI_SET_CHECKSUM(msg, len)      \
        do {    \
          if (checksum_flag)  {   \
            ((CmiMsgHeaderBasic *)(msg))->cksum = 0;        \
            ((CmiMsgHeaderBasic *)(msg))->cksum = computeCheckSum((unsigned char*)(msg), (len));        \
          }     \
        } while (0)

/*
 * CMI_CHECK_CHECKSUM: when checksum_flag is set and computeCheckSum() over
 * the message is non-zero, dumps the message bytes in hex and aborts.
 * Bytes are printed as unsigned char so values >= 0x80 are not sign-extended.
 */
#define CMI_CHECK_CHECKSUM(msg, len)    \
        do {    \
          if (checksum_flag && computeCheckSum((unsigned char*)(msg), (len)) != 0)  { \
            int count; \
            printf("\n\n------------------------------\n\nReceiver %d size %d:", CmiMyPe(), (len)); \
            for(count = 0; count < (len); count++) { \
                printf("%2x", ((unsigned char*)(msg))[count]);                 \
            } \
            printf("------------------------------\n\n"); \
            CmiAbort("Fatal error: checksum doesn't agree!\n"); \
          } \
        } while (0)
#else
#define CMI_SET_CHECKSUM(msg, len)
#define CMI_CHECK_CHECKSUM(msg, len)
#endif
55 /* =====End of Definitions of Message-Corruption Related Macros=====*/
56
57
58 /* =====Beginning of Declarations of Machine Specific Variables===== */
59 typedef struct ProcState {
60     /* PCQueue      sendMsgBuf; */      /* per processor message sending queue */
61     CmiNodeLock  recvLock;              /* for cs->recv */
62     CmiNodeLock bcastLock;
63 } ProcState;
64
65 static ProcState  *procState;
66
67 volatile int msgQueueLen;
68 volatile int outstanding_recvs;
69
70 DCMF_Protocol_t  cmi_dcmf_short_registration __attribute__((__aligned__(16)));
71 DCMF_Protocol_t  cmi_dcmf_eager_registration __attribute__((__aligned__(16)));
72 DCMF_Protocol_t  cmi_dcmf_rzv_registration   __attribute__((__aligned__(16)));
73 DCMF_Protocol_t  cmi_dcmf_multicast_registration   __attribute__((__aligned__(16)));
74
75
76 typedef struct msg_list {
77     char              * msg;
78 //    int                 size;
79 //    int                 destpe;
80     int               * pelist;
81 //    DCMF_Callback_t     cb;
82 //    DCQuad              info __attribute__((__aligned__(16)));
83     DCMF_Request_t      send __attribute__((__aligned__(16)));
84 } SMSG_LIST __attribute__((__aligned__(16)));
85
86 #define MAX_NUM_SMSGS   64
87 CpvDeclare(PCQueue, smsg_list_q);
88 static SMSG_LIST * smsg_allocate();
89 static void smsg_free (SMSG_LIST *smsg);
90
91 /* =====End of Declarations of Machine Specific Variables===== */
92
93
94 /* =====Beginning of Declarations of Machine Specific Functions===== */
95 /* Utility functions */
/**
 * Round a pointer up to the next 16-byte boundary.
 *
 * @param p  any address
 * @return   the smallest address >= p that is a multiple of 16
 *
 * The mask is built from an unsigned long constant (~0xfUL) rather than the
 * 32-bit literal 0xfffffff0, so the upper address bits are preserved when
 * pointers are wider than 32 bits.
 */
char *ALIGN_16(char *p) {
    return (char *)(((unsigned long)p + 0xfUL) & ~0xfUL);
}
99
/*
 * Approximate sleep: busy-waits until the DCMF timebase has advanced by
 * `cycles` ticks from the value read on entry.
 */
void mysleep (int cycles) {
    unsigned long long now = DCMF_Timebase();
    const unsigned long long deadline = now + cycles;

    for (; now < deadline; now = DCMF_Timebase()) {
        /* spin */
    }
}
107 static void SendMsgsUntil(int);
108
109 /* ######Beginning of Machine-specific RDMA related functions###### */
110 #define BGP_USE_AM_DIRECT 1
111 /* #define BGP_USE_RDMA_DIRECT 1 */
112 /* #define CMI_DIRECT_DEBUG 1 */
113 #if BGP_USE_AM_DIRECT
114
115 DCMF_Protocol_t  cmi_dcmf_direct_registration __attribute__((__aligned__(16)));
116 /** The receive side of a put implemented in DCMF_Send */
117
118 typedef struct {
119     void *recverBuf;
120     void (*callbackFnPtr)(void *);
121     void *callbackData;
122     DCMF_Request_t *DCMF_rq_t;
123 } dcmfDirectMsgHeader;
124
125 /* nothing for us to do here */
126 #if (DCMF_VERSION_MAJOR >= 2)
127 void direct_send_done_cb(void*nothing, DCMF_Error_t *err)
128 #else
129 void direct_send_done_cb(void*nothing)
130 #endif
131 {
132 #if CMI_DIRECT_DEBUG
133     CmiPrintf("[%d] RDMA send_done_cb\n", CmiMyPe());
134 #endif
135 }
136
137 DCMF_Callback_t  directcb;
138
139 void     direct_short_pkt_recv (void             * clientdata,
140                                 const DCQuad     * info,
141                                 unsigned           count,
142                                 unsigned           senderrank,
143                                 const char       * buffer,
144                                 const unsigned     sndlen) {
145 #if CMI_DIRECT_DEBUG
146     CmiPrintf("[%d] RDMA direct_short_pkt_recv\n", CmiMyPe());
147 #endif
148     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
149     CmiMemcpy(msgHead->recverBuf, buffer, sndlen);
150     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
151 }
152
153
154 #if (DCMF_VERSION_MAJOR >= 2)
155 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
156 #else
157 typedef void (*cbhdlr) (void *);
158 #endif
159
160 DCMF_Request_t * direct_first_pkt_recv_done (void              * clientdata,
161         const DCQuad      * info,
162         unsigned            count,
163         unsigned            senderrank,
164         const unsigned      sndlen,
165         unsigned          * rcvlen,
166         char             ** buffer,
167         DCMF_Callback_t   * cb
168                                             ) {
169 #if CMI_DIRECT_DEBUG
170     CmiPrintf("[%d] RDMA direct_first_pkt_recv_done\n", CmiMyPe());
171 #endif
172     /* pull the data we need out of the header */
173     *rcvlen=sndlen;
174     dcmfDirectMsgHeader *msgHead=  (dcmfDirectMsgHeader *) info;
175     cb->function= (cbhdlr)msgHead->callbackFnPtr;
176     cb->clientdata=msgHead->callbackData;
177     *buffer=msgHead->recverBuf;
178     return msgHead->DCMF_rq_t;
179 }
180 #endif /* end of #if BGP_USE_AM_DIRECT */
181
182 #ifdef BGP_USE_RDMA_DIRECT
183 static struct DCMF_Callback_t dcmf_rdma_cb_ack;
184
185 DCMF_Protocol_t  cmi_dcmf_direct_put_registration __attribute__((__aligned__(16)));
186 DCMF_Protocol_t  cmi_dcmf_direct_get_registration __attribute__((__aligned__(16)));
187 DCMF_Protocol_t  cmi_dcmf_direct_rdma_registration __attribute__((__aligned__(16)));
188 /** The receive side of a DCMF_Put notification implemented in DCMF_Send */
189
190 typedef struct {
191     void (*callbackFnPtr)(void *);
192     void *callbackData;
193 } dcmfDirectRDMAMsgHeader;
194
195 #if (DCMF_VERSION_MAJOR >= 2)
196 void direct_send_rdma_done_cb(void*nothing, DCMF_Error_t *err)
197 #else
198 void direct_send_rdma_done_cb(void*nothing)
199 #endif
200 {
201 #if CMI_DIRECT_DEBUG
202     CmiPrintf("[%d] RDMA send_rdma_done_cb result %d\n", CmiMyPe());
203 #endif
204
205
206 }
207
208 DCMF_Callback_t  directcb;
209
210 void     direct_short_rdma_pkt_recv (void             * clientdata,
211                                      const DCQuad     * info,
212                                      unsigned           count,
213                                      unsigned           senderrank,
214                                      const char       * buffer,
215                                      const unsigned     sndlen) {
216 #if CMI_DIRECT_DEBUG
217     CmiPrintf("[%d] RDMA direct_short_rdma_pkt_recv\n", CmiMyPe());
218 #endif
219     dcmfDirectRDMAMsgHeader *msgHead=  (dcmfDirectRDMAMsgHeader *) info;
220     (*(msgHead->callbackFnPtr))(msgHead->callbackData);
221 }
222
223 #if (DCMF_VERSION_MAJOR >= 2)
224 typedef void (*cbhdlr) (void *, DCMF_Error_t *);
225 #else
226 typedef void (*cbhdlr) (void *);
227 #endif
228
229 DCMF_Request_t * direct_first_rdma_pkt_recv_done (void              * clientdata,
230         const DCQuad      * info,
231         unsigned            count,
232         unsigned            senderrank,
233         const unsigned      sndlen,
234         unsigned          * rcvlen,
235         char             ** buffer,
236         DCMF_Callback_t   * cb
237                                                  ) {
238     CmiAbort("direct_first_rdma_pkt_recv should not be called");
239 }
240 #endif /* end of #if BGP_USE_RDMA_DIRECT */
241 /* ######End of Machine-specific RDMA related functions###### */
242
243
244 /* ### Beginning of Communication-Op Related Functions ### */
245 /* The machine-specific send-related function */
246 #if (DCMF_VERSION_MAJOR >= 2)
247 static void send_done(void *data, DCMF_Error_t *err);
248 static void send_multi_done(void *data, DCMF_Error_t *err);
249 #else
250 static void send_done(void *data);
251 static void send_multi_done(void *data);
252 #endif
253 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode);
254 #define CmiMachineSpecificSendFunc MachineSpecificSendForDCMF
255
256 /* The machine-specific recv-related function (on the receiver side) */
257 #if (DCMF_VERSION_MAJOR >= 2)
258 static void recv_done(void *clientdata, DCMF_Error_t * err);
259 #else
260 static void recv_done(void *clientdata);
261 #endif
262 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
263         unsigned            count,
264         unsigned            senderrank,
265         const unsigned      sndlen,
266         unsigned            connid,
267         void              * clientdata,
268         unsigned          * rcvlen,
269         char             ** buffer,
270         unsigned          * pw,
271         DCMF_Callback_t   * cb
272                                            );
273 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
274                                       const DCQuad      * info,
275                                       unsigned            count,
276                                       unsigned            senderrank,
277                                       const unsigned      sndlen,
278                                       unsigned          * rcvlen,
279                                       char             ** buffer,
280                                       DCMF_Callback_t   * cb
281                                      );
282
283 /* ### End of Communication-Op Related Functions ### */
284
285 /* ### Beginning of Machine-startup Related Functions ### */
286 static void MachineInitForDCMF(int *argc, char ***argv, int *numNodes, int *myNodeID);
287 #define MachineSpecificInit MachineInitForDCMF
288
289 static void MachinePreCommonInitForDCMF(int everReturn);
290 static void MachinePostCommonInitForDCMF(int everReturn);
291 #define MachineSpecificPreCommonInit MachinePreCommonInitForDCMF
292 #define MachineSpecificPostCommonInit MachinePostCommonInitForDCMF
293 /* ### End of Machine-startup Related Functions ### */
294
295 /* ### Beginning of Machine-running Related Functions ### */
296 static void AdvanceCommunicationForDCMF();
297 #define MachineSpecificAdvanceCommunication AdvanceCommunicationForDCMF
298
299 static void DrainResourcesForDCMF();
300 #define MachineSpecificDrainResources DrainResourcesForDCMF
301
302 static void MachineExitForDCMF();
303 #define MachineSpecificExit MachineExitForDCMF
304
305 /* ### End of Machine-running Related Functions ### */
306
307 /* ### Beginning of Idle-state Related Functions ### */
308
309 /* ### End of Idle-state Related Functions ### */
310
311 void MachinePostNonLocalForDCMF();
312 #define MachineSpecificPostNonLocal MachinePostNonLocalForDCMF
313
314 /* =====End of Declarations of Machine Specific Functions===== */
315
316 /**
317  *  Macros that override the common code, such as
318  *  CMK_SMP_NO_COMMTHD, NETWORK_PROGRESS_PERIOD_DEFAULT,
319  *  USE_COMMON_SYNC_P2P, CMK_HAS_SIZE_IN_MSGHDR,
320  *  CMK_OFFLOAD_BCAST_PROCESS etc.
321  */
322 #define CMK_OFFLOAD_BCAST_PROCESS 1
323 #include "machine-common.c"
324
325 /*######Beginning of functions related with Communication-Op functions ######*/
326
327 /* Utility functions */
328 static inline SMSG_LIST * smsg_allocate() {
329     SMSG_LIST *smsg = (SMSG_LIST *)PCQueuePop(CpvAccess(smsg_list_q));
330     if (smsg != NULL)
331         return smsg;
332
333     void * buf = malloc(sizeof(SMSG_LIST));
334     assert(buf!=NULL);
335     assert (((unsigned)buf & 0x0f) == 0);
336
337     return (SMSG_LIST *) buf;
338 }
339
340 static inline void smsg_free (SMSG_LIST *smsg) {
341     int size = PCQueueLength (CpvAccess(smsg_list_q));
342     if (size < MAX_NUM_SMSGS)
343         PCQueuePush (CpvAccess(smsg_list_q), (char *) smsg);
344     else
345         free (smsg);
346 }
347
348 static void SendMsgsUntil(int targetm) {
349     while (msgQueueLen>targetm) {
350 #if CMK_SMP
351         DCMF_CriticalSection_enter (0);
352 #endif
353
354         while (DCMF_Messager_advance()>0);
355
356 #if CMK_SMP
357         DCMF_CriticalSection_exit (0);
358 #endif
359     }
360 }
361
362 /* Send functions */
363 /* The callback on sender side */
364 #if (DCMF_VERSION_MAJOR >= 2)
365 static void send_done(void *data, DCMF_Error_t *err)
366 #else
367 static void send_done(void *data)
368 #endif
369 /* send done callback: sets the smsg entry to done */
370 {
371     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
372     CmiFree(msg_tmp->msg);
373     smsg_free (msg_tmp);
374     msgQueueLen--;
375 }
376
377 #if (DCMF_VERSION_MAJOR >= 2)
378 static void send_multi_done(void *data, DCMF_Error_t *err)
379 #else
380 static void send_multi_done(void *data)
381 #endif
382 /* send done callback: sets the smsg entry to done */
383 {
384     SMSG_LIST *msg_tmp = (SMSG_LIST *)(data);
385     CmiFree(msg_tmp->msg);
386     free(msg_tmp->pelist);
387     smsg_free(msg_tmp);
388     msgQueueLen--;
389 }
390
391 /* The machine specific send function */
392 static CmiCommHandle MachineSpecificSendForDCMF(int destNode, int size, char *msg, int mode) {
393     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
394     //msg_tmp->destpe = destNode;
395     //msg_tmp->size = size;
396     msg_tmp->msg = msg;
397
398     DCMF_Callback_t cb;
399     DCQuad info;
400
401     cb.function = send_done;
402     cb.clientdata = msg_tmp;
403
404
405 #if CMK_ERROR_CHECKING
406     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
407     CMI_SET_CHECKSUM(msg, size);
408 #endif
409     CMI_MSG_SIZE(msg) = size;
410
411     //msg_tmp->cb.function = send_done;
412     //msg_tmp->cb.clientdata   =   msg_tmp;
413
414     DCMF_Protocol_t *protocol = NULL;
415
416     if (size < 224)
417         protocol = &cmi_dcmf_short_registration;
418     else if (size < 2048)
419         protocol = &cmi_dcmf_eager_registration;
420     else
421         protocol = &cmi_dcmf_rzv_registration;
422
423 #if CMK_SMP
424     DCMF_CriticalSection_enter (0);
425 #endif
426
427     msgQueueLen ++;
428     /*
429      * Original one:
430      *     DCMF_Send (protocol, &msg_tmp->send, msg_tmp->cb,
431                    DCMF_MATCH_CONSISTENCY, msg_tmp->destpe,
432                    msg_tmp->size, msg_tmp->msg, &msg_tmp->info, 1);
433            Ref:http://dcmf.anl-external.org/docs/mpi:dcmfd/group__SEND.html
434      */
435     DCMF_Send (protocol, &msg_tmp->send, cb, DCMF_MATCH_CONSISTENCY,
436                destNode, size, msg, &info, 0);
437
438 #if CMK_SMP
439     DCMF_CriticalSection_exit (0);
440 #endif
441
442     return 0;
443 }
444
445 #define MAX_MULTICAST 128
446 DCMF_Opcode_t  CmiOpcodeList [MAX_MULTICAST];
447
448 void  machineMulticast(int npes, int *pelist, int size, char* msg) {
449     CQdCreate(CpvAccess(cQdState), npes);
450
451     CmiAssert (npes < MAX_MULTICAST);
452
453 #if CMK_ERROR_CHECKING
454     CMI_MAGIC(msg) = CHARM_MAGIC_NUMBER;
455     CMI_SET_CHECKSUM(msg, size);
456 #endif
457
458     CMI_MSG_SIZE(msg) = size;
459
460     SMSG_LIST *msg_tmp = smsg_allocate(); //(SMSG_LIST *) malloc(sizeof(SMSG_LIST));
461
462     //msg_tmp->destpe    = -1;      //multicast operation
463     //msg_tmp->size      = size * npes; //keep track of #bytes outstanding
464     msg_tmp->msg       = msg;
465     msg_tmp->pelist    = pelist;
466
467     DCMF_Multicast_t  mcast_info __attribute__((__aligned__(16)));
468     DCQuad info;
469
470     mcast_info.registration   = & cmi_dcmf_multicast_registration;
471     mcast_info.request        = & msg_tmp->send;
472     mcast_info.cb_done.function    =   send_multi_done;
473     mcast_info.cb_done.clientdata  =   msg_tmp;
474     mcast_info.consistency    =   DCMF_MATCH_CONSISTENCY;
475     mcast_info.connection_id  =   CmiMyPe();
476     mcast_info.bytes          =   size;
477     mcast_info.src            =   msg;
478     mcast_info.nranks         =   npes;
479     mcast_info.ranks          =   (unsigned *)pelist;
480     mcast_info.opcodes        =   CmiOpcodeList;   //static list of MAX_MULTICAST entires with 0 in them
481     mcast_info.flags          =   0;
482     mcast_info.msginfo        =   &info;
483     //mcast_info.count          =   1;
484     mcast_info.count          =   0;
485
486 #if CMK_SMP
487     DCMF_CriticalSection_enter (0);
488 #endif
489     msgQueueLen++;
490     DCMF_Multicast (&mcast_info);
491
492 #if CMK_SMP
493     DCMF_CriticalSection_exit (0);
494 #endif
495 }
496
497 /* Recv functions */
498 /* The callback on the recv side */
499 #if (DCMF_VERSION_MAJOR >= 2)
500 static void recv_done(void *clientdata, DCMF_Error_t * err)
501 #else
502 static void recv_done(void *clientdata)
503 #endif
504 /* recv done callback: push the recved msg to recv queue */
505 {
506
507     char *msg = (char *) clientdata;
508
509     /*printf ("NODE[%d] Recv message done with msg rank %d\n", CmiMyNode(), CMI_DEST_RANK(msg));*/
510     MACHSTATE3(2,"[%d] recv_done begin with msg %p size=%d { ", CmiMyNode(), msg, CMI_MSG_SIZE(msg));
511 #if CMK_ERROR_CHECKING
512     int sndlen = CMI_MSG_SIZE(msg);
513     CMI_CHECK_CHECKSUM(msg, sndlen);
514     if (CMI_MAGIC(msg) != CHARM_MAGIC_NUMBER) { /* received a non-charm msg */
515         CmiAbort("Charm++ Warning: Non Charm++ Message Received. \n");
516         return;
517     }
518 #endif
519
520     handleOneRecvedMsg(CMI_MSG_SIZE(msg), msg);
521
522     outstanding_recvs--;
523     MACHSTATE(2,"} recv_done end ");
524     return;
525 }
526
527 void short_pkt_recv (void             * clientdata,
528                      const DCQuad     * info,
529                      unsigned           count,
530                      unsigned           senderrank,
531                      const char       * buffer,
532                      const unsigned     sndlen) {
533     outstanding_recvs ++;
534     int alloc_size = sndlen;
535
536     char * new_buffer = (char *)CmiAlloc(alloc_size);
537     CmiMemcpy (new_buffer, buffer, sndlen);
538
539 #if (DCMF_VERSION_MAJOR >= 2)
540     recv_done (new_buffer, NULL);
541 #else
542     recv_done (new_buffer);
543 #endif
544 }
545
546 DCMF_Request_t * first_multi_pkt_recv_done (const DCQuad      * info,
547         unsigned            count,
548         unsigned            senderrank,
549         const unsigned      sndlen,
550         unsigned            connid,
551         void              * clientdata,
552         unsigned          * rcvlen,
553         char             ** buffer,
554         unsigned          * pw,
555         DCMF_Callback_t   * cb
556                                            ) {
557     outstanding_recvs ++;
558     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
559     /*printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
560     /* printf ("Receiving %d bytes\n", sndlen); */
561     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
562
563     *buffer = (char *)CmiAlloc(alloc_size);
564     cb->function = recv_done;
565     cb->clientdata = *buffer;
566
567     *pw  = 0x7fffffff;
568     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
569 }
570
571 DCMF_Request_t * first_pkt_recv_done (void              * clientdata,
572                                       const DCQuad      * info,
573                                       unsigned            count,
574                                       unsigned            senderrank,
575                                       const unsigned      sndlen,
576                                       unsigned          * rcvlen,
577                                       char             ** buffer,
578                                       DCMF_Callback_t   * cb
579                                      ) {
580     outstanding_recvs ++;
581     int alloc_size = sndlen + sizeof(DCMF_Request_t) + 16;
582     /* printf ("%d: Receiving message %d bytes from %d\n", CmiMyPe(), sndlen, senderrank);*/
583     /* printf ("Receiving %d bytes\n", sndlen); */
584     *rcvlen = sndlen;  /* to avoid malloc(0) which might return NULL */
585
586     *buffer = (char *)CmiAlloc(alloc_size);
587     cb->function = recv_done;
588     cb->clientdata = *buffer;
589
590     return (DCMF_Request_t *) ALIGN_16(*buffer + sndlen);
591 }
592
593 #if 0
594 /* -----------------------------------------
595  * Rectangular broadcast implementation
596  * -----------------------------------------
597  */
598 unsigned int *ranklist;
599 BGTsC_t        barrier;
600 #define MAX_COMM  256
601 static void * comm_table [MAX_COMM];
602
603 typedef struct rectbcast_msg {
604     BGTsRC_t           request;
605     DCMF_Callback_t    cb;
606     char              *msg;
607 } RectBcastInfo;
608
609
610 static void bcast_done (void *data) {
611     RectBcastInfo *rinfo = (RectBcastInfo *) data;
612     CmiFree (rinfo->msg);
613     free (rinfo);
614 }
615
616 static  void *   getRectBcastRequest (unsigned comm) {
617     return comm_table [comm];
618 }
619
620
621 static  void *  bcast_recv     (unsigned               root,
622                                 unsigned               comm,
623                                 const unsigned         sndlen,
624                                 unsigned             * rcvlen,
625                                 char                ** rcvbuf,
626                                 DCMF_Callback_t      * const cb) {
627
628     int alloc_size = sndlen + sizeof(BGTsRC_t) + 16;
629
630     *rcvlen = sndlen;  /* to avoid malloc(0) which might
631                                    return NULL */
632
633     *rcvbuf       =  (char *)CmiAlloc(alloc_size);
634     cb->function  =   recv_done;
635     cb->clientdata = *rcvbuf;
636
637     return (BGTsRC_t *) ALIGN_16 (*rcvbuf + sndlen);
638
639 }
640
641
642 extern void bgl_machine_RectBcast (unsigned                 commid,
643                                    const char             * sndbuf,
644                                    unsigned                 sndlen) {
645     RectBcastInfo *rinfo  =   (RectBcastInfo *) malloc (sizeof(RectBcastInfo));
646     rinfo->cb.function    =   bcast_done;
647     rinfo->cb.clientdata  =   rinfo;
648
649     BGTsRC_AsyncBcast_start (commid, &rinfo->request, &rinfo->cb, sndbuf, sndlen);
650
651 }
652
653 extern void        bgl_machine_RectBcastInit  (unsigned               commID,
654         const BGTsRC_Geometry_t* geometry) {
655
656     CmiAssert (commID < 256);
657     CmiAssert (comm_table [commID] == NULL);
658
659     BGTsRC_t *request =  (BGTsRC_t *) malloc (sizeof (BGTsRC_t));
660     comm_table [commID] = request;
661
662     BGTsRC_AsyncBcast_init  (request, commID,  geometry);
663 }
664
665 /*--------------------------------------------------------------
666  *----- End Rectangular Broadcast Implementation ---------------
667  *--------------------------------------------------------------*/
668 #endif
669
670
671 /*######End of functions related with Communication-Op functions ######*/
672
673
674 /* ######Beginning of functions related with communication progress ###### */
675 static INLINE_KEYWORD void AdvanceCommunicationForDCMF() {
676 #if CMK_SMP
677     DCMF_CriticalSection_enter (0);
678 #endif
679
680     while (DCMF_Messager_advance()>0);
681     //DCMF_Messager_advance();
682
683 #if CMK_SMP
684     DCMF_CriticalSection_exit (0);
685 #endif
686 }
687 /* ######End of functions related with communication progress ###### */
688
/* Hook bound to MachineSpecificPostNonLocal; this layer has no work to do
 * here. */
void MachinePostNonLocalForDCMF() {
}
692
/* Network progress function: polls the network for messages (this flushes
   receive buffers on some implementations) and, when immediate messages are
   enabled, handles any that are pending. */
#if CMK_MACHINE_PROGRESS_DEFINED
void CmiMachineProgressImpl() {
    AdvanceCommunicationForDCMF();
#if CMK_IMMEDIATE_MSG
    CmiHandleImmediate();
#endif
}
#endif
703
704 /* ######Beginning of functions related with exiting programs###### */
705 static void DrainResourcesForDCMF() {
706     while (msgQueueLen > 0 || outstanding_recvs > 0) {
707         AdvanceCommunicationForDCMF();
708     }
709 }
710
/* Shut the DCMF messager down and terminate the process with a success
 * status; does not return. */
static void MachineExitForDCMF() {
    DCMF_Messager_finalize();

    exit(EXIT_SUCCESS);
}
715 /* ######End of functions related with exiting programs###### */
716
717
718 /* ######Beginning of functions related with starting programs###### */
719 /**
720  *  Obtain the number of nodes, my node id, and consuming machine layer
721  *  specific arguments
722  */
723 static void MachineInitForDCMF(int *argc, char ***argv, int *numNodes, int *myNodeID) {
724
725     DCMF_Messager_initialize();
726
727 #if CMK_SMP
728     DCMF_Configure_t  config_in, config_out;
729     config_in.thread_level= DCMF_THREAD_MULTIPLE;
730     config_in.interrupts  = DCMF_INTERRUPTS_OFF;
731
732     DCMF_Messager_configure(&config_in, &config_out);
733     //assert (config_out.thread_level == DCMF_THREAD_MULTIPLE); //not supported in vn mode
734 #endif
735
736     DCMF_Send_Configuration_t short_config, eager_config, rzv_config;
737
738
739     short_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
740     short_config.cb_recv_short = short_pkt_recv;
741     short_config.cb_recv       = first_pkt_recv_done;
742
743 #if (DCMF_VERSION_MAJOR >= 3)
744     short_config.network  = DCMF_DEFAULT_NETWORK;
745 #elif (DCMF_VERSION_MAJOR == 2)
746     short_config.network  = DCMF_DefaultNetwork;
747 #endif
748
749     eager_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
750     eager_config.cb_recv_short = short_pkt_recv;
751     eager_config.cb_recv       = first_pkt_recv_done;
752 #if (DCMF_VERSION_MAJOR >= 3)
753     eager_config.network  = DCMF_DEFAULT_NETWORK;
754 #elif (DCMF_VERSION_MAJOR == 2)
755     eager_config.network  = DCMF_DefaultNetwork;
756 #endif
757
758 #ifdef  OPT_RZV
759 #warning "Enabling Optimize Rzv"
760     rzv_config.protocol        = DCMF_RZV_SEND_PROTOCOL;
761 #else
762     rzv_config.protocol        = DCMF_DEFAULT_SEND_PROTOCOL;
763 #endif
764     rzv_config.cb_recv_short   = short_pkt_recv;
765     rzv_config.cb_recv         = first_pkt_recv_done;
766 #if (DCMF_VERSION_MAJOR >= 3)
767     rzv_config.network  = DCMF_DEFAULT_NETWORK;
768 #elif (DCMF_VERSION_MAJOR == 2)
769     rzv_config.network  = DCMF_DefaultNetwork;
770 #endif
771
772     DCMF_Send_register (&cmi_dcmf_short_registration, &short_config);
773     DCMF_Send_register (&cmi_dcmf_eager_registration, &eager_config);
774     DCMF_Send_register (&cmi_dcmf_rzv_registration,   &rzv_config);
775
776 #ifdef BGP_USE_AM_DIRECT
777     DCMF_Send_Configuration_t direct_config;
778     direct_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
779     direct_config.cb_recv_short = direct_short_pkt_recv;
780     direct_config.cb_recv       = direct_first_pkt_recv_done;
781 #if (DCMF_VERSION_MAJOR >= 3)
782     direct_config.network  = DCMF_DEFAULT_NETWORK;
783 #elif (DCMF_VERSION_MAJOR == 2)
784     direct_config.network  = DCMF_DefaultNetwork;
785 #endif
786     DCMF_Send_register (&cmi_dcmf_direct_registration,   &direct_config);
787     directcb.function=direct_send_done_cb;
788     directcb.clientdata=NULL;
789 #endif
790
791 #ifdef BGP_USE_RDMA_DIRECT
792     /* notification protocol */
793     DCMF_Send_Configuration_t direct_rdma_config;
794     direct_rdma_config.protocol      = DCMF_DEFAULT_SEND_PROTOCOL;
795     direct_rdma_config.cb_recv_short = direct_short_rdma_pkt_recv;
796     direct_rdma_config.cb_recv       = direct_first_rdma_pkt_recv_done;
797 #if (DCMF_VERSION_MAJOR >= 3)
798     direct_rdma_config.network  = DCMF_DEFAULT_NETWORK;
799 #elif (DCMF_VERSION_MAJOR == 2)
800     direct_rdma_config.network  = DCMF_DefaultNetwork;
801 #endif
802     DCMF_Send_register (&cmi_dcmf_direct_rdma_registration,   &direct_rdma_config);
803     directcb.function=direct_send_rdma_done_cb;
804     directcb.clientdata=NULL;
805     /* put protocol */
806     DCMF_Put_Configuration_t put_configuration = { DCMF_DEFAULT_PUT_PROTOCOL };
807     DCMF_Put_register (&cmi_dcmf_direct_put_registration, &put_configuration);
808     DCMF_Get_Configuration_t get_configuration = { DCMF_DEFAULT_GET_PROTOCOL };
809     DCMF_Get_register (&cmi_dcmf_direct_get_registration, &get_configuration);
810
811 #endif
812     //fprintf(stderr, "Initializing Eager Protocol\n");
813
814     *numNodes = DCMF_Messager_size();
815     *myNodeID = DCMF_Messager_rank();
816
817     CmiBarrier();
818     CmiBarrier();
819     CmiBarrier();
820
821     /* NOTE: the following codes requires #PEs, which is not available
822      * until this function finishes. And it allocate O(p) space */
823     int totalPEs = _Cmi_mynodesize * (*numNodes);
824     DCMF_Multicast_Configuration_t mconfig;
825     mconfig.protocol = DCMF_MEMFIFO_DMA_MSEND_PROTOCOL;
826     mconfig.cb_recv  = first_multi_pkt_recv_done;
827     mconfig.clientdata = NULL;
828     mconfig.connectionlist = (void **) malloc (totalPEs * sizeof(unsigned long));
829     mconfig.nconnections = totalPEs;
830     DCMF_Multicast_register(&cmi_dcmf_multicast_registration, &mconfig);
831
832     int actualNodeSize = _Cmi_mynodesize;
833 #if !CMK_SMP_NO_COMMTHD
834     actualNodeSize++; //considering the extra comm thread
835 #endif
836     int i;
837     procState = (ProcState *)CmiAlloc((actualNodeSize) * sizeof(ProcState));
838     for (i=0; i<actualNodeSize; i++) {
839         /*    procState[i].sendMsgBuf = PCQueueCreate();   */
840         procState[i].recvLock = CmiCreateLock();
841         procState[i].bcastLock = CmiCreateLock();
842     }
843
844     /* checksum flag */
845     if (CmiGetArgFlag(*argv,"+checksum")) {
846 #if CMK_ERROR_CHECKING
847         checksum_flag = 1;
848         if (*myNodeID == 0) CmiPrintf("Charm++: CheckSum checking enabled! \n");
849 #else
850         if (*myNodeID == 0) CmiPrintf("Charm++: +checksum ignored in optimized version! \n");
851 #endif
852     }
853
854 }
855
856 static void MachinePreCommonInitForDCMF(int everReturn) {
857     CpvInitialize(PCQueue, smsg_list_q);
858     CpvAccess(smsg_list_q) = PCQueueCreate();
859 }
860
861 static void MachinePostCommonInitForDCMF(int everReturn) {
862 #if !CMK_SMP || CMK_SMP_NO_COMMTHD
863     CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)CmiNotifyIdle,NULL);
864 #endif
865
866     CmiBarrier();
867 }
868 /* ######End of functions related with starting programs###### */
869
870 /***********************************************************************
871  *
872  * Abort function:
873  *
874  ************************************************************************/
875
876 void CmiAbort(const char *message) {
877     CmiError("------------- Processor %d Exiting: Called CmiAbort ------------\n"
878              "{snd:%d,rcv:%d} Reason: %s\n",CmiMyPe(),
879              msgQueueLen, outstanding_recvs, message);
880
881 #if 0
882     /* Since it's a abort, why bother to drain the resources? The system
883      * should clean it self
884      */
885     /* FIXME: what happens in the SMP mode??? */
886     DrainResourcesForDCMF();
887 #endif
888     assert(0);
889 }
890
891
892 /*********** Beginning of MULTICAST/VECTOR SENDING FUNCTIONS **************/
/*
 * Depending on certain flags, some other delivery functions may be needed.
 */
897
898 #if !CMK_MULTICAST_LIST_USE_COMMON_CODE
899
/*
 * Synchronous list send: passes a copy of msg (made with CopyMsg) to
 * CmiFreeListSendFn, so the caller's msg buffer is not the one handed to
 * the freeing send.
 *
 *   npes  number of entries in pes
 *   pes   destination processor list
 *   size  message size in bytes, as passed to CopyMsg
 *   msg   message to duplicate and send
 */
void CmiSyncListSendFn(int npes, int *pes, int size, char *msg) {
    CmiFreeListSendFn(npes, pes, size, CopyMsg(msg, size));
}
904
/* According to Sameer Kumar, this optimized multicast only helps NAMD
 * when #atoms/CPU is less than 10, so it is off by default.
 */
909 #define OPTIMIZED_MULTICAST  0
910
911 #if OPTIMIZED_MULTICAST
912 #warning "Using Optimized Multicast"
913 #endif
914
/*
 * Send msg to every processor in pes[0..npes-1]; the message is handed over
 * to the send routines (the last send uses CmiFreeSendFn).
 *
 *   npes  number of destinations; must be >= 1 (asserted)
 *   pes   destination processor list
 *   size  message size in bytes
 *   msg   message to send
 *
 * Two compile-time variants exist:
 *   - OPTIMIZED_MULTICAST: build a destination list and issue one
 *     machineMulticast call.
 *   - otherwise: loop over the destinations issuing point-to-point sends.
 */
void CmiFreeListSendFn(int npes, int *pes, int size, char *msg) {
    CmiAssert(npes>=1);
    if (npes==1) {
        /* Single destination: plain point-to-point send. */
        CmiFreeSendFn(pes[0], size, msg);
        return;
    }

    int idx;
#if OPTIMIZED_MULTICAST
    /* NOTE(review): remotePes is never freed in this function, including on
     * the early return below; whether machineMulticast takes ownership of
     * the list is not visible here -- confirm. */
    int remoteCount = npes;
    int *remotePes = (int *)malloc(sizeof(int)*npes);
    memcpy(remotePes, pes, sizeof(int)*npes);
#if CMK_SMP
    /* Deliver to same-node destinations directly and keep only the
     * off-node ones for the multicast. */
    remoteCount = 0;
    for (idx = 0; idx < npes; idx++) {
        if (CmiNodeOf(pes[idx]) != CmiMyNode()) {
            remotePes[remoteCount++] = pes[idx];
        } else {
            CmiSyncSend(pes[idx], size, msg);
        }
    }
    if (remoteCount == 0) {
        CmiFree(msg);
        return;
    }
#endif

    CMI_SET_BROADCAST_ROOT(msg,0);
#if CMK_SMP
#error optimized multicast should not be enabled in SMP mode
#else
    CMI_DEST_RANK(msg) = 0;
#endif

    CQdCreate(CpvAccess(cQdState), remoteCount);
    machineMulticast (remoteCount, remotePes, size, msg);
#else /* non-optimized multicast */

    const int lastIdx = npes - 1;
    for (idx = 0; idx < lastIdx; idx++) {
#if CMK_SMP
        CmiSyncSend(pes[idx], size, msg);
#else
        /* Take an extra reference before each freeing send so the same
         * buffer can be handed to CmiFreeSendFn again. */
        CmiReference(msg);
        CmiFreeSendFn(pes[idx], size, msg);
#endif
    }
    /* The final destination gets the freeing send. */
    CmiFreeSendFn(pes[lastIdx], size, msg);
#endif /* end of #if OPTIMIZED_MULTICAST */
}
967 #endif /* end of #if !CMK_MULTICAST_LIST_USE_COMMON_CODE */
968
969 /*********** End of MULTICAST/VECTOR SENDING FUNCTIONS **************/
970
971 /**************************  TIMER FUNCTIONS **************************/
972
973 /************Barrier Related Functions****************/
974 /* Barrier related functions */
/* TODO: does DCMF provide any barrier-related functions? --Chao Mei */
976 /* Barrier needs to be implemented!!! -Chao Mei */
977 /* These two barriers are only needed by CmiTimerInit to synchronize all the
978    threads. They do not need to provide a general barrier. */
/**
 * Placeholder barrier: performs no synchronization and returns immediately.
 *
 * @return always 0.
 */
int CmiBarrier(void) {
    return 0;
}
/**
 * Placeholder barrier: performs no synchronization and returns immediately.
 *
 * @return always 0.
 */
int CmiBarrierZero(void) {
    return 0;
}
985
986 #include "manytomany.c"
987
/*********************************************************************************************
This section is for CmiDirect. This is a variant of the persistent communication in which
the user can transfer data between processors without using Charm++ messages. This lets the user
send and receive data from the middle of their arrays without any copying on either the send or
the receive side.
*********************************************************************************************/
994
995
996 #ifdef BGP_USE_AM_DIRECT
997
998 #include "cmidirect.h"
999
/* We can avoid a receiver side lookup by just sending the whole shebang.
   DCMF header is in units of quad words (16 bytes), so we'd need less than a
   quad word for the handle if we just sent that and did a lookup. Or exactly
   2 quad words for the buffer pointer, callback pointer, callback
   data pointer, and DCMF_Request_t pointer with no lookup.

   Since CmiDirect is generally going to be used for messages which aren't
   tiny, the extra 16 bytes is not likely to impact performance noticeably, and
   not having to look up handles in tables simplifies the code enormously.

   EJB   2008/4/2
*/
1012
1013
1014 /**
1015  To be called on the receiver to create a handle and return its number
1016 **/
1017 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1018     /* with two-sided primitives we just bundle the buffer and callback info into the handle so the sender can remind us about it later. */
1019     struct infiDirectUserHandle userHandle;
1020     userHandle.handle=1; /* doesn't matter on BG/P*/
1021     userHandle.senderNode=senderNode;
1022     userHandle.recverNode=_Cmi_mynode;
1023     userHandle.recverBufSize=recvBufSize;
1024     userHandle.recverBuf=recvBuf;
1025     userHandle.initialValue=initialValue;
1026     userHandle.callbackFnPtr=callbackFnPtr;
1027     userHandle.callbackData=callbackData;
1028     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1029 #if CMI_DIRECT_DEBUG
1030     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1031 #endif
1032     return userHandle;
1033 }
1034
1035 /****
1036  To be called on the sender to attach the sender's buffer to this handle
1037 ******/
1038
1039 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1040
1041     /* one-sided primitives would require registration of memory */
1042
1043     /* with two-sided primitives we just record the sender buf in the handle */
1044     userHandle->senderBuf=sendBuf;
1045     CmiAssert(sendBufSize==userHandle->recverBufSize);
1046     userHandle->DCMF_rq_tsend = (DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1047 #if CMI_DIRECT_DEBUG
1048     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1049 #endif
1050
1051 }
1052
1053 /****
1054 To be called on the sender to do the actual data transfer
1055 ******/
1056 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1057     /** invoke a DCMF_Send with the direct callback */
1058     DCMF_Protocol_t *protocol = NULL;
1059     protocol = &cmi_dcmf_direct_registration;
1060     /* local copy */
1061     CmiAssert(userHandle->recverBuf!=NULL);
1062     CmiAssert(userHandle->senderBuf!=NULL);
1063     CmiAssert(userHandle->recverBufSize>0);
1064     if (userHandle->recverNode== _Cmi_mynode) {
1065 #if CMI_DIRECT_DEBUG
1066         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1067 #endif
1068
1069         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1070         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1071     } else {
1072         dcmfDirectMsgHeader msgHead;
1073         msgHead.recverBuf=userHandle->recverBuf;
1074         msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1075         msgHead.callbackData=userHandle->callbackData;
1076         msgHead.DCMF_rq_t=(DCMF_Request_t *) userHandle->DCMF_rq_trecv;
1077 #if CMK_SMP
1078         DCMF_CriticalSection_enter (0);
1079 #endif
1080 #if CMI_DIRECT_DEBUG
1081         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1082 #endif
1083         DCMF_Send (protocol,
1084                    (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1085                    directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1086                    userHandle->recverBufSize, userHandle->senderBuf,
1087                    (struct DCQuad *) &(msgHead), 2);
1088
1089 #if CMK_SMP
1090         DCMF_CriticalSection_exit (0);
1091 #endif
1092     }
1093 }
1094
/* Get is not available in the active-message variant: always aborts with a
 * message pointing at BGP_USE_RDMA_DIRECT. userHandle is not read. */
void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
    CmiAbort("Not Implemented, switch to #define BGP_USE_RDMA_DIRECT");
}
1098
1099 /**** up to the user to safely call this */
1100 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1101     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1102 #if CMK_SMP
1103     DCMF_CriticalSection_enter (0);
1104 #endif
1105     CmiFree(userHandle->DCMF_rq_tsend);
1106 #if CMK_SMP
1107     DCMF_CriticalSection_exit (0);
1108 #endif
1109
1110 }
1111
1112 /**** up to the user to safely call this */
1113 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1114     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1115 #if CMK_SMP
1116     DCMF_CriticalSection_enter (0);
1117 #endif
1118     CmiFree(userHandle->DCMF_rq_trecv);
1119
1120 #if CMK_SMP
1121     DCMF_CriticalSection_exit (0);
1122 #endif
1123 }
1124
1125
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1130
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1135
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1140
1141 #endif /* BGP_USE_AM_DIRECT*/
1142
1143 #ifdef BGP_USE_RDMA_DIRECT
1144
1145 #include "cmidirect.h"
1146
/*
   Notification protocol passes callback function and data in a single
   quadword.  This occurs in a message triggered by the sender side ack
   callback and therefore has higher latency than polling, but is guaranteed
   to be semantically correct.  The latency for a single packet that isn't
   hitting charm/converse should be pretty minimal, but you could run into
   sender side progress issues.  The alternative of polling on the out of band
   byte scheme creates correctness issues in that the data really has to be
   out of band and you rely on the buffer being written in order.  It also has
   annoying polling issues.  A third scheme could add a second put to a
   control region to poll upon and force sequential consistency between
   puts.  It's not really clear that this would be faster or avoid the progress
   issue, since you run into the same issues when enforcing that sequential
   consistency.

   EJB   2011/1/20
*/
1164
1165
1166 /* local function to use the ack as our signal to send a remote notify */
1167 static void CmiNotifyRemoteRDMA(void *handle, struct DCMF_Error_t *error) {
1168     struct infiDirectUserHandle *userHandle= (struct infiDirectUserHandle *) handle;
1169     dcmfDirectRDMAMsgHeader msgHead;
1170     msgHead.callbackFnPtr=userHandle->callbackFnPtr;
1171     msgHead.callbackData=userHandle->callbackData;
1172 #if CMK_SMP
1173     DCMF_CriticalSection_enter (0);
1174 #endif
1175 #if CMI_DIRECT_DEBUG
1176     CmiPrintf("[%d] RDMA notify put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p \n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1177 #endif
1178     DCMF_Result res=DCMF_Send (&cmi_dcmf_direct_rdma_registration,
1179                                userHandle->DCMF_rq_tsend,
1180                                directcb, DCMF_MATCH_CONSISTENCY, userHandle->recverNode,
1181                                sizeof(dcmfDirectRDMAMsgHeader),
1182
1183                                userHandle->DCMF_notify_buf,
1184                                (struct DCQuad *) &(msgHead), 1);
1185 //    CmiAssert(res==DCMF_SUCCESS);
1186 #if CMK_SMP
1187     DCMF_CriticalSection_exit (0);
1188 #endif
1189 }
1190
1191 /**
1192  To be called on the receiver to create a handle and return its number
1193 **/
1194
1195
1196 struct infiDirectUserHandle CmiDirect_createHandle(int senderNode,void *recvBuf, int recvBufSize, void (*callbackFnPtr)(void *), void *callbackData,double initialValue) {
1197     /* one-sided primitives require registration of memory */
1198     struct infiDirectUserHandle userHandle;
1199     size_t numbytesRegistered=0;
1200     DCMF_Result regresult=DCMF_Memregion_create( &userHandle.DCMF_recverMemregion,
1201                           &numbytesRegistered,
1202                           recvBufSize,
1203                           recvBuf,
1204                           0);
1205     CmiAssert(numbytesRegistered==recvBufSize);
1206     CmiAssert(regresult==DCMF_SUCCESS);
1207
1208
1209     userHandle.handle=1; /* doesn't matter on BG/P*/
1210     userHandle.senderNode=senderNode;
1211     userHandle.recverNode=_Cmi_mynode;
1212     userHandle.recverBufSize=recvBufSize;
1213     userHandle.recverBuf=recvBuf;
1214     userHandle.initialValue=initialValue;
1215     userHandle.callbackFnPtr=callbackFnPtr;
1216     userHandle.callbackData=callbackData;
1217     userHandle.DCMF_rq_trecv=(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1218 #if CMI_DIRECT_DEBUG
1219     CmiPrintf("[%d] RDMA create addr %p %d callback %p callbackdata %p\n",CmiMyPe(),userHandle.recverBuf,userHandle.recverBufSize, userHandle.callbackFnPtr, userHandle.callbackData);
1220 #endif
1221     return userHandle;
1222 }
1223
1224 /****
1225  To be called on the sender to attach the sender's buffer to this handle
1226 ******/
1227
1228 void CmiDirect_assocLocalBuffer(struct infiDirectUserHandle *userHandle,void *sendBuf,int sendBufSize) {
1229     /* one-sided primitives would require registration of memory */
1230     userHandle->senderBuf=sendBuf;
1231     CmiAssert(sendBufSize==userHandle->recverBufSize);
1232     userHandle->DCMF_rq_tsend =(DCMF_Request_t *) ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+16));
1233     size_t numbytesRegistered=0;  // set as return value from create
1234     userHandle->DCMF_notify_buf=ALIGN_16(CmiAlloc(sizeof(DCMF_Request_t)+32));
1235     userHandle->DCMF_notify_cb.function=CmiNotifyRemoteRDMA;
1236     userHandle->DCMF_notify_cb.clientdata=userHandle;
1237     DCMF_Result regresult=DCMF_Memregion_create( &userHandle->DCMF_senderMemregion,
1238                           &numbytesRegistered,
1239                           sendBufSize,
1240                           sendBuf,
1241                           0);
1242     CmiAssert(numbytesRegistered==sendBufSize);
1243     CmiAssert(regresult==DCMF_SUCCESS);
1244
1245 #if CMI_DIRECT_DEBUG
1246     CmiPrintf("[%d] RDMA assoc addr %p %d to receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,sendBufSize, userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1247 #endif
1248
1249 }
1250
1251
1252 /****
1253 To be called on the sender to do the actual data transfer
1254 ******/
1255 void CmiDirect_put(struct infiDirectUserHandle *userHandle) {
1256     /** invoke a DCMF_Put with the direct callback */
1257
1258     CmiAssert(userHandle->recverBuf!=NULL);
1259     CmiAssert(userHandle->senderBuf!=NULL);
1260     CmiAssert(userHandle->recverBufSize>0);
1261     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1262 #if CMI_DIRECT_DEBUG
1263         CmiPrintf("[%d] RDMA local put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1264 #endif
1265
1266         CmiMemcpy(userHandle->recverBuf,userHandle->senderBuf,userHandle->recverBufSize);
1267         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1268     } else {
1269 #if CMK_SMP
1270         DCMF_CriticalSection_enter (0);
1271 #endif
1272 #if CMI_DIRECT_DEBUG
1273         CmiPrintf("[%d] RDMA put addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1274 #endif
1275         DCMF_Result
1276         Res= DCMF_Put(&cmi_dcmf_direct_put_registration,
1277                       userHandle->DCMF_rq_tsend,
1278                       directcb, DCMF_RELAXED_CONSISTENCY,
1279                       userHandle->recverNode,
1280                       userHandle->recverBufSize,
1281                       &userHandle->DCMF_senderMemregion,
1282                       &userHandle->DCMF_recverMemregion,
1283                       0, /* offsets are zero */
1284                       0,
1285                       userHandle->DCMF_notify_cb
1286                      );
1287         CmiAssert(Res==DCMF_SUCCESS);
1288 #if CMK_SMP
1289         DCMF_CriticalSection_exit (0);
1290 #endif
1291     }
1292 }
1293
1294 /****
1295 To be called on the receiver to initiate the actual data transfer
1296 ******/
1297 void CmiDirect_get(struct infiDirectUserHandle *userHandle) {
1298     /** invoke a DCMF_Get with the direct callback */
1299
1300     CmiAssert(userHandle->recverBuf!=NULL);
1301     CmiAssert(userHandle->senderBuf!=NULL);
1302     CmiAssert(userHandle->recverBufSize>0);
1303     if (userHandle->recverNode== _Cmi_mynode) {     /* local copy */
1304 #if CMI_DIRECT_DEBUG
1305         CmiPrintf("[%d] RDMA local get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1306 #endif
1307
1308         CmiMemcpy(userHandle->senderBuf,userHandle->recverBuf,userHandle->recverBufSize);
1309         (*(userHandle->callbackFnPtr))(userHandle->callbackData);
1310     } else {
1311         struct DCMF_Callback_t done_cb;
1312         done_cb.function=userHandle->callbackFnPtr;
1313         done_cb.clientdata=userHandle->callbackData;
1314 #if CMK_SMP
1315         DCMF_CriticalSection_enter (0);
1316 #endif
1317 #if CMI_DIRECT_DEBUG
1318         CmiPrintf("[%d] RDMA get addr %p %d to recverNode %d receiver addr %p callback %p callbackdata %p\n",CmiMyPe(),userHandle->senderBuf,userHandle->recverBufSize, userHandle->recverNode,userHandle->recverBuf, userHandle->callbackFnPtr, userHandle->callbackData);
1319 #endif
1320         DCMF_Result
1321         Res= DCMF_Get(&cmi_dcmf_direct_get_registration,
1322                       (DCMF_Request_t *) userHandle->DCMF_rq_tsend,
1323                       done_cb, DCMF_RELAXED_CONSISTENCY,
1324                       userHandle->recverNode,
1325                       userHandle->recverBufSize,
1326                       & userHandle->DCMF_recverMemregion,
1327                       & userHandle->DCMF_senderMemregion,
1328                       0, /* offsets are zero */
1329                       0
1330                      );
1331         CmiAssert(Res==DCMF_SUCCESS);
1332
1333
1334 #if CMK_SMP
1335         DCMF_CriticalSection_exit (0);
1336 #endif
1337     }
1338 }
1339
1340 /**** up to the user to safely call this */
1341 void CmiDirect_deassocLocalBuffer(struct infiDirectUserHandle *userHandle) {
1342     CmiAssert(userHandle->senderNode==_Cmi_mynode);
1343 #if CMK_SMP
1344     DCMF_CriticalSection_enter (0);
1345 #endif
1346
1347     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_senderMemregion);
1348     CmiFree(userHandle->DCMF_notify_buf);
1349     CmiFree(userHandle->DCMF_rq_tsend);
1350 #if CMK_SMP
1351     DCMF_CriticalSection_exit (0);
1352 #endif
1353
1354 }
1355
1356 /**** up to the user to safely call this */
1357 void CmiDirect_destroyHandle(struct infiDirectUserHandle *userHandle) {
1358     CmiAssert(userHandle->recverNode==_Cmi_mynode);
1359 #if CMK_SMP
1360     DCMF_CriticalSection_enter (0);
1361 #endif
1362
1363     DCMF_Memregion_destroy((DCMF_Memregion_t*) userHandle->DCMF_recverMemregion);
1364     CmiFree(userHandle->DCMF_rq_trecv);
1365
1366 #if CMK_SMP
1367     DCMF_CriticalSection_exit (0);
1368 #endif
1369 }
1370
1371
1372
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_ready(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1377
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_readyPollQ(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1382
/**** Should not be called the first time.
 *
 * Intentionally empty: this is a no-op on BGP and userHandle is not read.
 */
void CmiDirect_readyMark(struct infiDirectUserHandle *userHandle) {
    (void)userHandle;
}
1387
1388 #endif /* BGP_USE_RDMA_DIRECT*/
1389
1390 /*@}*/
1391