separate get and put counters
author Gengbin Zheng <gzheng@h2ologin1.(none)>
Thu, 22 Mar 2012 01:18:11 +0000 (20:18 -0500)
committer Gengbin Zheng <gzheng@h2ologin1.(none)>
Thu, 22 Mar 2012 01:18:11 +0000 (20:18 -0500)
src/arch/gemini_gni/machine-persistent.c
src/arch/gemini_gni/machine.c
src/arch/util/persist-comm.c

index e436889632e5130ba6d1ed5e66e63cf1238c2b08..ff59652084adccd32296970b37fc1575e04a5b3a 100644 (file)
@@ -32,9 +32,9 @@ void LrtsSendPersistentMsg(PersistentHandle h, int destNode, int size, void *m)
         CmiAbort("Abort: Invalid size\n");
     }
 
-     //CmiPrintf("[%d] LrtsSendPersistentMsg h=%p hdl=%d destNode=%d destAddress=%p size=%d\n", CmiMyPe(), h, CmiGetHandler(m), destNode, slot->destBuf[0].destAddress, size);
-
     if (slot->destBuf[0].destAddress) {
+        // CmiPrintf("[%d] LrtsSendPersistentMsg h=%p hdl=%d destNode=%d destAddress=%p size=%d\n", CmiMyPe(), h, CmiGetHandler(m), destNode, slot->destBuf[0].destAddress, size);
+
         // uGNI part
         MallocPostDesc(pd);
         if(size <= LRTS_GNI_RDMA_THRESHOLD) {
@@ -60,6 +60,8 @@ void LrtsSendPersistentMsg(PersistentHandle h, int destNode, int size, void *m)
         pd->sync_flag_addr = 1000000 * CmiWallTimer(); //microsecond
 #endif
         SetMemHndlZero(pd->local_mem_hndl);
+
+         /* always buffer */
 #if CMK_SMP || 1
 #if REMOTE_EVENT
         bufferRdmaMsg(destNode, pd, (int)(size_t)(slot->destHandle));
@@ -77,7 +79,7 @@ void LrtsSendPersistentMsg(PersistentHandle h, int destNode, int size, void *m)
         if (status == GNI_RC_SUCCESS) 
         {
 #if CMK_WITH_STATS
-            RDMA_TRY_SEND()
+            RDMA_TRY_SEND(pd->type)
 #endif
          if(pd->type == GNI_POST_RDMA_PUT) 
             status = GNI_PostRdma(ep_hndl_array[destNode], pd);
@@ -98,7 +100,7 @@ void LrtsSendPersistentMsg(PersistentHandle h, int destNode, int size, void *m)
             GNI_RC_CHECK("AFter posting", status);
 #if  CMK_WITH_STATS
             pd->sync_flag_value = 1000000 * CmiWallTimer(); //microsecond
-            RDMA_TRANS_INIT(pd->sync_flag_addr/1000000.0)
+            RDMA_TRANS_INIT(pd->type, pd->sync_flag_addr/1000000.0)
 #endif
         }
 #endif
index bbaad4242729cba668158cc4863b32241c5db7d0..fdb7cb6294a3362237da308b44d21e296ceea479 100644 (file)
@@ -46,7 +46,7 @@
 #include "cmidirect.h"
 #endif
 
-#define     LARGEPAGE              0
+#define     LARGEPAGE              1
 
 #if CMK_SMP
 #define MULTI_THREAD_SEND          0
@@ -57,7 +57,7 @@
 #define CMK_WORKER_SINGLE_TASK     0
 #endif
 
-#define REMOTE_EVENT               0
+#define REMOTE_EVENT               1
 #define CQWRITE                    0
 
 #define CMI_EXERT_SEND_CAP     0
@@ -770,6 +770,8 @@ void CmiTurnOffStats()
     stats_off = 1;
 }
 
+#define IS_PUT(type)    ((type) == GNI_POST_FMA_PUT || (type) == GNI_POST_RDMA_PUT)  /* nonzero when type is an FMA or RDMA put; argument is parenthesized so expression arguments expand safely */
+
 #if CMK_WITH_STATS
 FILE *counterLog = NULL;
 typedef struct comm_thread_stats
@@ -793,8 +795,8 @@ typedef struct comm_thread_stats
     double    max_time_in_send_buffered_smsg;
     double    all_time_in_send_buffered_smsg;
 
-    uint64_t  rdma_count;
-    uint64_t  try_rdma_count;
+    uint64_t  rdma_get_count, rdma_put_count;
+    uint64_t  try_rdma_get_count, try_rdma_put_count;
     double    max_time_from_control_to_rdma_init;
     double    all_time_from_control_to_rdma_init;
 
@@ -847,7 +849,7 @@ static void init_comm_stats()
             comm_stats.try_smsg_count++; \
         }
 
-#define  RDMA_TRY_SEND()        if (print_stats && !stats_off) {comm_stats.try_rdma_count++;}
+#define  RDMA_TRY_SEND(type)        if (print_stats && !stats_off) { if (IS_PUT(type)) comm_stats.try_rdma_put_count++; else comm_stats.try_rdma_get_count++; }  /* counts one post attempt as put or get; call sites invoke it without a trailing ';' */
 
 #define  RDMA_TRANS_DONE(x)      \
          if (print_stats && !stats_off) {  double rdma_trans_time = CmiWallTimer() - x ; \
@@ -855,8 +857,8 @@ static void init_comm_stats()
              comm_stats.all_time_from_rdma_init_to_rdma_done += rdma_trans_time; \
          }
 
-#define  RDMA_TRANS_INIT(x)      \
-         if (print_stats && !stats_off) {   comm_stats.rdma_count++;  \
+#define  RDMA_TRANS_INIT(type, x)      \
+         if (print_stats && !stats_off) {   IS_PUT(type)?comm_stats.rdma_put_count++:comm_stats.rdma_get_count++;  \
              double rdma_trans_time = CmiWallTimer() - x ; \
              if(rdma_trans_time > comm_stats.max_time_from_control_to_rdma_init) comm_stats.max_time_from_control_to_rdma_init = rdma_trans_time; \
              comm_stats.all_time_from_control_to_rdma_init += rdma_trans_time; \
@@ -904,25 +906,25 @@ static void init_comm_stats()
 
 static void print_comm_stats()
 {
-    fprintf(counterLog, "Node[%d]SMSG time in buffer\t[max:%f\tAverage:%f](milisecond)\n", myrank, 1000*comm_stats.max_time_in_send_buffered_smsg, 1000.0*comm_stats.all_time_in_send_buffered_smsg/comm_stats.smsg_count);
-    fprintf(counterLog, "Node[%d]Smsg  Msgs  \t[Total:%lld\t Data:%lld\t Lmsg_Init:%lld\t ACK:%lld\t BIG_MSG_ACK:%lld Direct_put_done:%lld\t Persistent_put_done:%lld]\n", myrank, 
+    fprintf(counterLog, "Node[%d] SMSG time in buffer\t[max:%f\tAverage:%f](milisecond)\n", myrank, 1000*comm_stats.max_time_in_send_buffered_smsg, 1000.0*comm_stats.all_time_in_send_buffered_smsg/comm_stats.smsg_count);
+    fprintf(counterLog, "Node[%d] Smsg  Msgs  \t[Total:%lld\t Data:%lld\t Lmsg_Init:%lld\t ACK:%lld\t BIG_MSG_ACK:%lld Direct_put_done:%lld\t Persistent_put_done:%lld]\n", myrank, 
             comm_stats.smsg_count, comm_stats.smsg_data_count, comm_stats.lmsg_init_count, 
             comm_stats.ack_count, comm_stats.big_msg_ack_count, comm_stats.direct_put_done_count, comm_stats.put_done_count);
     
-    fprintf(counterLog, "Node[%d]SmsgSendCalls\t[Total:%lld\t Data:%lld\t Lmsg_Init:%lld\t ACK:%lld\t BIG_MSG_ACK:%lld Direct_put_done:%lld\t Persistent_put_done:%lld]\n\n", myrank, 
+    fprintf(counterLog, "Node[%d] SmsgSendCalls\t[Total:%lld\t Data:%lld\t Lmsg_Init:%lld\t ACK:%lld\t BIG_MSG_ACK:%lld Direct_put_done:%lld\t Persistent_put_done:%lld]\n\n", myrank, 
             comm_stats.try_smsg_count, comm_stats.try_smsg_data_count, comm_stats.try_lmsg_init_count, 
             comm_stats.try_ack_count, comm_stats.try_big_msg_ack_count, comm_stats.try_direct_put_done_count, comm_stats.try_put_done_count);
 
-    fprintf(counterLog, "Node[%d]Rdma Transaction [count:%lld\t calls:%lld]\n", myrank, comm_stats.rdma_count, comm_stats.try_rdma_count);
-    fprintf(counterLog, "Node[%d]Rdma time from control arrives to rdma init [MAX:%f\t Average:%f](milisecond)\n", myrank, 1000.0*comm_stats.max_time_from_control_to_rdma_init, 1000.0*comm_stats.all_time_from_control_to_rdma_init/comm_stats.rdma_count); 
-    fprintf(counterLog, "Node[%d]Rdma time from init to rdma done [MAX:%f\t Average:%f](milisecond)\n\n", myrank, 1000.0*comm_stats.max_time_from_rdma_init_to_rdma_done, 1000.0*comm_stats.all_time_from_rdma_init_to_rdma_done/comm_stats.rdma_count); 
+    fprintf(counterLog, "Node[%d] Rdma Transaction [count (GET/PUT):%lld %lld\t calls (GET/PUT):%lld %lld]\n", myrank, comm_stats.rdma_get_count, comm_stats.rdma_put_count, comm_stats.try_rdma_get_count, comm_stats.try_rdma_put_count);
+    fprintf(counterLog, "Node[%d] Rdma time from control arrives to rdma init [MAX:%f\t Average:%f](milisecond)\n", myrank, 1000.0*comm_stats.max_time_from_control_to_rdma_init, 1000.0*comm_stats.all_time_from_control_to_rdma_init/(comm_stats.rdma_get_count+comm_stats.rdma_put_count)); 
+    fprintf(counterLog, "Node[%d] Rdma time from init to rdma done [MAX:%f\t Average:%f](milisecond)\n\n", myrank, 1000.0*comm_stats.max_time_from_rdma_init_to_rdma_done, 1000.0*comm_stats.all_time_from_rdma_init_to_rdma_done/(comm_stats.rdma_get_count+comm_stats.rdma_put_count));
+
 
-    fprintf(counterLog, "Node[%d]Rdma time from init to rdma done [MAX:%f\t Average:%f](milisecond)\n\n", myrank, 1000.0*comm_stats.max_time_from_rdma_init_to_rdma_done, 1000.0*comm_stats.all_time_from_rdma_init_to_rdma_done/comm_stats.rdma_count); 
-    fprintf(counterLog, "                          count\ttotal time\tmax \n", myrank);
-    fprintf(counterLog, "PumpNetworkSmsg:              %d\t%.3f\t%.3f\n", comm_stats.count_in_PumpNetwork, comm_stats.time_in_PumpNetwork, comm_stats.max_time_in_PumpNetwork);
-    fprintf(counterLog, "PumpRemoteTransactions:       %d\t%.3f\t%.3f\n", comm_stats.count_in_PumpRemoteTransactions, comm_stats.time_in_PumpRemoteTransactions, comm_stats.max_time_in_PumpRemoteTransactions);
-    fprintf(counterLog, "PumpLocalTransactions(RDMA):  %d\t%.3f\t%.3f\n", comm_stats.count_in_PumpLocalTransactions_rdma, comm_stats.time_in_PumpLocalTransactions_rdma, comm_stats.max_time_in_PumpLocalTransactions_rdma);
-    fprintf(counterLog, "SendBufferMsg (SMSG):         %d\t%.3f\t%.3f\n",  comm_stats.count_in_SendBufferMsg_smsg, comm_stats.time_in_SendBufferMsg_smsg, comm_stats.max_time_in_SendBufferMsg_smsg);
+    fprintf(counterLog, "                             count\ttotal_time\tmax \n");  /* column header for the per-phase rows printed below; the format has no conversions, so no extra argument is passed */
+    fprintf(counterLog, "PumpNetworkSmsg:              %d\t%.6f\t%.6f\n", comm_stats.count_in_PumpNetwork, comm_stats.time_in_PumpNetwork, comm_stats.max_time_in_PumpNetwork);
+    fprintf(counterLog, "PumpRemoteTransactions:       %d\t%.6f\t%.6f\n", comm_stats.count_in_PumpRemoteTransactions, comm_stats.time_in_PumpRemoteTransactions, comm_stats.max_time_in_PumpRemoteTransactions);
+    fprintf(counterLog, "PumpLocalTransactions(RDMA):  %d\t%.6f\t%.6f\n", comm_stats.count_in_PumpLocalTransactions_rdma, comm_stats.time_in_PumpLocalTransactions_rdma, comm_stats.max_time_in_PumpLocalTransactions_rdma);
+    fprintf(counterLog, "SendBufferMsg (SMSG):         %d\t%.6f\t%.6f\n",  comm_stats.count_in_SendBufferMsg_smsg, comm_stats.time_in_SendBufferMsg_smsg, comm_stats.max_time_in_SendBufferMsg_smsg);
 }
 
 #else
@@ -2392,7 +2394,7 @@ static void getLargeMsgRequest(void* header, uint64_t inst_id )
 #endif
 
 #if CMK_WITH_STATS
-        RDMA_TRY_SEND()
+        RDMA_TRY_SEND(pd->type)
 #endif
         if(pd->type == GNI_POST_RDMA_GET) 
         {
@@ -2418,8 +2420,8 @@ static void getLargeMsgRequest(void* header, uint64_t inst_id )
 #endif
             }
 #if  CMK_WITH_STATS
-                pd->sync_flag_value = 1000000 * CmiWallTimer(); //microsecond
-                RDMA_TRANS_INIT(pd->sync_flag_addr/1000000.0)
+            pd->sync_flag_value = 1000000 * CmiWallTimer(); //microsecond
+            RDMA_TRANS_INIT(pd->type, pd->sync_flag_addr/1000000.0)
 #endif
         }
     }else
@@ -2845,7 +2847,7 @@ static void  SendRdmaMsg()
             }
 #endif
 #if CMK_WITH_STATS
-            RDMA_TRY_SEND()
+            RDMA_TRY_SEND(pd->type)
 #endif
             if(pd->type == GNI_POST_RDMA_GET || pd->type == GNI_POST_RDMA_PUT) 
             {
@@ -2879,7 +2881,7 @@ static void  SendRdmaMsg()
                 }
 #if  CMK_WITH_STATS
                 pd->sync_flag_value = 1000000 * CmiWallTimer(); //microsecond
-                RDMA_TRANS_INIT(pd->sync_flag_addr/1000000.0)
+                RDMA_TRANS_INIT(pd->type, pd->sync_flag_addr/1000000.0)
 #endif
 #if MACHINE_DEBUG_LOG
                 buffered_recv_msg += register_size;
index 6456c3122d2458c0dc3e1ef5cb20e1764e242e0b..acc57f06bac0da27d446cb31aad398dc3a976a3a 100644 (file)
@@ -145,18 +145,21 @@ PersistentHandle getFreeRecvSlot()
 
 PersistentHandle CmiCreatePersistent(int destPE, int maxBytes)
 {
-  PersistentHandle h = getFreeSendSlot();
-
-  PersistentSendsTable *slot = (PersistentSendsTable *)h;
+  PersistentHandle h;
+  PersistentSendsTable *slot;
 
   if (CmiMyNode() == CmiNodeOf(destPE)) return NULL;
 
 /*
   if (CmiMyPe() == destPE) {
-    CmiAbort("CmiCreatePersistent Error: setting up persistent communication to the same processor is not allowed.");
+    CmiPrintf("[%d] CmiCreatePersistent Error>  setting up persistent communication to the same processor is not allowed.\n", CmiMyPe());
+    CmiAbort("CmiCreatePersistent");
   }
 */
 
+  h = getFreeSendSlot();
+  slot = (PersistentSendsTable *)h;
+
   slot->used = 1;
   slot->destPE = destPE;
   slot->sizeMax = maxBytes;