Added the implicit tree broadcast for notifying other helpers about the parallel...
author Chao Mei <chaomei2@illinois.edu>
Thu, 2 Feb 2012 22:11:10 +0000 (16:11 -0600)
committer Chao Mei <chaomei2@illinois.edu>
Thu, 2 Feb 2012 22:11:10 +0000 (16:11 -0600)
NodeHelper.C
NodeHelper.h

index 742e0a995030d8f8857171e4f0766137af5909f0..08ff8988703bbaab7e14cf73c7b3a5dd8b74450d 100644 (file)
@@ -262,12 +262,20 @@ void FuncNodeHelper::parallelizeFunc(HelperFn func, int paramNum, void * param,
         curLoop->set(numChunks, func, lowerRange, upperRange, paramNum, param);
         ConverseNotifyMsg *notifyMsg = &(notifyMsgs[CmiMyRank()]);
         notifyMsg->ptr = (void *)curLoop;
-        
+#if USE_TREE_BROADCAST        
+        notifyMsg->srcRank = CmiMyRank();
+        int loopTimes = TREE_BCAST_BRANCH>(CmiMyNodeSize()-1)?CmiMyNodeSize()-1:TREE_BCAST_BRANCH;
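+        //implicit k-ary tree (fan-out TREE_BCAST_BRANCH): the root notifies only its direct children, i.e. the ranks right after it, wrapping around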
+        int pe = CmiMyRank()+1;        
+        for(int i=0; i<loopTimes; i++, pe++){
+            if(pe >= CmiMyNodeSize()) pe -= CmiMyNodeSize();
+            CmiPushPE(pe, (void *)(notifyMsg));    
+        }
+#else        
         for (int i=0; i<numHelpers; i++) {
-            if (i!=CkMyRank()) {                
-                CmiPushPE(i, (void *)(notifyMsg));
-            }
-        }        
+            if (i!=CkMyRank()) CmiPushPE(i, (void *)(notifyMsg));            
+        }
+#endif            
         curLoop->stealWork();
         TRACE_BRACKET(20);
         
@@ -394,6 +402,20 @@ void NotifySingleHelper(ConverseNotifyMsg *msg){
 }
 
 void SingleHelperStealWork(ConverseNotifyMsg *msg){
+#if USE_TREE_BROADCAST
+    //Relay the notification to this rank's children in the implicit TREE_BCAST_BRANCH-ary tree whose root is msg->srcRank, then fall through to steal work. NOTE: CmiMyNodeSize() stands in for "numHelpers", whose value should be obtained somewhere else.
+    int relPE = CmiMyRank()-msg->srcRank; //offset of this rank from the rank recorded in the message
+    if(relPE<0) relPE += CmiMyNodeSize(); //wrap a negative offset around by the node size
+    
+    //CmiPrintf("Rank[%d]: got msg from src %d with relPE %d\n", CmiMyRank(), msg->srcRank, relPE);
+    relPE=relPE*TREE_BCAST_BRANCH+1; //relative index of this rank's first child
+    for(int i=0; i<TREE_BCAST_BRANCH; i++, relPE++){
+        if(relPE >= CmiMyNodeSize()) break; //child index runs past the node size: no more children
+        int pe = (relPE + msg->srcRank)%CmiMyNodeSize(); //convert the relative index back to an actual rank
+        //CmiPrintf("Rank[%d]: send msg to dst %d (relPE: %d) from src %d\n", CmiMyRank(), pe, relPE, msg->srcRank);
+        CmiPushPE(pe, (void *)msg); //the same msg object is pushed to every child (no copy is made here)
+    }
+#endif
     CurLoopInfo *loop = (CurLoopInfo *)msg->ptr;
     loop->stealWork();
 }
index e5836ac09eb7cd8281d4ec89b0738f4c693e30c5..900254692f16f2bfd938fa3b5f622abdc9609a17 100644 (file)
@@ -9,6 +9,8 @@
 #include "queueing.h"
 
 #define USE_CONVERSE_MSG 1
+#define USE_TREE_BROADCAST 0
+#define TREE_BCAST_BRANCH (4)
 
 /* The following only works on X86_64 platform */
 #define AtomicIncrement(someInt)  __asm__ __volatile__("lock incl (%0)" :: "r" (&(someInt)))
@@ -120,6 +122,9 @@ public:
 
 typedef struct converseNotifyMsg{
     char core[CmiMsgHeaderSizeBytes];
+#if USE_TREE_BROADCAST
+    int srcRank; //rank that started the tree broadcast; receivers compute their tree position relative to it
+#endif    
     void *ptr;
 }ConverseNotifyMsg;