revert cldb.neighbor.c to the old version because the new version has poor performance
authorYanhua Yanhua <sun51@illinois.edu>
Tue, 3 Aug 2010 05:09:31 +0000 (00:09 -0500)
committerYanhua Yanhua <sun51@illinois.edu>
Tue, 3 Aug 2010 05:09:31 +0000 (00:09 -0500)
src/ck-core/init.C
src/conv-ldb/cldb.neighbor.c

index 904764d052457135aed42fcd626d8fbe56f7723d..00b7610968ef7220e7541475c318ff2b1f4ae116 100644 (file)
@@ -863,7 +863,7 @@ extern "C" void CmiInitCPUTopology(char **argv);
 extern "C" void CmiInitCPUAffinity(char **argv);
 extern "C" void CmiInitMemAffinity(char **argv);
 
-extern "C" void CldCallback();
+//extern "C" void CldCallback();
 
 void _registerInitCall(CkInitCallFn fn, int isNodeCall)
 {
@@ -1213,7 +1213,7 @@ void _initCharm(int unused_argc, char **argv)
         }
         CmiInitCPUTopology(argv);
     }
-    CldCallback();
+    //CldCallback();
 #if CMK_BLUEGENE_CHARM && CMK_CHARMDEBUG
       // Register the BG handler for CCS. Notice that this is put into a variable shared by
       // the whole real processor. This because converse needs to find it. We check that all
index 492ff6f4a9028445c46f6a0bbafcb267b364e3d6..014d8e18b8daa5cf3a2ec7f6fff3618f4e196ebf 100644 (file)
 #define TRACE_USEREVENTS        0
 
 #define PERIOD 20                /* default: 30 */
-#define MSGDELAY 10
 #define MAXOVERLOAD 1
 
-#define MAXBEFOREBALANCE 2
-
-#define LOADTHRESH       3
-
-typedef struct loadproc_s{
-    int index;
-    int load;
-}loadproc;
-
 typedef struct CldProcInfo_s {
   double lastCheck;
   int    sent;                 /* flag to disable idle work request */
   int    balanceEvt;           /* user event for balancing */
   int    idleEvt;              /* user event for idle balancing */
   int    idleprocEvt;          /* user event for processing idle req */
-  double lastBalanceTime;
 } *CldProcInfo;
 
 extern char *_lbtopo;                  /* topology name string */
@@ -44,8 +33,6 @@ CpvStaticDeclare(int, MinProc);
 CpvStaticDeclare(int, Mindex);
 CpvStaticDeclare(int, start);
 
-CpvStaticDeclare(int, topologyReady);
-
 void LoadNotifyFn(int l)
 {
   CldProcInfo  cldData = CpvAccess(CldData);
@@ -95,13 +82,8 @@ static void CldStillIdle(void *dummy, double curT)
   CmiBecomeImmediate(&msg);
   for (i=0; i<CpvAccess(numNeighbors); i++) {
     msg.to_rank = CmiRankOf(CpvAccess(neighbors)[i].pe);
-    CmiSyncSend(CpvAccess(neighbors)[i].pe,sizeof(requestmsg),(char *)&msg);
-    //CmiSyncNodeSend(CmiNodeOf(CpvAccess(neighbors)[i].pe),sizeof(requestmsg),(char *)&msg);
+    CmiSyncNodeSend(CmiNodeOf(CpvAccess(neighbors)[i].pe),sizeof(requestmsg),(char *)&msg);
   }
-  /*
-  msg.to_rank = rand() % CmiNumPes();
-  CmiSyncNodeSend(CmiNodeOf(msg.to_rank),sizeof(requestmsg),(char *)&msg);
-  */
 #endif
   cldData->sent = 1;
 
@@ -117,10 +99,7 @@ static void CldAskLoadHandler(requestmsg *msg)
   int receiver, rank, recvIdx, i;
   int myload = CldLoad();
   double now = CmiWallTimer();
-  CldProcInfo  cldData = CpvAccess(CldData);
 
-  if(now - cldData->lastBalanceTime < MSGDELAY * 0.001)
-      return;
   /* only give you work if I have more than 1 */
   if (myload>0) {
     int sendLoad;
@@ -135,12 +114,13 @@ static void CldAskLoadHandler(requestmsg *msg)
     }
     CmiUnlock(CpvAccessOther(cldLock, rank));  /* release lock, grab later */
 #endif
-    sendLoad = myload / CpvAccess(numNeighbors);
+    sendLoad = myload / CpvAccess(numNeighbors) / 2;
     if (sendLoad < 1) sendLoad = 1;
+    sendLoad = 1;
     for (i=0; i<CpvAccess(numNeighbors); i++) 
       if (CpvAccess(neighbors)[i].pe == receiver) break;
-    if(i >= CpvAccess(numNeighbors)) return;
-    //CmiAssert(i<CpvAccess(numNeighbors));
+    
+    if(i<CpvAccess(numNeighbors)) {CmiFree(msg); return;}
     CpvAccess(neighbors)[i].load += sendLoad;
     CldMultipleSend(receiver, sendLoad, rank, 0);
 #if 0
@@ -175,7 +155,6 @@ void CldSendLoad()
         found = 1;
         break;
       }
-    CmiAssert(found == 1);
   }
 #else
   loadmsg msg;
@@ -193,16 +172,15 @@ int CldMinAvg()
   int sum=0, i;
 
   int nNeighbors = CpvAccess(numNeighbors);
-  if(CpvAccess(topologyReady) == 0 || nNeighbors == 0)
-  {
-      return CmiMyPe();
-  }
-#ifdef YHDEBUG
-  CmiPrintf("Line 196 processor %d, numNeighbor=%d\n", CmiMyPe(), nNeighbors);
-#endif
   if (CpvAccess(start) == -1)
     CpvAccess(start) = CmiMyPe() % nNeighbors;
 
+#if 0
+    /* update load from neighbors for multicore */
+  for (i=0; i<nNeighbors; i++) {
+    CpvAccess(neighbors)[i].load = CldLoadRank(CpvAccess(neighbors)[i].pe);
+  }
+#endif
   CpvAccess(MinProc) = CpvAccess(neighbors)[CpvAccess(start)].pe;
   CpvAccess(MinLoad) = CpvAccess(neighbors)[CpvAccess(start)].load;
   sum = CpvAccess(neighbors)[CpvAccess(start)].load;
@@ -222,40 +200,15 @@ int CldMinAvg()
     CpvAccess(MinLoad) = CldLoad();
     CpvAccess(MinProc) = CmiMyPe();
   }
-  i = sum/(nNeighbors+1);
-  if (sum % (nNeighbors+1) != 0) i++;
-  /*
-  if( ((float)sum /(float)(nNeighbors+1) - sum/(nNeighbors+1)) > 0.000001)
-      i = i+1;
-  */
+  i = (int)(1.0 + (((float)sum) /((float)(nNeighbors+1))));
   return i;
 }
 
-static int comp(const void *p1, const void *p2)
-{
-    loadproc *l1 = (loadproc *)p1;
-    loadproc *l2 = (loadproc *)p2;
-    if ( l1->load < l2->load) return -1;
-    else if (l1->load == l2->load) return 0;
-    else return 1;
-}
-
 void CldBalance(void *dummy, double curT)
 {
-  int i, j, k, overload, numToMove=0, avgLoad;
+  int i, j, overload, numToMove=0, avgLoad;
   int totalUnderAvg=0, numUnderAvg=0, maxUnderAvg=0;
-  int nNeighbors = CpvAccess(numNeighbors);
-  CldProcInfo  cldData = CpvAccess(CldData);
-  loadproc* sorted_byload = (loadproc*)malloc(sizeof(loadproc) * nNeighbors);
-  int myload = CldLoad();
-  int minload;
-  int numberoflessthanme = 0;
 
-  int already_sort = 0;
-  int thisload;
-  int sum = 0;
-
-  cldData->lastBalanceTime = CmiWallTimer();
 #if CMK_TRACE_ENABLED && TRACE_USEREVENTS
   double startT = curT;
 #endif
@@ -266,85 +219,36 @@ void CldBalance(void *dummy, double curT)
   if (overload > CldCountTokens())
     overload = CldCountTokens();
 
-  for (i=0; i<nNeighbors; i++) {
-      thisload = CpvAccess(neighbors)[i].load;
-      if(thisload < myload)
-          numberoflessthanme++;
-       sorted_byload[i].load = thisload;
-       sorted_byload[i].index = i;
-  }
-  qsort(sorted_byload, nNeighbors, sizeof(loadproc), comp);
-  
-  minload = sorted_byload[0].load;
-
   if (overload > MAXOVERLOAD) {
     int nNeighbors = CpvAccess(numNeighbors);
     for (i=0; i<nNeighbors; i++)
-      if (sorted_byload[i].load < avgLoad) {
-        totalUnderAvg += avgLoad-sorted_byload[i].load;
-        if (avgLoad - sorted_byload[i].load > maxUnderAvg)
-          maxUnderAvg = avgLoad - sorted_byload[i].load;
+      if (CpvAccess(neighbors)[i].load < avgLoad) {
+        totalUnderAvg += avgLoad-CpvAccess(neighbors)[i].load;
+        if (avgLoad - CpvAccess(neighbors)[i].load > maxUnderAvg)
+          maxUnderAvg = avgLoad - CpvAccess(neighbors)[i].load;
         numUnderAvg++;
       }
     if (numUnderAvg > 0)
-        for (i=0; ((i<nNeighbors) && (overload>0)); i++) {
-            j = (sorted_byload[i].index % nNeighbors);
-            if (CpvAccess(neighbors)[j].load < avgLoad) {
-                numToMove = (avgLoad - CpvAccess(neighbors)[j].load);
-                if (numToMove > overload)
-                    numToMove = overload;
-                overload -= numToMove;
-                CpvAccess(neighbors)[j].load += numToMove;
+      for (i=0; ((i<nNeighbors) && (overload>0)); i++) {
+          j = (i+CpvAccess(Mindex))%CpvAccess(numNeighbors);
+          if (CpvAccess(neighbors)[j].load < avgLoad) {
+              numToMove = (avgLoad - CpvAccess(neighbors)[j].load);
+          if (numToMove > overload)
+            numToMove = overload;
+          overload -= numToMove;
+         CpvAccess(neighbors)[j].load += numToMove;
 #if CMK_MULTICORE
-                CldSimpleMultipleSend(CpvAccess(neighbors)[j].pe, numToMove);
+          CldSimpleMultipleSend(CpvAccess(neighbors)[j].pe, numToMove);
 #else
-                CldMultipleSend(CpvAccess(neighbors)[j].pe, 
-                    numToMove, CmiMyRank(), 
+          CldMultipleSend(CpvAccess(neighbors)[j].pe, 
+                         numToMove, CmiMyRank(), 
 #if CMK_SMP
-                    0
+                         0
 #else
-                    1
+                         1
 #endif
-                    );
+                          );
 #endif
-            }
-        }
-  }else if(myload > LOADTHRESH)
-  {
-      sum = myload;
-      avgLoad = 0;
-      
-      for (i=0; i<numberoflessthanme; i++){
-          if(sorted_byload[i].load >= avgLoad)
-              break;
-          sum += sorted_byload[i].load;      
-          avgLoad = sum/(i+1);
-      }
-      avgLoad = sum/(i+1);
-      overload = myload - avgLoad;
-      for (k=0; k<i && overload > 0; k++)
-      {
-        j = (sorted_byload[k].index % nNeighbors);
-        if(CpvAccess(neighbors)[j].load < avgLoad)
-        {
-            numToMove = (avgLoad - CpvAccess(neighbors)[j].load);
-            if (numToMove > overload)
-                numToMove = overload;
-            overload -= numToMove;
-            CpvAccess(neighbors)[j].load += numToMove;
-#if CMK_MULTICORE
-            CldSimpleMultipleSend(CpvAccess(neighbors)[j].pe, numToMove);
-#else
-            CldMultipleSend(CpvAccess(neighbors)[j].pe, 
-                numToMove, CmiMyRank(), 
-#if CMK_SMP
-                0
-#else
-                1
-#endif
-                );
-#endif
-
         }
       }
   }
@@ -352,14 +256,12 @@ void CldBalance(void *dummy, double curT)
 #if CMK_TRACE_ENABLED && TRACE_USEREVENTS
   traceUserBracketEvent(CpvAccess(CldData)->balanceEvt, startT, CmiWallTimer());
 #endif
-
-  free(sorted_byload);
 }
 
 void CldBalancePeriod(void *dummy, double curT)
 {
-   CldBalance(NULL, curT);
-   CcdCallFnAfterOnPE((CcdVoidFn)CldBalancePeriod, NULL, PERIOD, CmiMyPe());
+    CldBalance(NULL, curT);
+    CcdCallFnAfterOnPE((CcdVoidFn)CldBalancePeriod, NULL, PERIOD, CmiMyPe());
 }
 
 
@@ -379,9 +281,6 @@ void CldBalanceHandler(void *msg)
 {
   CldRestoreHandler(msg);
   CldPutToken(msg);
-
-    if(CldLoad() > MAXBEFOREBALANCE)
-        CldBalance(NULL, CmiWallTimer());
 }
 
 void CldHandler(void *msg)
@@ -464,7 +363,6 @@ void CldEnqueue(int pe, void *msg, int infofn)
       CmiSetInfo(msg,infofn);
       CldSwitchHandler(msg, CpvAccess(CldBalanceHandlerIndex));
       CmiSyncSendAndFree(pe, len, msg);
-    
     }
     else {
       ifn(msg, &pfn, &len, &queueing, &priobits, &prioptr);
@@ -616,11 +514,6 @@ static void CldComputeNeighborData()
   }
   CpvAccess(neighborGroup) = CmiEstablishGroup(CpvAccess(numNeighbors), pes);
   free(pes);
-
-#ifdef YHDEBUG
-  CmiPrintf("Line 617 On processor=%d npe=%d, numNeighbors=%d\n", CmiMyPe(), npe, CpvAccess(numNeighbors));
-#endif
-
 }
 
 void CldGraphModuleInit(char **argv)
@@ -684,6 +577,11 @@ void CldGraphModuleInit(char **argv)
     else fclose(fp);
     CldReadNeighborData();
 #endif
+    CldComputeNeighborData();
+    #if CMK_MULTICORE
+    CmiNodeBarrier();
+#endif
+    CldBalancePeriod(NULL, CmiWallTimer());
 
   }
 
@@ -701,18 +599,6 @@ void CldGraphModuleInit(char **argv)
 #endif
 }
 
-void CldCallback()
-{
-    if(CpvAccess(topologyReady))
-        return;
-    CldComputeNeighborData();
-    CpvAccess(topologyReady) = 1;
-#if CMK_MULTICORE
-    CmiNodeBarrier();
-#endif
-    CldBalancePeriod(NULL, CmiWallTimer());
-
-}
 
 void CldModuleInit(char **argv)
 {
@@ -720,11 +606,9 @@ void CldModuleInit(char **argv)
   CpvInitialize(int, CldRelocatedMessages);
   CpvInitialize(int, CldLoadBalanceMessages);
   CpvInitialize(int, CldMessageChunks);
-  CpvInitialize(int, topologyReady);
   CpvAccess(CldHandlerIndex) = CmiRegisterHandler(CldHandler);
   CpvAccess(CldRelocatedMessages) = CpvAccess(CldLoadBalanceMessages) = 
   CpvAccess(CldMessageChunks) = 0;
-  CpvAccess(topologyReady) = 0;
 
   CldModuleGeneralInit(argv);
   CldGraphModuleInit(argv);