Use the new migrationDone callback to handle the end of the load balancing phase.
authorGengbin Zheng <gzheng@illinois.edu>
Wed, 20 Jul 2011 02:27:05 +0000 (21:27 -0500)
committerGengbin Zheng <gzheng@illinois.edu>
Wed, 20 Jul 2011 02:27:05 +0000 (21:27 -0500)
The old scheme had a bug: when processor 0 has zero objects, doneLB() is never called there, and the program hangs.

src/libs/ck-libs/pose/gvt.C
src/libs/ck-libs/pose/sim.C

index e8bdf2e69bac6152064c1db84727df77152e0d2b..e06e40e50c83fe8c8f78344191955e15987fa567 100644 (file)
@@ -8,6 +8,12 @@ CkGroupID ThePVT;
 CkGroupID TheGVT;
 CpvExtern(int, stateRecovery);
 CpvExtern(eventID, theEventID);
+
+static void staticDoneLB(void *data)
+{
+  ((PVT*)data)->doneLB();
+}
+
 /// Basic Constructor
 PVT::PVT() 
 {
@@ -81,6 +87,8 @@ PVT::PVT()
   if(pose_config.stats)
     localStats->TimerStop();
 #endif
+
+  LBDatabase::Object()->AddMigrationDoneFn(staticDoneLB, this);
 }
 
 /// PUP routine
@@ -358,16 +366,9 @@ void PVT::callAtSync() {
 }
 
 void PVT::doneLB() {
-  static int count = 0;
-  count ++;
-  if (count == objs.getNumObjs()) {
-    count =0;
-    if (CkMyPe()==0) { 
-      eventMsg *dummyMsg = new eventMsg();
-      CProxy_PVT p(ThePVT);
-      p[0].resumeAfterLB(dummyMsg);
-    }
-  }
+  eventMsg *dummyMsg = new eventMsg();
+  CProxy_PVT p(ThePVT);
+  p[0].resumeAfterLB(dummyMsg);
 }
 
 /// ENTRY: resume after checkpointing, restarting, or if checkpointing doesn't occur
@@ -395,7 +396,15 @@ void PVT::resumeAfterCheckpoint(eventMsg *m) {
 #endif
 }
 
+// called on PE 0
 void PVT::resumeAfterLB(eventMsg *m) {
+  static int count = 0;
+  count ++;
+  if (count != CkNumPes()) {
+    CkFreeMsg(m);
+    return;
+  }
+  count = 0;
 #ifndef CMK_OPTIMIZE
   if(pose_config.stats)
     localStats->TimerStart(GVT_TIMER);
index 3b065b852d07d22aa60db2cc6ceba7658ef27de4..4463fbdd6c083f467374595486a081aded6a72e6 100644 (file)
@@ -427,8 +427,8 @@ void sim::SeqResumeAfterCheckpoint() {
 
 void sim::ResumeFromSync()
 {
-  PVT *localPVT = (PVT *)CkLocalBranch(ThePVT);
-  localPVT->doneLB();
+//  PVT *localPVT = (PVT *)CkLocalBranch(ThePVT);
+//  localPVT->doneLB();
 }
 
 /// Dump all data fields