There is a bug in end-of-run analysis. The processor that starts CkExit(), and use...
author Gengbin Zheng <gzheng@illinois.edu>
Sun, 14 Mar 2010 00:09:47 +0000 (18:09 -0600)
committer Gengbin Zheng <gzheng@illinois.edu>
Sun, 14 Mar 2010 00:09:47 +0000 (18:09 -0600)
src/ck-perf/trace-projections.C
src/ck-perf/trace-projections.ci
src/ck-perf/trace-projectionsBOC.h

index 9a77ceeef72c77ba6af841b00085a7eff8dde9e3..6c1b79cafc99f58f6612b950bba06a2e18ca4536 100644 (file)
@@ -1090,7 +1090,7 @@ void TraceProjections::traceClose(void)
   converseExit = 1;
   if (CkMyPe() == 0) {
     CProxy_TraceProjectionsBOC bocProxy(traceProjectionsGID);
-    bocProxy.traceProjectionsParallelShutdown();
+    bocProxy.traceProjectionsParallelShutdown(-1);
   }
 #else
   // we've already deleted the logpool, so multiple calls to traceClose
@@ -1667,8 +1667,9 @@ void registerOutlierReduction() {
 extern "C" void TraceProjectionsExitHandler()
 {
 #ifndef CMK_OPTIMIZE
+  // CkPrintf("[%d] TraceProjectionsExitHandler called!\n", CkMyPe());
   CProxy_TraceProjectionsBOC bocProxy(traceProjectionsGID);
-  bocProxy.traceProjectionsParallelShutdown();
+  bocProxy.traceProjectionsParallelShutdown(CkMyPe());
 #else
   CkExit();
 #endif
@@ -1740,7 +1741,9 @@ TraceProjectionsInit::TraceProjectionsInit(CkArgMsg *msg) {
 }
 
 // Called on every processor.
-void TraceProjectionsBOC::traceProjectionsParallelShutdown() {
+void TraceProjectionsBOC::traceProjectionsParallelShutdown(int pe) {
+  //CmiPrintf("[%d] traceProjectionsParallelShutdown called from . \n", CkMyPe(), pe);
+  endPe = pe;                // the pe that starts CkExit()
   if (CkMyPe() == 0) {
     analysisStartTime = CmiWallTimer();
   }
@@ -2584,7 +2587,7 @@ void KMeansBOC::phaseDone() {
 
 void TraceProjectionsBOC::startEndTimeAnalysis()
 {
- //if(CkMyPe()==0)    CkPrintf("[%d] TraceProjectionsBOC::startEndTimeAnalysis time=\t%g\n", CkMyPe(), CkWallTimer() );
+ //CkPrintf("[%d] TraceProjectionsBOC::startEndTimeAnalysis time=\t%g\n", CkMyPe(), CkWallTimer() );
 
   endTime = CkpvAccess(_trace)->endTime;
   // CkPrintf("[%d] End time is %lf us\n", CkMyPe(), endTime*1e06);
@@ -2596,7 +2599,7 @@ void TraceProjectionsBOC::startEndTimeAnalysis()
 
 void TraceProjectionsBOC::endTimeDone(CkReductionMsg *msg)
 {
- //if(CkMyPe()==0)    CkPrintf("[%d] TraceProjectionsBOC::endTimeDone time=\t%g\n", CkMyPe(), CkWallTimer() );
+ //if(CkMyPe()==0)    CkPrintf("[%d] TraceProjectionsBOC::endTimeDone time=\t%g parModulesRemaining:%d\n", CkMyPe(), CkWallTimer(), parModulesRemaining);
 
   CkAssert(CkMyPe() == 0);
   parModulesRemaining--;
@@ -2652,17 +2655,19 @@ void TraceProjectionsBOC::finalize()
 void TraceProjectionsBOC::closingTraces() {
   CkpvAccess(_trace)->closeTrace();
 
-  int dummy = 0;
+    // subtle:  reduction needs to go to the PE which started CkExit()
+  int pe = 0;
+  if (endPe != -1) pe = endPe;
   CkCallback cb(CkIndex_TraceProjectionsBOC::closeParallelShutdown(NULL), 
-               0, thisProxy);
-  contribute(sizeof(int), &dummy, CkReduction::sum_int, cb);  
+               pe, thisProxy); 
+  contribute(0, NULL, CkReduction::sum_int, cb);  
 }
 
 // The sole purpose of this reduction is to decide whether or not
 //   Projections as a module needs to call CkExit() to get other
 //   modules to shutdown.
 void TraceProjectionsBOC::closeParallelShutdown(CkReductionMsg *msg) {
-  CkAssert(CkMyPe() == 0);
+  CkAssert(endPe == -1 && CkMyPe() ==0 || CkMyPe() == endPe);
   delete msg;
   // decide if CkExit() needs to be called
   if (!CkpvAccess(_trace)->converseExit) {
index 721486ab15be0822baeb9ad47b5a7d7fb27b42a6..3f7bc1ef09e746cc4a7f209fe6669d9d1491fc49 100644 (file)
@@ -53,7 +53,7 @@ module TraceProjections {
   group [migratable] TraceProjectionsBOC {
     entry TraceProjectionsBOC(bool);
 
-    entry void traceProjectionsParallelShutdown(void);
+    entry void traceProjectionsParallelShutdown(int);
     entry void startEndTimeAnalysis(void);
     entry void endTimeDone(CkReductionMsg *);
     entry void kMeansDone(void);
index ed72fbcfcecd4ac55a85fc6f27c2e498479aa870..c91d2e0a4b1552d0f2d1b1e6fb972dc5dab7bb0e 100644 (file)
@@ -135,11 +135,12 @@ class TraceProjectionsBOC : public CBase_TraceProjectionsBOC {
   double dummy;
   double endTime;
   double analysisStartTime;
+  int endPe;                          // end PE which calls CkExit()
  public:
- TraceProjectionsBOC(bool _findOutliers) : findOutliers(_findOutliers), parModulesRemaining(0) {};
- TraceProjectionsBOC(CkMigrateMessage *m):CBase_TraceProjectionsBOC(m), parModulesRemaining(0) {};
+ TraceProjectionsBOC(bool _findOutliers) : findOutliers(_findOutliers), parModulesRemaining(0), endPe(-1) {};
+ TraceProjectionsBOC(CkMigrateMessage *m):CBase_TraceProjectionsBOC(m), parModulesRemaining(0), endPe(-1) {};
 
-  void traceProjectionsParallelShutdown();
+  void traceProjectionsParallelShutdown(int);
   void startEndTimeAnalysis();
   void endTimeDone(CkReductionMsg *);
   void kMeansDone(CkReductionMsg *);