Fixing bugs and simplifying code for recording multiple phases of performance measure...
author: Isaac Dooley <isaacdooley@hope.cs.uiuc.edu>
Thu, 14 Jan 2010 17:40:07 +0000 (11:40 -0600)
committer: Isaac Dooley <isaacdooley@hope.cs.uiuc.edu>
Thu, 14 Jan 2010 17:40:07 +0000 (11:40 -0600)
src/ck-cp/controlPoints.C
src/ck-cp/controlPoints.h
src/ck-perf/trace-controlPoints.h

index b061ced930d33918d298de6ee45fd97a33b95d85..dc58f716249ecd63b20103f9c9273e53f66cd62e 100644 (file)
@@ -179,6 +179,10 @@ controlPointManager::controlPointManager(){
     exitWhenReady = false;
     alreadyRequestedMemoryUsage = false;   
     alreadyRequestedIdleTime = false;
+    alreadyRequestedAll = false;
+    
+    instrumentedPhase newPhase;
+    allData.phases.push_back(newPhase);   
     
     dataFilename = (char*)malloc(128);
     sprintf(dataFilename, "controlPointData.txt");
@@ -295,8 +299,12 @@ controlPointManager::controlPointManager(){
   void controlPointManager::writeDataFile(){
     CkPrintf("============= writeDataFile() ============\n");
     ofstream outfile(dataFilename);
-    allData.phases.push_back(thisPhaseData);
+    //    allData.phases.push_back(thisPhaseData);
     allData.cleanupNames();
+
+    allData.verify();
+    allData.filterOutIncompletePhases();
+
     outfile << allData.toString();
     outfile.close();
   }
@@ -524,6 +532,15 @@ controlPointManager::controlPointManager(){
     return false;   
   }
   
+
+  /// The data from the current phase
+  instrumentedPhase * controlPointManager::currentPhaseData(){
+    int s = allData.phases.size();
+    CkAssert(s>=1);
+    return &(allData.phases[s-1]);
+  }
+
   /// The data from the previous phase
   instrumentedPhase * controlPointManager::previousPhaseData(){
     int s = allData.phases.size();
@@ -605,31 +622,16 @@ controlPointManager::controlPointManager(){
     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
     
 #endif    
-    
-    if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
-      alreadyRequestedMemoryUsage = true;
-      CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
-      thisProxy.requestMemoryUsage(*cb);
-      delete cb;
-    }
 
-    if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
-      alreadyRequestedIdleTime = true;
-      CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
-      thisProxy.requestIdleTime(*cb);
-      delete cb;
-    }
 
     
     // increment phase id
     phase_id++;
     
-    
-    // save a copy of the timing information from this phase
-    allData.phases.push_back(thisPhaseData);
-    
-    // clear the timing information that will be used for the next phase
-    thisPhaseData.clear();
+
+    // Create new entry for the phase we are starting now
+    instrumentedPhase newPhase;
+    allData.phases.push_back(newPhase);
     
     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
 
@@ -637,7 +639,8 @@ controlPointManager::controlPointManager(){
 
   /// An application uses this to register an instrumented timing for this phase
   void controlPointManager::setTiming(double time){
-    thisPhaseData.times.push_back(time);
+    currentPhaseData()->times.push_back(time);
+
 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
        
     // First we should register this currently executing message as a path, because it is likely an important one to consider.
@@ -678,7 +681,6 @@ controlPointManager::controlPointManager(){
         
     instrumentedPhase* prevPhase = previousPhaseData();
     if(prevPhase != NULL){
-      CkPrintf("Storing idle time measurements\n");
       prevPhase->idleTime.min = r[0];
       prevPhase->idleTime.avg = r[1]/CkNumPes();
       prevPhase->idleTime.max = r[2];
@@ -698,12 +700,6 @@ controlPointManager::controlPointManager(){
 
 
 
-
-
-
-
-
-
   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
   void controlPointManager::requestAll(CkCallback cb){
     const double i = localControlPointTracingInstance()->idleRatio();
@@ -741,17 +737,27 @@ controlPointManager::controlPointManager(){
     double *over = data+3;
     double *mem = data+6;
 
+    std::string b = allData.toString();
+
     instrumentedPhase* prevPhase = previousPhaseData();
     if(prevPhase != NULL){
-      CkPrintf("Storing idle time measurements\n");
       prevPhase->idleTime.min = idle[0];
       prevPhase->idleTime.avg = idle[1]/CkNumPes();
       prevPhase->idleTime.max = idle[2];
       
       prevPhase->memoryUsageMB = mem[0];
+
       
+      CkPrintf("Stored idle time min=%lf mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, (double)prevPhase->memoryUsageMB, prevPhase);
+      prevPhase->print();
+      CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
+
+      std::string a = allData.toString();
+
+      CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
       
-      CkPrintf("Stored idle time min=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase);
+
+
     } else {
       CkPrintf("There is no previous phase to store measurements\n");
     }
@@ -764,18 +770,15 @@ controlPointManager::controlPointManager(){
 
 
 
-
-
-
   void controlPointManager::checkForShutdown(){
-    if( exitWhenReady && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
+    if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
       doExitNow();
     }
   }
 
 
   void controlPointManager::exitIfReady(){
-     if( !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
+     if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
        doExitNow();
      } else {
@@ -1305,14 +1308,14 @@ void controlPointManager::generatePlan() {
 
 /// Get control point value from range of integers [lb,ub]
 int controlPoint(const char *name, int lb, int ub){
-  instrumentedPhase &thisPhaseData = controlPointManagerProxy.ckLocalBranch()->thisPhaseData;
+  instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
   int result;
 
   // if we already have control point values for phase, return them
-  if( thisPhaseData.controlPoints.count(std::string(name))>0 ){
-    return thisPhaseData.controlPoints[std::string(name)];
+  if( thisPhaseData->controlPoints.count(std::string(name))>0 ){
+    return thisPhaseData->controlPoints[std::string(name)];
   }
 
 
@@ -1334,7 +1337,7 @@ int controlPoint(const char *name, int lb, int ub){
   }
 
   CkAssert(isInRange(result,ub,lb));
-  thisPhaseData.controlPoints.insert(std::make_pair(std::string(name),result)); 
+  thisPhaseData->controlPoints.insert(std::make_pair(std::string(name),result)); 
   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
 
   return result;
index 421d133ca02a803f85cba8dae064b210a5cd979d..d39d5072fa581ea75ef3b7f33209bfca099e93bb 100644 (file)
@@ -145,17 +145,18 @@ public:
   std::vector<PathHistoryTableEntry> criticalPaths;
 #endif
   
-  int memoryUsageMB;
+  double memoryUsageMB;
 
   idleTimeContainer idleTime;
 
   instrumentedPhase(){
-    memoryUsageMB = -1;
+    memoryUsageMB = -1.0;
   }
   
   void clear(){
     controlPoints.clear();
     times.clear();
+    memoryUsageMB = -1.0;
     //    criticalPaths.clear();
   }
 
@@ -364,10 +365,6 @@ public:
   std::string toString(){
     std::ostringstream s;
 
-    verify();
-
-    filterOutIncompletePhases();
-
     // HEADER:
     s << "# HEADER:\n";
     s << "# Data for use with Isaac Dooley's Control Point Framework\n";
@@ -394,7 +391,7 @@ public:
       s << "# number of control point sets: " << phases.size() << "\n";
       std::vector<instrumentedPhase>::iterator runiter;
       for(runiter=phases.begin();runiter!=phases.end();runiter++){
-
+       
        // Print the memory usage
         s << runiter->memoryUsageMB << "    "; 
 
@@ -518,7 +515,6 @@ public:
   char * dataFilename;
   
   instrumentedData allData;
-  instrumentedPhase thisPhaseData;
   
   /// The lower and upper bounds for each named control point
   std::map<std::string, std::pair<int,int> > controlPointSpace;
@@ -578,6 +574,9 @@ public:
   /// Generate a plan (new control point values) once per phase
   void generatePlan();
 
+  /// The data for the current phase
+  instrumentedPhase *currentPhaseData();
+
   /// The data from the previous phase
   instrumentedPhase *previousPhaseData();
 
index c0e36d3e1c5b30daa62d71c40a53715e093e61c8..7614a2a807cc36e3c188bf60efba38a53a284330 100644 (file)
@@ -115,7 +115,7 @@ class TraceControlPoints : public Trace {
 
   /** Highest memory usage (in MB) value we've seen since last time */
   double memoryUsageMB(){
-    return memUsage / 1024.0 / 1024.0;
+    return ((double)memUsage) / 1024.0 / 1024.0;
   }