Merge branch 'charm' of charmgit:charm into charm
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15
16 #include <climits>
17 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
18  
19
20 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
21 /**  @{ */
22
23 using namespace std;
24
25 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
26
27
28 //#undef DEBUGPRINT
29 //#define DEBUGPRINT 4
30
31 static void periodicProcessControlPoints(void* ptr, double currWallTime);
32
33
34 // A pointer to this PE's controlpoint manager Proxy
35 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
36 /* readonly */ int random_seed;
37 /* readonly */ long controlPointSamplePeriod;
38 /* readonly */ int whichTuningScheme;
39 /* readonly */ bool writeDataFileAtShutdown;
40 /* readonly */ bool shouldFilterOutputData;
41 /* readonly */ bool loadDataFileAtStartup;
42 /* readonly */ bool shouldGatherMemoryUsage;
43 /* readonly */ bool shouldGatherUtilization;
44 /* readonly */ bool shouldGatherAll;
45 /* readonly */ char CPDataFilename[512];
46
47
48
49 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
50 /// These probably come from the command line arguments, so are available only on PE 0
51 std::map<std::string, int> defaultControlPointValues;
52
53
54
55 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod}  tuningScheme;
56
57
58
59 void printTuningScheme(){
60   switch(whichTuningScheme){
61   case RandomSelection:
62     CkPrintf("Tuning Scheme: RandomSelection\n");
63     break;
64   case AlwaysDefaults:
65     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
66     break;
67   case SimulatedAnnealing:
68     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
69     break;
70   case ExhaustiveSearch:
71     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
72     break;
73   case CriticalPathAutoPrioritization:
74     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
75     break;
76   case UseBestKnownTiming:
77     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
78     break;
79   case UseSteering:
80     CkPrintf("Tuning Scheme: UseSteering\n");
81     break;
82   case MemoryAware:
83     CkPrintf("Tuning Scheme: MemoryAware\n");
84     break;
85   case Simplex:
86     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
87     break;
88   case DivideAndConquer:
89     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
90     break;
91   case LDBPeriod:
92     CkPrintf("Tuning Scheme: Load Balancing Period Steering\n");
93     break;
94   default:
95     CkPrintf("Unknown tuning scheme\n");
96     break;
97   }
98   fflush(stdout);
99 }
100
101
102
103 /// A reduction type that combines idle time measurements (min/sum/max etc.)
104 CkReduction::reducerType idleTimeReductionType;
105 /// A reducer that combines idle time measurements (min/sum/max etc.)
106 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
107   double ret[3];
108   if(nMsg > 0){
109     CkAssert(msgs[0]->getSize()==3*sizeof(double));
110     double *m=(double *)msgs[0]->getData();
111     ret[0]=m[0];
112     ret[1]=m[1];
113     ret[2]=m[2];
114   }
115   for (int i=1;i<nMsg;i++) {
116     CkAssert(msgs[i]->getSize()==3*sizeof(double));
117     double *m=(double *)msgs[i]->getData();
118     ret[0]=min(ret[0],m[0]);
119     ret[1]+=m[1];
120     ret[2]=max(ret[2],m[2]);
121   }  
122   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
123 }
124
125
126
127 /// A reduction type that combines idle, overhead, and memory measurements
128 CkReduction::reducerType allMeasuresReductionType;
129 /// A reducer that combines idle, overhead, and memory measurements
130 #define ALL_REDUCTION_SIZE 12
131 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
132   double ret[ALL_REDUCTION_SIZE];
133   if(nMsg > 0){
134     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
135     double *m=(double *)msgs[0]->getData();
136     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
137   }
138   for (int i=1;i<nMsg;i++) {
139     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
140     double *m=(double *)msgs[i]->getData();
141     // idle time (min/sum/max)
142     ret[0]=min(ret[0],m[0]);
143     ret[1]+=m[1];
144     ret[2]=max(ret[2],m[2]);
145     // overhead time (min/sum/max)
146     ret[3]=min(ret[3],m[3]);
147     ret[4]+=m[4];
148     ret[5]=max(ret[5],m[5]);
149     // mem usage (max)
150     ret[6]=max(ret[6],m[6]);
151     // bytes per invocation for two types of entry methods
152     ret[7]+=m[7];
153     ret[8]+=m[8];
154     ret[9]+=m[9];
155     ret[10]+=m[10];
156     // Grain size (avg)
157     ret[11]+=m[11];
158   }  
159   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
160 }
161
162
163 /// Registers the control point framework's reduction handlers at startup on each PE
164 /*initproc*/ void registerCPReductions(void) {
165   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
166   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
167 }
168
169
170
171
172
173
174 /// Return an integer between 0 and num-1 inclusive
175 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
176 unsigned int randInt(unsigned int num, const char* name, int seed=0){
177   CkAssert(num > 0);
178   CkAssert(num < 1000);
179
180   unsigned long hash = 0;
181   unsigned int c;
182   unsigned char * str = (unsigned char*)name;
183
184   while (c = *str++){
185     unsigned int c2 = (c+64)%128;
186     unsigned int c3 = (c2*5953)%127;
187     hash = c3 + (hash << 6) + (hash << 16) - hash;
188   }
189
190   unsigned long shuffled1 = (hash*2083)%7907;
191   unsigned long shuffled2 = (seed*4297)%2017;
192
193   unsigned long shuffled3 = (random_seed*4799)%7919;
194
195   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
196
197   unsigned int result = ((namehash * 6029) % 1117) % num;
198
199   CkAssert(result >=0 && result < num);
200   return result;
201 }
202
203
204
205 controlPointManager::controlPointManager() {
206   generatedPlanForStep = -1;
207
208     exitWhenReady = false;
209     alreadyRequestedMemoryUsage = false;   
210     alreadyRequestedIdleTime = false;
211     alreadyRequestedAll = false;
212     
213     instrumentedPhase * newPhase = new instrumentedPhase();
214     allData.phases.push_back(newPhase);   
215     
216     frameworkShouldAdvancePhase = false;
217     haveControlPointChangeCallback = false;
218 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
219     
220     ControlPoint::initControlPointEffects();
221
222     phase_id = 0;
223
224     if(loadDataFileAtStartup){    
225       loadDataFile();
226     }
227
228     
229     if(CkMyPe() == 0){
230       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
231     }
232
233     traceRegisterUserEvent("No currently executing message", 5000);
234     traceRegisterUserEvent("Zero time along critical path", 5010);
235     traceRegisterUserEvent("Positive total time along critical path", 5020);
236     traceRegisterUserEvent("env->setPathHistory()", 6000);
237     traceRegisterUserEvent("Critical Path", 5900);
238     traceRegisterUserEvent("Table Entry", 5901);
239
240
241 #if USER_EVENT_FOR_REGISTERTERMINALPATH
242     traceRegisterUserEvent("registerTerminalPath", 100); 
243 #endif
244
245   }
246   
247
248  controlPointManager::~controlPointManager(){
249 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
250   }
251
252
253   /// Loads the previous run data file
254   void controlPointManager::loadDataFile(){
255     ifstream infile(CPDataFilename);
256     vector<std::string> names;
257     std::string line;
258   
259     while(getline(infile,line)){
260       if(line[0] != '#')
261         break;
262     }
263   
264     int numTimings = 0;
265     std::istringstream n(line);
266     n >> numTimings;
267   
268     while(getline(infile,line)){ 
269       if(line[0] != '#') 
270         break; 
271     }
272
273     int numControlPointNames = 0;
274     std::istringstream n2(line);
275     n2 >> numControlPointNames;
276   
277     for(int i=0; i<numControlPointNames; i++){
278       getline(infile,line);
279       names.push_back(line);
280     }
281
282     for(int i=0;i<numTimings;i++){
283       getline(infile,line);
284       while(line[0] == '#')
285         getline(infile,line); 
286
287       instrumentedPhase * ips = new instrumentedPhase();
288
289       std::istringstream iss(line);
290
291       // Read memory usage for phase
292       iss >> ips->memoryUsageMB;
293       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
294
295       // Read idle time
296       iss >> ips->idleTime.min;
297       iss >> ips->idleTime.avg;
298       iss >> ips->idleTime.max;
299
300       // Read overhead time
301       iss >> ips->overheadTime.min;
302       iss >> ips->overheadTime.avg;
303       iss >> ips->overheadTime.max;
304
305       // read bytePerInvoke
306       iss >> ips->bytesPerInvoke;
307
308       // read grain size
309       iss >> ips->grainSize;
310
311       // Read control point values
312       for(int cp=0;cp<numControlPointNames;cp++){
313         int cpvalue;
314         iss >> cpvalue;
315         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
316       }
317
318       // ignore median time
319       double mt;
320       iss >> mt;
321
322       // Read all times
323
324       double time;
325
326       while(iss >> time){
327         ips->times.push_back(time);
328 #if DEBUGPRINT > 5
329         CkPrintf("read time %lf from file\n", time);
330 #endif
331       }
332
333       allData.phases.push_back(ips);
334
335     }
336
337     infile.close();
338   }
339
340
341
342   /// Add the current data to allData and output it to a file
343   void controlPointManager::writeDataFile(){
344     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
345     ofstream outfile(CPDataFilename);
346     allData.cleanupNames();
347
348 //    string s = allData.toString();
349 //    CkPrintf("At end: \n %s\n", s.c_str());
350
351     if(shouldFilterOutputData){
352       allData.verify();
353       allData.filterOutIncompletePhases();
354     }
355
356 //    string s2 = allData.toString();
357 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
358     if(allData.toString().length() > 10){
359       outfile << allData.toString();
360     } else {
361       outfile << " No data available to save to disk " << endl;
362     }
363     outfile.close();
364   }
365
366   /// User can register a callback that is called when application should advance to next phase
367   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
368     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
369     controlPointChangeCallback = cb;
370     haveControlPointChangeCallback = true;
371   }
372
373
374 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
375 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
376   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
377 }
378
379   /// Called periodically by the runtime to handle the control points
380   /// Currently called on each PE
381   void controlPointManager::processControlPoints(){
382
383     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
384
385
386     //==========================================================================================
387     // Print the data for each phase
388
389     const int s = allData.phases.size();
390
391 #if DEBUGPRINT
392     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
393     for(int p=0;p<s;++p){
394       const instrumentedPhase &phase = allData.phases[p];
395       const idleTimeContainer &idle = phase.idleTime;
396       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
397       vector<double> const &times = phase.times;
398
399       CkPrintf("Phase %d:\n", p);
400       idle.print();
401      //   CkPrintf("critical paths: (* affected by control point)\n");
402         //   for(int i=0;i<criticalPaths.size(); i++){
403         // If affected by a control point
404         //      criticalPaths[i].print();
405
406       //        criticalPaths[i].print();
407       //        CkPrintf("\n");
408         
409
410       //   }
411       CkPrintf("Timings:\n");
412       for(int i=0;i<times.size(); i++){
413         CkPrintf("%lf ", times[i]);
414       }
415       CkPrintf("\n");
416
417     }
418     
419     CkPrintf("\n\n");
420
421
422 #endif
423
424
425
426     //==========================================================================================
427     // If this is a phase during which we try to adapt control point values based on critical path
428 #if 0
429     if( s%5 == 4) {
430
431       // Find the most recent phase with valid critical path data and idle time measurements      
432       int whichPhase=-1;
433       for(int p=0; p<s; ++p){
434         const instrumentedPhase &phase = allData.phases[p];
435         const idleTimeContainer &idle = phase.idleTime;
436         if(idle.isValid() && phase.criticalPaths.size()>0 ){
437           whichPhase = p;
438         }
439       }
440       
441       
442       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
443       const instrumentedPhase &phase = allData.phases[whichPhase];
444       const idleTimeContainer &idle = phase.idleTime;
445       
446       if(idle.min > 0.1){
447         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
448         
449         // Determine the median critical path for this phase
450         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
451         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
452         CkAssert(phase.criticalPaths.size() > 0);
453         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
454         
455         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
456         
457         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
458           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
459
460
461           CkPrintf("Finding control points related to the critical path\n");
462           int cpcount = 0;
463           std::set<std::string> controlPointsAffectingCriticalPath;
464
465           
466           for(int e=0;e<path.getNumUsed();e++){
467             if(path.getUsedCount(e)>0){
468               int ep = path.getUsedEp(e);
469
470               std::map<std::string, std::set<int> >::iterator iter;
471               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
472                 if(iter->second.count(ep)>0){
473                   controlPointsAffectingCriticalPath.insert(iter->first);
474                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
475                   cpcount++;
476                 }
477               }
478               
479             }
480           }
481           
482
483           if(cpcount==0){
484             CkPrintf("No control points are known to affect the critical path\n");
485           } else {
486             double beginTime = CmiWallTimer();
487
488             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
489             
490             newControlPoints = phase.controlPoints;
491             
492             if(frameworkShouldAdvancePhase){
493               gotoNextPhase();  
494             }
495             
496             if(haveControlPointChangeCallback){ 
497 #if DEBUGPRINT
498               CkPrintf("Calling control point change callback\n");
499 #endif
500               // Create a high priority message and send it to the callback
501               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
502               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
503               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
504               controlPointChangeCallback.send(msg);
505               
506             }
507             
508             
509             // adjust the control points that can affect the critical path
510
511             char textDescription[4096*2];
512             textDescription[0] = '\0';
513
514             std::map<std::string,int>::iterator newCP;
515             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
516               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
517                 // decrease the value (increase priority) if within range
518                 int lowerbound = controlPointSpace[newCP->first].first;
519                 if(newCP->second > lowerbound){
520                   newControlPointsAvailable = true;
521                   newCP->second --;
522                 }
523               }
524             }
525            
526             // Create a string for a projections user event
527             if(1){
528               std::map<std::string,int>::iterator newCP;
529               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
530                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
531               }
532             }
533
534             char *userEventString = new char[4096*2];
535             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
536             
537             static int userEventCounter = 20000;
538             CkPrintf("USER EVENT: %s\n", userEventString);
539             
540             traceRegisterUserEvent(userEventString, userEventCounter); 
541             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
542             userEventCounter++;
543             
544           }
545           
546         } else {
547           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
548         }
549         
550         
551       }
552       
553     } else {
554       
555       
556     }
557    
558 #endif
559
560
561
562     if(frameworkShouldAdvancePhase){
563       gotoNextPhase();  
564     }
565     
566     if(haveControlPointChangeCallback){ 
567       // Create a high priority message and send it to the callback
568       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
569       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
570       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
571       controlPointChangeCallback.send(msg);
572     }
573     
574   }
575   
576   /// Determine if any control point is known to affect an entry method
577   bool controlPointManager::controlPointAffectsThisEP(int ep){
578     std::map<std::string, std::set<int> >::iterator iter;
579     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
580       if(iter->second.count(ep)>0){
581         return true;
582       }
583     }
584     return false;    
585   }
586   
587   /// Determine if any control point is known to affect a chare array  
588   bool controlPointManager::controlPointAffectsThisArray(int array){
589     std::map<std::string, std::set<int> >::iterator iter;
590     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
591       if(iter->second.count(array)>0){
592         return true;
593       }
594     }
595     return false;   
596   }
597   
598
599   /// The data from the current phase
600   instrumentedPhase * controlPointManager::currentPhaseData(){
601     int s = allData.phases.size();
602     CkAssert(s>=1);
603     return allData.phases[s-1];
604   }
605  
606
607   /// The data from the previous phase
608   instrumentedPhase * controlPointManager::previousPhaseData(){
609     int s = allData.phases.size();
610     if(s >= 2 && phase_id > 0) {
611       return allData.phases[s-2];
612     } else {
613       return NULL;
614     }
615   }
616  
617   /// The data from two phases back
618   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
619     int s = allData.phases.size();
620     if(s >= 3 && phase_id > 0) {
621       return allData.phases[s-3];
622     } else {
623       return NULL;
624     }
625   }
626   
627
628   /// Called by either the application or the Control Point Framework to advance to the next phase  
629   void controlPointManager::gotoNextPhase(){
630     
631 #ifndef CP_DISABLE_TRACING
632     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
633     fflush(stdout);
634
635     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
636       alreadyRequestedAll = true;
637       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
638       CkPrintf("Requesting all measurements\n");
639       thisProxy.requestAll(*cb);
640       delete cb;
641     
642     } else {
643       
644       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
645         alreadyRequestedMemoryUsage = true;
646         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
647         thisProxy.requestMemoryUsage(*cb);
648         delete cb;
649       }
650       
651       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
652         alreadyRequestedIdleTime = true;
653         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
654         thisProxy.requestIdleTime(*cb);
655         delete cb;
656       }
657     }
658     
659
660 #endif
661
662
663
664 #if CMK_LBDB_ON && 0
665     LBDatabase * myLBdatabase = LBDatabaseObj();
666     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
667     const CkVec<LBObj*> objs = myLBDB->getObjs();
668     const int objCount = myLBDB->getObjCount();
669     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
670     
671     floatType maxObjWallTime = -1.0;
672     
673     for(int i=0;i<objs.length();i++){
674       LBObj* o = objs[i];
675       const LDObjData d = o->ObjData();
676       floatType cpuTime = d.cpuTime;
677       floatType wallTime = d.wallTime;
678       // can also get object handles from the LDObjData struct
679       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
680       if(wallTime > maxObjWallTime){
681
682       }
683       
684     }
685
686     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
687     
688 #endif    
689
690
691     
692     // increment phase id
693     phase_id++;
694     
695
696     // Create new entry for the phase we are starting now
697     instrumentedPhase * newPhase = new instrumentedPhase();
698     allData.phases.push_back(newPhase);
699     
700     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
701
702   }
703
704   /// An application uses this to register an instrumented timing for this phase
705   void controlPointManager::setTiming(double time){
706     currentPhaseData()->times.push_back(time);
707
708 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
709        
710     // First we should register this currently executing message as a path, because it is likely an important one to consider.
711     //    registerTerminalEntryMethod();
712     
713     // save the critical path for this phase
714     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
715     //    maxTerminalPathHistory.reset();
716     
717     
718     // Reset the counts for the currently executing message
719     //resetThisEntryPath();
720         
721 #endif
722     
723   }
724   
725   /// Entry method called on all PEs to request CPU utilization statistics
726   void controlPointManager::requestIdleTime(CkCallback cb){
727 #ifndef CP_DISABLE_TRACING
728     double i = localControlPointTracingInstance()->idleRatio();
729     double idle[3];
730     idle[0] = i;
731     idle[1] = i;
732     idle[2] = i;
733     
734     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
735     
736     localControlPointTracingInstance()->resetTimings();
737
738     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
739 #else
740     CkAbort("Should not get here\n");
741 #endif
742   }
743   
744   /// All processors reduce their memory usages in requestIdleTime() to this method
745   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
746 #ifndef CP_DISABLE_TRACING
747     int size=msg->getSize() / sizeof(double);
748     CkAssert(size==3);
749     double *r=(double *) msg->getData();
750         
751     instrumentedPhase* prevPhase = previousPhaseData();
752     if(prevPhase != NULL){
753       prevPhase->idleTime.min = r[0];
754       prevPhase->idleTime.avg = r[1]/CkNumPes();
755       prevPhase->idleTime.max = r[2];
756       prevPhase->idleTime.print();
757       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
758     } else {
759       CkPrintf("There is no previous phase to store the idle time measurements\n");
760     }
761     
762     alreadyRequestedIdleTime = false;
763     checkForShutdown();
764     delete msg;
765 #else
766     CkAbort("Should not get here\n");
767 #endif
768   }
769
770
771
772
773
774
775   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
776   void controlPointManager::requestAll(CkCallback cb){
777 #ifndef CP_DISABLE_TRACING
778     TraceControlPoints *t = localControlPointTracingInstance();
779
780     double data[ALL_REDUCTION_SIZE];
781
782     double *idle = data;
783     double *over = data+3;
784     double *mem = data+6;
785     double *msgBytes = data+7;  
786     double *grainsize = data+11;  
787
788     const double i = t->idleRatio();
789     idle[0] = i;
790     idle[1] = i;
791     idle[2] = i;
792
793     const double o = t->overheadRatio();
794     over[0] = o;
795     over[1] = o;
796     over[2] = o;
797
798     const double m = t->memoryUsageMB();
799     mem[0] = m;
800
801     msgBytes[0] = t->b2;
802     msgBytes[1] = t->b3;
803     msgBytes[2] = t->b2mlen;
804     msgBytes[3] = t->b3mlen;
805
806     grainsize[0] = t->grainSize();
807     
808     localControlPointTracingInstance()->resetAll();
809
810     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
811 #else
812     CkAbort("Should not get here\n");
813 #endif
814   }
815   
816   /// All processors reduce their memory usages in requestIdleTime() to this method
817   void controlPointManager::gatherAll(CkReductionMsg *msg){
818 #ifndef CP_DISABLE_TRACING
819     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
820     int size=msg->getSize() / sizeof(double);
821     double *data=(double *) msg->getData();
822         
823     double *idle = data;
824     double *over = data+3;
825     double *mem = data+6;
826     double *msgBytes = data+7;
827     double *granularity = data+11;
828
829
830     //    std::string b = allData.toString();
831
832     instrumentedPhase* prevPhase = previousPhaseData();
833     if(prevPhase != NULL){
834       prevPhase->idleTime.min = idle[0];
835       prevPhase->idleTime.avg = idle[1]/CkNumPes();
836       prevPhase->idleTime.max = idle[2];
837       
838       prevPhase->overheadTime.min = over[0];
839       prevPhase->overheadTime.avg = over[1]/CkNumPes();
840       prevPhase->overheadTime.max = over[2];
841       
842       prevPhase->memoryUsageMB = mem[0];
843       
844       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
845
846       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
847       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
848
849       /** The average of the grain sizes on all PEs in us */
850       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
851
852       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
853       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
854
855       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
856       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
857
858       if(bytesPerInvoke2 > bytesPerInvoke3)
859         prevPhase->bytesPerInvoke = bytesPerInvoke2;
860       else
861         prevPhase->bytesPerInvoke = bytesPerInvoke3;
862
863       //      prevPhase->print();
864       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
865
866       //      std::string a = allData.toString();
867
868       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
869       
870     } else {
871       CkPrintf("There is no previous phase to store measurements\n");
872     }
873     
874     alreadyRequestedAll = false;
875     checkForShutdown();
876     delete msg;
877 #else
878     CkAbort("Should not get here\n");
879 #endif
880   }
881
882
883
884
885   void controlPointManager::checkForShutdown(){
886     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
887       doExitNow();
888     }
889   }
890
891
892   void controlPointManager::exitIfReady(){
893      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
894        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
895        doExitNow();
896      } else {
897        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
898        exitWhenReady = true;
899      }
900   }
901
902
903
904   void controlPointManager::doExitNow(){
905           writeOutputToDisk();
906           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
907           CkExit();
908   }
909
910   void controlPointManager::writeOutputToDisk(){
911           if(writeDataFileAtShutdown){
912                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
913           }
914   }
915
916
917   /// Entry method called on all PEs to request memory usage
918   void controlPointManager::requestMemoryUsage(CkCallback cb){
919     int m = CmiMaxMemoryUsage() / 1024 / 1024;
920     CmiResetMaxMemory();
921     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
922     contribute(sizeof(int),&m,CkReduction::max_int, cb);
923   }
924
925   /// All processors reduce their memory usages to this method
926   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
927     int size=msg->getSize() / sizeof(int);
928     CkAssert(size==1);
929     int *m=(int *) msg->getData();
930
931     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
932     
933     instrumentedPhase* prevPhase = previousPhaseData();
934     if(prevPhase != NULL){
935       prevPhase->memoryUsageMB = m[0];
936     } else {
937       CkPrintf("No place to store memory usage");
938     }
939
940     alreadyRequestedMemoryUsage = false;
941     checkForShutdown();
942     delete msg;
943   }
944
945
946 //   /// Inform the control point framework that a named control point affects the priorities of some array  
947 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
948 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
949     
950 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
951 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
952 //     } else {
953 //       std::set<int> s;
954 //       s.insert(groupIdx);
955 //       affectsPrioritiesArray[std::string(name)] = s;
956 //     }
957     
958 // #if DEBUGPRINT   
959 //     std::map<std::string, std::set<int> >::iterator f;
960 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
961 //       std::string name = f->first;
962 //       std::set<int> &vals = f->second;
963 //       cout << "Control point " << name << " affects arrays: ";
964 //       std::set<int>::iterator i;
965 //       for(i=vals.begin(); i!=vals.end();++i){
966 //      cout << *i << " ";
967 //       }
968 //       cout << endl;
969 //     }
970 // #endif
971     
972 //   }
973   
974 //   /// Inform the control point framework that a named control point affects the priority of some entry method
975 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
976 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
977
978 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
979 //       affectsPrioritiesEP[std::string(name)].insert(idx);
980 //     } else {
981 //       std::set<int> s;
982 //       s.insert(idx);
983 //       affectsPrioritiesEP[std::string(name)] = s;
984 //     }
985     
986 // #if DEBUGPRINT
987 //     std::map<std::string, std::set<int> >::iterator f;
988 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
989 //       std::string name = f->first;
990 //       std::set<int> &vals = f->second;
991 //       cout << "Control point " << name << " affects EP: ";
992 //       std::set<int>::iterator i;
993 //       for(i=vals.begin(); i!=vals.end();++i){
994 //      cout << *i << " ";
995 //       }
996 //       cout << endl;
997 //     }
998 // #endif
999
1000
1001 //   }
1002   
1003
1004
1005 /// An interface callable by the application.
1006 void gotoNextPhase(){
1007   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1008 }
1009
1010 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1011 {
1012   gotoNextPhase();
1013 }
1014
1015
1016 /// A mainchare that is used just to create our controlPointManager group at startup
1017 class controlPointMain : public CBase_controlPointMain {
1018 public:
1019   controlPointMain(CkArgMsg* args){
1020 #if OLDRANDSEED
1021     struct timeval tp;
1022     gettimeofday(& tp, NULL);
1023     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1024 #else
1025     double time = CmiWallTimer();
1026     int sec = (int)time;
1027     int usec = (int)((time - (double)sec)*1000000.0);
1028     random_seed =  sec ^ usec;
1029 #endif
1030     
1031     
1032     double period;
1033     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1034     if(haveSamplePeriod){
1035       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1036       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1037     } else {
1038       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1039     }
1040     
1041     
1042     
1043     whichTuningScheme = RandomSelection;
1044
1045
1046     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1047       whichTuningScheme = RandomSelection;
1048     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1049       whichTuningScheme = ExhaustiveSearch;
1050     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1051       whichTuningScheme = AlwaysDefaults;
1052     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1053       whichTuningScheme = SimulatedAnnealing;
1054     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1055       whichTuningScheme = CriticalPathAutoPrioritization;
1056     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1057       whichTuningScheme = UseBestKnownTiming;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1059       whichTuningScheme = UseSteering;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1061       whichTuningScheme = MemoryAware;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1063       whichTuningScheme = Simplex;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1065       whichTuningScheme = DivideAndConquer;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period") ){
1067       whichTuningScheme = LDBPeriod;
1068     }
1069
1070     char *defValStr = NULL;
1071     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1072       CkPrintf("You specified default value string: %s\n", defValStr);
1073       
1074       // Parse the string, looking for commas
1075      
1076
1077       char *tok = strtok(defValStr, ",");
1078       while (tok) {
1079         // split on the equal sign
1080         int len = strlen(tok);
1081         char *eqsign = strchr(tok, '=');
1082         if(eqsign != NULL && eqsign != tok){
1083           *eqsign = '\0';
1084           char *cpname = tok;
1085           std::string cpName(tok);
1086           char *cpDefVal = eqsign+1;      
1087           int v=-1;
1088           if(sscanf(cpDefVal, "%d", &v) == 1){
1089             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1090             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1091             defaultControlPointValues[cpName] = v;
1092           }
1093         }
1094         tok = strtok(NULL, ",");
1095       }
1096
1097     }
1098
1099     shouldGatherAll = false;
1100     shouldGatherMemoryUsage = false;
1101     shouldGatherUtilization = false;
1102     
1103     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1104       shouldGatherAll = true;
1105     } else {
1106       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1107         shouldGatherMemoryUsage = true;
1108       }
1109       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1110         shouldGatherUtilization = true;
1111       }
1112     }
1113     
1114     writeDataFileAtShutdown = false;   
1115     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1116       writeDataFileAtShutdown = true;
1117     }
1118
1119     shouldFilterOutputData = true;
1120     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1121       shouldFilterOutputData = false;
1122     }
1123
1124
1125    loadDataFileAtStartup = false;   
1126     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1127       loadDataFileAtStartup = true;
1128     }
1129
1130     char *cpdatafile;
1131     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1132       sprintf(CPDataFilename, "%s", cpdatafile);
1133     } else {
1134       sprintf(CPDataFilename, "controlPointData.txt");
1135     }
1136
1137
1138     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1139   }
1140   ~controlPointMain(){}
1141 };
1142
1143 /// An interface callable by the application.
1144 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1145   CkAssert(CkMyPe() == 0);
1146   CkPrintf("Application has registered a control point change callback\n");
1147   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1148 }
1149
1150 /// An interface callable by the application.
1151 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1152   if(CkMyPe() == 0){
1153     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1154     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1155   }
1156 }
1157
1158 /// An interface callable by the application.
1159 void registerControlPointTiming(double time){
1160   CkAssert(CkMyPe() == 0);
1161 #if DEBUGPRINT>0
1162   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1163 #endif
1164   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1165 }
1166
1167 /// An interface callable by the application.
1168 void controlPointTimingStamp() {
1169   CkAssert(CkMyPe() == 0);
1170 #if DEBUGPRINT>0
1171   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1172 #endif
1173   
1174   static double prev_time = 0.0;
1175   double t = CmiWallTimer();
1176   double duration = t - prev_time;
1177   prev_time = t;
1178     
1179   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1180 }
1181
1182 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1183 {
1184   controlPointTimingStamp();
1185 }
1186
1187
1188
1189
1190
1191 /// Shutdown the control point framework, writing data to disk if necessary
1192 extern "C" void controlPointShutdown(){
1193   if(CkMyPe() == 0){
1194
1195     // wait for gathering of idle time & memory usage to complete
1196     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1197
1198   }
1199 }
1200
1201 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1202 void controlPointInitNode(){
1203 //  CkPrintf("controlPointInitNode()\n");
1204   registerExitFn(controlPointShutdown);
1205 }
1206
1207 /// Called periodically to allow control point framework to do things periodically
1208 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1209 #ifdef DEBUGPRINT
1210   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1211 #endif
1212   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1213   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1214 }
1215
1216
1217
1218
1219
1220 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1221 /// user observed characteristic to adapt specific control point values.
1222 /// This function must return valid values for newControlPoints.
1223 void controlPointManager::generatePlan() {
1224   const double startGenerateTime = CmiWallTimer();
1225   const int phase_id = this->phase_id;
1226   const int effective_phase = allData.phases.size();
1227
1228   // Only generate a plan once per phase
1229   if(generatedPlanForStep == phase_id) 
1230     return;
1231   generatedPlanForStep = phase_id;
1232  
1233   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1234   printTuningScheme();
1235
1236   // By default lets put the previous phase data into newControlPoints
1237   instrumentedPhase *prevPhase = previousPhaseData();
1238   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1239           newControlPoints[cpsIter->first] = cpsIter->second;
1240   }
1241
1242
1243   if( whichTuningScheme == RandomSelection ){
1244     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1245     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1246       const std::string &name = cpsIter->first;
1247       const std::pair<int,int> &bounds = cpsIter->second;
1248       const int lb = bounds.first;
1249       const int ub = bounds.second;
1250       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1251     }
1252   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1253     // -----------------------------------------------------------
1254     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1255     //   
1256     // This scheme will return the median value for the range 
1257     // early on, and then will transition over to the new control points
1258     // determined by the critical path adapting code
1259
1260     // This won't work until the periodic function is fixed up a bit
1261
1262     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1263     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1264       const std::string &name = cpsIter->first;
1265       const std::pair<int,int> &bounds = cpsIter->second;
1266       const int lb = bounds.first;
1267       const int ub = bounds.second;
1268       newControlPoints[name] =  (lb+ub)/2;
1269     }
1270
1271   } else if ( whichTuningScheme == MemoryAware ) {
1272
1273     // -----------------------------------------------------------
1274     //  STEERING BASED ON MEMORY USAGE
1275
1276     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1277     instrumentedPhase *prevPhase = previousPhaseData();
1278  
1279     if(phase_id%2 == 0){
1280       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1281       twoAgoPhase->print();
1282       CkPrintf("\n");
1283       fflush(stdout);
1284       
1285       // See if memory usage is low:
1286       double memUsage = twoAgoPhase->memoryUsageMB;
1287       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1288       fflush(stdout);
1289       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1290         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1291         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1292         
1293         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1294         newControlPoints = twoAgoPhase->controlPoints;
1295
1296
1297         CkPrintf("Steering (memory based) initialized plan\n");
1298         fflush(stdout);
1299
1300         // look for a possible control point knob to turn
1301         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1302         
1303         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1304         bool found = false;
1305         std::string cpName;
1306         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1307         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1308         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1309           cpName = iter->first;
1310           info = &iter->second;
1311           found = true;
1312           break;
1313         }
1314
1315         // Adapt the control point value
1316         if(found){
1317           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1318           fflush(stdout);
1319           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1320           const int maxValue = controlPointSpace[cpName].second;
1321           
1322           if(twoAgoValue+1 <= maxValue){
1323             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1324           }
1325         }
1326         
1327       }
1328     }
1329
1330     CkPrintf("Steering (memory based) done for this phase\n");
1331     fflush(stdout);
1332
1333   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1334
1335     // -----------------------------------------------------------
1336     //  USE BEST KNOWN TIME  
1337     static bool firstTime = true;
1338     if(firstTime){
1339       firstTime = false;
1340       instrumentedPhase *best = allData.findBest(); 
1341       CkPrintf("Best known phase is:\n");
1342       best->print();
1343       CkPrintf("\n");
1344       
1345       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1346       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1347         const std::string &name = cpsIter->first;
1348         newControlPoints[name] =  best->controlPoints[name];
1349       }
1350     }
1351   } else if( whichTuningScheme == LDBPeriod) {
1352     // Assume this is used in this manner:
1353     //  1) go to next phase
1354     //  2) request control point
1355     //  3) load balancing
1356     //  4) computation
1357
1358
1359     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1360     instrumentedPhase *prevPhase = previousPhaseData();
1361     
1362
1363     // Build model of execution time based on two phases ago
1364     // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1365     const std::vector<double> &times = twoAgoPhase->times;
1366     const int oldNumTimings = times.size();
1367     const int b1 = 2 + (oldNumTimings-2)/3;
1368     const int b2 = 2 + (2*(oldNumTimings-2))/3;
1369     double s1 = 0;
1370     double s2 = 0;
1371     double s3 = 0;
1372
1373     const double ldbStepsTime = times[0] + times[1];
1374     
1375     for(int i=2; i<=b1; i++){
1376       s1 += times[i];
1377     }
1378     for(int i=b1+1; i<=b2; i++){
1379       s2 += times[i];
1380     }
1381     for(int i=b2+1; i<=oldNumTimings; i++){
1382       s3 += times[i];
1383     }
1384
1385     
1386     // Compute the estimated time for the last phase's data
1387     const std::vector<double> &timesNew = prevPhase->times;
1388     const int newNumTimings = timesNew.size();
1389     
1390     const double a1 = s1 / ((newNumTimings-2)/3);
1391     const double a2 = s2 / ((newNumTimings-2)/3);
1392     const double a3 = s3 / ((newNumTimings-2)/3);
1393     
1394     const double a = (a1-2.0*a2+a3)/2.0;
1395     const double b = (a1-4.0*a2+3.0*a3)/2.0;
1396     const double c = a3;
1397     
1398     // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1399     const double x1 = (double)newNumTimings / (double)oldNumTimings + 0.5;
1400     const double x2 = 0.5;
1401     const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1402     
1403     
1404     // Measure actual time
1405     double sum = 0.0;
1406     for(int i=2; i<newNumTimings; ++i){
1407       sum += timesNew[i];
1408     }
1409
1410     const double avg = sum / ((double)(newNumTimings-2));
1411     const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1412
1413     // Determine load balance cost
1414     const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1415     
1416     // Determine whether LB cost outweights the estimated benefit
1417     CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1418     
1419     
1420     
1421   } else if ( whichTuningScheme == UseSteering ) {
1422           // -----------------------------------------------------------
1423           //  STEERING BASED ON KNOWLEDGE
1424
1425           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1426           // plans are only generated after 3 phases
1427
1428           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1429           instrumentedPhase *prevPhase = previousPhaseData();
1430
1431           if(phase_id%4 == 0){
1432                   CkPrintf("Steering based on 2 phases ago:\n");
1433                   twoAgoPhase->print();
1434                   CkPrintf("\n");
1435                   fflush(stdout);
1436
1437                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1438
1439
1440                   // ========================================= Concurrency =============================================
1441                   // See if idle time is high:
1442                   {
1443                           double idleTime = twoAgoPhase->idleTime.avg;
1444                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1445                           fflush(stdout);
1446                           if(idleTime > 0.10){
1447                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1448                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1449
1450                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1451
1452                                   bool found = false;
1453                                   std::string cpName;
1454                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1455                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1456                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1457                                           cpName = iter->first;
1458                                           info = &iter->second;
1459
1460                                           // Initialize a new plan based on two phases ago
1461                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1462
1463                                           CkPrintf("Steering found knob to turn\n");
1464                                           fflush(stdout);
1465
1466                                           if(info->first == ControlPoint::EFF_INC){
1467                                                   const int maxValue = controlPointSpace[cpName].second;
1468                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1469                                                   if(twoAgoValue+1 <= maxValue){
1470                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1471                                                   }
1472                                           } else {
1473                                                   const int minValue = controlPointSpace[cpName].second;
1474                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1475                                                   if(twoAgoValue-1 >= minValue){
1476                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1477                                                   }
1478                                           }
1479
1480                                           possibleNextStepPlans.push_back(aNewPlan);
1481
1482                                   }
1483                           }
1484                   }
1485
1486                   // ========================================= Grain Size =============================================
1487                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1488                   {
1489                           double overheadTime = twoAgoPhase->overheadTime.avg;
1490                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1491                           fflush(stdout);
1492                           if(overheadTime > 0.10){
1493                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1494                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1495
1496                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1497
1498                                   bool found = false;
1499                                   std::string cpName;
1500                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1501                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1502                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1503                                           cpName = iter->first;
1504                                           info = &iter->second;
1505
1506                                           // Initialize a new plan based on two phases ago
1507                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1508
1509
1510
1511                                           CkPrintf("Steering found knob to turn\n");
1512                                           fflush(stdout);
1513
1514                                           if(info->first == ControlPoint::EFF_INC){
1515                                                   const int maxValue = controlPointSpace[cpName].second;
1516                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1517                                                   if(twoAgoValue+1 <= maxValue){
1518                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1519                                                   }
1520                                           } else {
1521                                                   const int minValue = controlPointSpace[cpName].second;
1522                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1523                                                   if(twoAgoValue-1 >= minValue){
1524                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1525                                                   }
1526                                           }
1527
1528                                           possibleNextStepPlans.push_back(aNewPlan);
1529
1530                                   }
1531
1532                           }
1533                   }
1534                   // ========================================= GPU Offload =============================================
1535                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1536                   {
1537                           double idleTime = twoAgoPhase->idleTime.avg;
1538                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1539                           fflush(stdout);
1540                           if(idleTime > 0.10){
1541                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1542                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1543
1544                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1545
1546                                   bool found = false;
1547                                   std::string cpName;
1548                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1549                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1550                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1551                                           cpName = iter->first;
1552                                           info = &iter->second;
1553
1554                                           // Initialize a new plan based on two phases ago
1555                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1556
1557
1558                                           CkPrintf("Steering found knob to turn\n");
1559                                           fflush(stdout);
1560
1561                                           if(info->first == ControlPoint::EFF_DEC){
1562                                                   const int maxValue = controlPointSpace[cpName].second;
1563                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1564                                                   if(twoAgoValue+1 <= maxValue){
1565                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1566                                                   }
1567                                           } else {
1568                                                   const int minValue = controlPointSpace[cpName].second;
1569                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1570                                                   if(twoAgoValue-1 >= minValue){
1571                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1572                                                   }
1573                                           }
1574
1575                                           possibleNextStepPlans.push_back(aNewPlan);
1576
1577                                   }
1578
1579                           }
1580                   }
1581
1582                   // ========================================= Done =============================================
1583
1584
1585                   if(possibleNextStepPlans.size() > 0){
1586                           newControlPoints = possibleNextStepPlans[0];
1587                   }
1588
1589
1590                   CkPrintf("Steering done for this phase\n");
1591                   fflush(stdout);
1592
1593           }  else {
1594                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1595                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1596                   fflush(stdout);
1597           }
1598
1599
1600
1601   } else if( whichTuningScheme == SimulatedAnnealing ) {
1602
1603     // -----------------------------------------------------------
1604     //  SIMULATED ANNEALING
1605     //  Simulated Annealing style hill climbing method
1606     //
1607     //  Find the best search space configuration, and try something
1608     //  nearby it, with a radius decreasing as phases increase
1609
1610     CkPrintf("Finding best phase\n");
1611     instrumentedPhase *bestPhase = allData.findBest();  
1612     CkPrintf("best found:\n"); 
1613     bestPhase->print(); 
1614     CkPrintf("\n"); 
1615     
1616
1617     const int convergeByPhase = 100;
1618     // Determine from 0.0 to 1.0 how far along we are
1619     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1620
1621     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1622     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1623       const std::string &name = cpsIter->first;
1624       const std::pair<int,int> &bounds = cpsIter->second;
1625       const int minValue = bounds.first;
1626       const int maxValue = bounds.second;
1627       
1628       const int before = bestPhase->controlPoints[name];   
1629   
1630       const int range = (maxValue-minValue+1)*(1.0-progress);
1631
1632       int high = min(before+range, maxValue);
1633       int low = max(before-range, minValue);
1634       
1635       newControlPoints[name] = low;
1636       if(high-low > 0){
1637         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1638       } 
1639       
1640     }
1641
1642   } else if ( whichTuningScheme == DivideAndConquer ) {
1643
1644           // -----------------------------------------------------------
1645           //  STEERING FOR Divide & Conquer Programs
1646           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1647           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1648           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1649
1650           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1651           // plans are only generated after 3 phases
1652
1653           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1654           instrumentedPhase *prevPhase = previousPhaseData();
1655
1656           if(phase_id%4 == 0){
1657                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1658                   twoAgoPhase->print();
1659                   CkPrintf("\n");
1660                   fflush(stdout);
1661
1662                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1663
1664
1665                   // ========================================= Concurrency =============================================
1666                   // See if idle time is high:
1667                   {
1668                           double idleTime = twoAgoPhase->idleTime.avg;
1669                           double overheadTime = twoAgoPhase->overheadTime.avg;
1670
1671
1672                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1673                           fflush(stdout);
1674                           if(idleTime+overheadTime > 0.10){
1675                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1676                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1677
1678                                   int direction = -1;
1679                                   if (idleTime>overheadTime){
1680                                           // We need to decrease the grain size, or increase the available concurrency
1681                                           direction = 1;
1682                                   }
1683
1684                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1685
1686                                   bool found = false;
1687                                   std::string cpName;
1688                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1689                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1690                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1691                                           cpName = iter->first;
1692                                           info = &iter->second;
1693                                           
1694                                           // Initialize a new plan based on two phases ago
1695                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1696                                           bool newPlanExists = false;
1697
1698                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1699                                           fflush(stdout);
1700
1701
1702
1703                                           if(info->first == ControlPoint::EFF_INC){
1704                                                   const int minValue = controlPointSpace[cpName].first;
1705                                                   const int maxValue = controlPointSpace[cpName].second;
1706                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1707                                                   const int newVal = twoAgoValue+1*direction;
1708                                                   if(newVal <= maxValue && newVal >= minValue){
1709                                                     aNewPlan[cpName] = newVal;
1710                                                     newPlanExists = true;
1711                                                   } else {
1712                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1713                                                   }
1714                                           } else {
1715                                                   const int minValue = controlPointSpace[cpName].first;
1716                                                   const int maxValue = controlPointSpace[cpName].second;
1717                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1718                                                   const int newVal = twoAgoValue-1*direction;
1719                                                   if(newVal <= maxValue && newVal >= minValue){
1720                                                           aNewPlan[cpName] = newVal;
1721                                                           newPlanExists = true;
1722                                                   } else {
1723                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1724                                                   }
1725                                           }
1726
1727                                           if(newPlanExists){
1728                                             possibleNextStepPlans.push_back(aNewPlan);
1729                                           }
1730                                   }
1731                           }
1732                   }
1733
1734                   if(possibleNextStepPlans.size() > 0){
1735                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1736                     newControlPoints = possibleNextStepPlans[0];
1737                   } else {
1738                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1739                   }
1740           }
1741
1742   } else if( whichTuningScheme == Simplex ) {
1743
1744           // -----------------------------------------------------------
1745           //  Nelder Mead Simplex Algorithm
1746           //
1747           //  A scheme that takes a simplex (n+1 points) and moves it
1748           //  toward the minimum, eventually converging there.
1749
1750           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1751
1752   } else if( whichTuningScheme == ExhaustiveSearch ) {
1753     
1754     // -----------------------------------------------------------
1755     // EXHAUSTIVE SEARCH
1756    
1757     int numDimensions = controlPointSpace.size();
1758     CkAssert(numDimensions > 0);
1759   
1760     vector<int> lowerBounds(numDimensions);
1761     vector<int> upperBounds(numDimensions); 
1762   
1763     int d=0;
1764     std::map<std::string, pair<int,int> >::iterator iter;
1765     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1766       //    CkPrintf("Examining dimension %d\n", d);
1767       lowerBounds[d] = iter->second.first;
1768       upperBounds[d] = iter->second.second;
1769       d++;
1770     }
1771    
1772     // get names for each dimension (control point)
1773     vector<std::string> names(numDimensions);
1774     d=0;
1775     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
1776       names[d] = niter->first;
1777       d++;
1778     }
1779   
1780   
1781     // Create the first possible configuration
1782     vector<int> config = lowerBounds;
1783     config.push_back(0);
1784   
1785     // Increment until finding an unused configuration
1786     allData.cleanupNames(); // put -1 values in for any control points missing
1787     std::vector<instrumentedPhase*> &phases = allData.phases;     
1788
1789     while(true){
1790     
1791       std::vector<instrumentedPhase*>::iterator piter; 
1792       bool testedConfiguration = false; 
1793       for(piter = phases.begin(); piter != phases.end(); piter++){ 
1794       
1795         // Test if the configuration matches this phase
1796         bool match = true;
1797         for(int j=0;j<numDimensions;j++){
1798           match &= (*piter)->controlPoints[names[j]] == config[j];
1799         }
1800       
1801         if(match){
1802           testedConfiguration = true; 
1803           break;
1804         } 
1805       } 
1806     
1807       if(testedConfiguration == false){ 
1808         break;
1809       } 
1810     
1811       // go to next configuration
1812       config[0] ++;
1813       // Propagate "carrys"
1814       for(int i=0;i<numDimensions;i++){
1815         if(config[i] > upperBounds[i]){
1816           config[i+1]++;
1817           config[i] = lowerBounds[i];
1818         }
1819       }
1820       // check if we have exhausted all possible values
1821       if(config[numDimensions] > 0){
1822         break;
1823       }
1824        
1825     }
1826   
1827     if(config[numDimensions] > 0){
1828       for(int i=0;i<numDimensions;i++){ 
1829         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
1830       }
1831     }
1832
1833     // results are now in config[i]
1834     
1835     for(int i=0; i<numDimensions; i++){
1836       newControlPoints[names[i]] = config[i];
1837       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
1838     }
1839
1840
1841   } else {
1842     CkAbort("Some Control Point tuning strategy must be enabled.\n");
1843   }
1844
1845
1846   const double endGenerateTime = CmiWallTimer();
1847   
1848   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
1849   
1850 }
1851
1852
1853
1854
1855 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
1856
1857
1858 /// Get control point value from range of integers [lb,ub]
1859 int controlPoint(const char *name, int lb, int ub){
1860   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
1861   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
1862   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
1863   int result;
1864
1865   // if we already have control point values for phase, return them
1866   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
1867     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
1868     return thisPhaseData->controlPoints[std::string(name)];
1869   }
1870   
1871
1872   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
1873     // For the first few phases, just use the lower bound, or the default if one was provided 
1874     // This ensures that the ranges for all the control points are known before we do anything fancy
1875     result = lb;
1876
1877     if(defaultControlPointValues.count(std::string(name)) > 0){
1878       int v = defaultControlPointValues[std::string(name)];
1879       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
1880       result = v;
1881     }
1882
1883   } else if(controlPointSpace.count(std::string(name)) == 0){
1884     // if this is the first time we've seen the CP, then return the lower bound
1885     result = lb;
1886     
1887   }  else {
1888     // Generate a plan one time for each phase
1889     controlPointManagerProxy.ckLocalBranch()->generatePlan();
1890     
1891     // Use the planned value:
1892     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
1893     
1894   }
1895
1896   if(!isInRange(result,ub,lb)){
1897     std::cerr << "control point out of range: " << result << " " << lb << " " << ub << std::endl;
1898     fflush(stdout);
1899     fflush(stderr);
1900   }
1901   CkAssert(isInRange(result,ub,lb));
1902   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
1903
1904   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
1905
1906   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
1907   //  thisPhaseData->print();
1908   
1909   return result;
1910 }
1911
1912
1913 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
1914   CkAssert(sizeof(lb) == 4);
1915   CkAssert(CkMyPe() == 0);
1916   return controlPoint("FortranCP", *lb, *ub);
1917 }
1918
1919
1920
1921
1922 /// Inform the control point framework that a named control point affects the priorities of some array  
1923 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
1924 //   CkGroupID aid = arraybase.ckGetArrayID();
1925 //   int groupIdx = aid.idx;
1926 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
1927 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
1928 // }
1929
1930
1931 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
1932 // void controlPointPriorityEntry(const char *name, int idx){
1933 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
1934 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
1935 // }
1936
1937
1938
1939
1940
1941 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
1942
1943     This function decomposes the algorithm into a state machine that allows it to
1944     evaluate one or more configurations through subsequent clls to this function.
1945     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
1946
1947     At one point in the algorithm, n+1 parameter configurations must be evaluated,
1948     so a list of them will be created and they will be evaluated, one per call.
1949
1950     Currently there is no stopping criteria, but the simplex ought to contract down
1951     to a few close configurations, and hence not much change will happen after this 
1952     point.
1953
1954  */
1955 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
1956
1957         if(useBestKnown){
1958                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
1959                 return;
1960         }
1961
1962
1963         if(firstSimplexPhase< 0){
1964                 firstSimplexPhase = allData.phases.size()-1;
1965                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
1966         }
1967
1968         int n = controlPointSpace.size();
1969
1970         CkAssert(n>=2);
1971
1972
1973         if(simplexState == beginning){
1974                 // First we evaluate n+1 random points, then we go to a different state
1975                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
1976                         CkPrintf("Simplex Tuning: chose random configuration\n");
1977
1978                         // Choose random values from the middle third of the range in each dimension
1979                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1980                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1981                                 const std::string &name = cpsIter->first;
1982                                 const std::pair<int,int> &bounds = cpsIter->second;
1983                                 const int lb = bounds.first;
1984                                 const int ub = bounds.second;
1985                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1986                         }
1987                 } else {
1988                         // Set initial simplex:
1989                         for(int i=0; i<n+1; i++){
1990                                 simplexIndices.insert(firstSimplexPhase+i);
1991                         }
1992                         CkAssert(simplexIndices.size() == n+1);
1993
1994                         // Transition to reflecting state
1995                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1996
1997                 }
1998
1999         } else if (simplexState == reflecting){
2000                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2001                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2002
2003                 // Find the highest time from other points in the simplex
2004                 double highestTimeForOthersInSimplex = 0.0;
2005                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2006                         double t = allData.phases[*iter]->medianTime();
2007                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2008                                 highestTimeForOthersInSimplex = t;
2009                         }
2010                 }
2011
2012                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2013
2014                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2015                         // if y* < yl,  transition to "expanding"
2016                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2017
2018                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2019                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2020                         CkAssert(simplexIndices.size() == n+1);
2021                         simplexIndices.erase(worstPhase);
2022                         CkAssert(simplexIndices.size() == n);
2023                         simplexIndices.insert(pPhase);
2024                         CkAssert(simplexIndices.size() == n+1);
2025                         // Transition to reflecting state
2026                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2027
2028                 } else {
2029                         // if y* > yh
2030                         if(recentPhaseTime <= worstTime){
2031                                 // replace Ph with P*
2032                                 CkAssert(simplexIndices.size() == n+1);
2033                                 simplexIndices.erase(worstPhase);
2034                                 CkAssert(simplexIndices.size() == n);
2035                                 simplexIndices.insert(pPhase);
2036                                 CkAssert(simplexIndices.size() == n+1);
2037                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2038                                 worstPhase = pPhase;
2039                                 // Just as a sanity check, make sure we don't use the non-updated values.
2040                                 worst.clear();
2041                         }
2042
2043                         // Now, form P** and do contracting phase
2044                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2045
2046                 }
2047
2048         } else if (simplexState == doneExpanding){
2049                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2050                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2051                 // A new configuration has been evaluated
2052
2053                 // Check to see if y** < y1
2054                 if(recentPhaseTime < bestTime){
2055                         // replace Ph by P**
2056                         CkAssert(simplexIndices.size() == n+1);
2057                         simplexIndices.erase(worstPhase);
2058                         CkAssert(simplexIndices.size() == n);
2059                         simplexIndices.insert(p2Phase);
2060                         CkAssert(simplexIndices.size() == n+1);
2061                 } else {
2062                         //      replace Ph by P*
2063                         CkAssert(simplexIndices.size() == n+1);
2064                         simplexIndices.erase(worstPhase);
2065                         CkAssert(simplexIndices.size() == n);
2066                         simplexIndices.insert(pPhase);
2067                         CkAssert(simplexIndices.size() == n+1);
2068                 }
2069
2070                 // Transition to reflecting state
2071                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2072
2073         }  else if (simplexState == contracting){
2074                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2075                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2076                 // A new configuration has been evaluated
2077
2078                 // Check to see if y** > yh
2079                 if(recentPhaseTime <= worstTime){
2080                         // replace Ph by P**
2081                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2082                         CkAssert(simplexIndices.size() == n+1);
2083                         simplexIndices.erase(worstPhase);
2084                         CkAssert(simplexIndices.size() == n);
2085                         simplexIndices.insert(p2Phase);
2086                         CkAssert(simplexIndices.size() == n+1);
2087                         // Transition to reflecting state
2088                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2089
2090                 } else {
2091                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2092                         simplexState = stillContracting;
2093
2094                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2095                         stillMustContractList = simplexIndices;
2096
2097                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2098                 }
2099
2100         } else if (simplexState == stillContracting){
2101                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2102
2103                 if(stillMustContractList.size()>0){
2104                         int c = *stillMustContractList.begin();
2105                         stillMustContractList.erase(c);
2106                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2107
2108                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2109
2110                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2111                         int v = 0;
2112                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2113                                 const std::string &name = cpsIter->first;
2114                                 const std::pair<int,int> &bounds = cpsIter->second;
2115                                 const int lb = bounds.first;
2116                                 const int ub = bounds.second;
2117
2118                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2119
2120                                 newControlPoints[name] = keepInRange(val,lb,ub);
2121                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2122                                 v++;
2123                         }
2124                 } else {
2125                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2126                         CkAssert(stillMustContractList.size() == 0);
2127                         simplexIndices.clear();
2128                         CkAssert(simplexIndices.size()==0);
2129                         for(int i=0; i<n+1; i++){
2130                                 simplexIndices.insert(allData.phases.size()-2-i);
2131                         }
2132                         CkAssert(simplexIndices.size()==n+1);
2133
2134                         // Transition to reflecting state
2135                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2136
2137                 }
2138
2139
2140         } else if (simplexState == expanding){
2141                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2142                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2143                 // A new configuration has been evaluated
2144
2145                 // determine if y** < yl
2146                 if(recentPhaseTime < bestTime){
2147                         // replace Ph by P**
2148                         CkAssert(simplexIndices.size() == n+1);
2149                         simplexIndices.erase(worstPhase);
2150                         CkAssert(simplexIndices.size() == n);
2151                         simplexIndices.insert(p2Phase);
2152                         CkAssert(simplexIndices.size() == n+1);
2153                         // Transition to reflecting state
2154                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2155
2156                 } else {
2157                         // else, replace ph with p*
2158                         CkAssert(simplexIndices.size() == n+1);
2159                         simplexIndices.erase(worstPhase);
2160                         CkAssert(simplexIndices.size() == n);
2161                         simplexIndices.insert(pPhase);
2162                         CkAssert(simplexIndices.size() == n+1);
2163                         // Transition to reflecting state
2164                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2165                 }
2166
2167
2168         } else {
2169                 CkAbort("Unknown simplexState");
2170         }
2171
2172 }
2173
2174
2175
2176 /** Replace the worst point with its reflection across the centroid. */
2177 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2178
2179         int n = controlPointSpace.size();
2180
2181         printSimplex(allData);
2182
2183         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2184
2185
2186         // Quit if the diameter of our simplex is small
2187         double maxr = 0.0;
2188         for(int i=0; i<n+1; i++){
2189                 //              Compute r^2 of this simplex point from the centroid
2190                 double r2 = 0.0;
2191                 std::vector<double> p = pointCoords(allData, i);
2192                 for(int d=0; d<p.size(); d++){
2193                         double r1 = (p[d] * centroid[d]);
2194                         r2 += r1*r1;
2195                 }
2196                 if(r2 > maxr)
2197                         maxr = r2;
2198         }
2199
2200 #if 0
2201         // At some point we quit this tuning
2202         if(maxr < 10){
2203                 useBestKnown = true;
2204                 instrumentedPhase *best = allData.findBest();
2205                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2206
2207                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2208                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2209                         const std::string &name = cpsIter->first;
2210                         newControlPoints[name] =  best->controlPoints[name];
2211                 }
2212         }
2213 #endif
2214
2215         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2216
2217         pPhase = allData.phases.size()-1;
2218         P.resize(n);
2219         for(int i=0; i<n; i++){
2220                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2221         }
2222
2223         for(int i=0; i<P.size(); i++){
2224                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2225         }
2226
2227
2228         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2229         int v = 0;
2230         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2231                 const std::string &name = cpsIter->first;
2232                 const std::pair<int,int> &bounds = cpsIter->second;
2233                 const int lb = bounds.first;
2234                 const int ub = bounds.second;
2235                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2236                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2237                 v++;
2238         }
2239
2240
2241         // Transition to "reflecting" state
2242         simplexState = reflecting;
2243         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2244
2245 }
2246
2247
2248
2249
2250 /** Replace the newly tested reflection with a further expanded version of itself. */
2251 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2252         int n = controlPointSpace.size();
2253         printSimplex(allData);
2254
2255         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2256         // I believe the equation for P** in the text on page 308 is correct.
2257
2258         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2259
2260
2261         p2Phase = allData.phases.size()-1;
2262         P2.resize(n);
2263         for(int i=0; i<n; i++){
2264                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2265         }
2266
2267         for(int i=0; i<P2.size(); i++){
2268                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2269         }
2270
2271
2272         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2273         int v = 0;
2274         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2275                 const std::string &name = cpsIter->first;
2276                 const std::pair<int,int> &bounds = cpsIter->second;
2277                 const int lb = bounds.first;
2278                 const int ub = bounds.second;
2279                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2280                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2281                 v++;
2282         }
2283
2284
2285         // Transition to "doneExpanding" state
2286         simplexState = doneExpanding;
2287         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2288
2289 }
2290
2291
2292
2293
2294 /** Replace the newly tested reflection with a further expanded version of itself. */
2295 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2296         int n = controlPointSpace.size();
2297         printSimplex(allData);
2298
2299         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2300
2301
2302         p2Phase = allData.phases.size()-1;
2303         P2.resize(n);
2304         for(int i=0; i<n; i++){
2305                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2306         }
2307
2308         for(int i=0; i<P2.size(); i++){
2309                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2310         }
2311
2312
2313         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2314         int v = 0;
2315         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2316                 const std::string &name = cpsIter->first;
2317                 const std::pair<int,int> &bounds = cpsIter->second;
2318                 const int lb = bounds.first;
2319                 const int ub = bounds.second;
2320                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2321                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2322                 v++;
2323         }
2324         
2325         
2326         // Transition to "contracting" state
2327         simplexState = contracting;
2328         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2329
2330 }
2331
2332
2333
2334
2335 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2336         int n = controlPointSpace.size();
2337
2338         // Find worst performing point in the simplex
2339         worstPhase = -1;
2340         worstTime = -1.0;
2341         bestPhase = 10000000;
2342         bestTime = 10000000;
2343         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2344                 double t = allData.phases[*iter]->medianTime();
2345                 if(t > worstTime){
2346                         worstTime = t;
2347                         worstPhase = *iter;
2348                 }
2349                 if(t < bestTime){
2350                         bestTime = t;
2351                         bestPhase = *iter;
2352                 }
2353         }
2354         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2355
2356         best = pointCoords(allData, bestPhase);
2357         CkAssert(best.size() == n);
2358
2359         worst = pointCoords(allData, worstPhase);
2360         CkAssert(worst.size() == n);
2361
2362         // Calculate centroid of the remaining points in the simplex
2363         centroid.resize(n);
2364         for(int i=0; i<n; i++){
2365                 centroid[i] = 0.0;
2366         }
2367
2368         int numPts = 0;
2369
2370         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2371                 if(*iter != worstPhase){
2372                         numPts ++;
2373                         // Accumulate into the result vector
2374                         int c = 0;
2375                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2376                                 centroid[c] += citer->second;
2377                                 c++;
2378                         }
2379
2380                 }
2381         }
2382
2383         // Now divide the sums by the number of points.
2384         for(int v = 0; v<centroid.size(); v++) {
2385                 centroid[v] /= (double)numPts;
2386         }
2387
2388         CkAssert(centroid.size() == n);
2389
2390         for(int i=0; i<centroid.size(); i++){
2391                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2392         }
2393
2394
2395 }
2396
2397
2398
2399 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2400         std::vector<double> result;
2401         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2402                 result.push_back((double)citer->second);
2403         }
2404         return result;
2405 }
2406
2407
2408
2409
2410 void ControlPointWriteOutputToDisk(){
2411         CkAssert(CkMyPe() == 0);
2412         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2413 }
2414
2415
2416
2417 /*! @} */
2418
2419 #ifdef CP_DISABLE_TRACING
2420 #include "ControlPointsNoTrace.def.h"
2421 #else
2422 #include "ControlPoints.def.h"
2423 #endif