d6bdacab5b9bf63873c21a116876297e88f258a8
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4
5 #include "controlPoints.h"
6 #include "trace-controlPoints.h"
7 #include "LBDatabase.h"
8 #include "controlPoints.h"
9 #include "charm++.h"
10 #include "trace-projections.h"
11 #include <pathHistory.h>
12 #include "cp_effects.h"
13 #include <iostream>
14 #include <math.h>
15 #include <climits>
16
17 #if CMK_WITH_CONTROLPOINT
18
19 #define roundDouble(x)        ((long)(x+0.5))
20 #define myAbs(x)   (((x)>=0.0)?(x):(-1.0*(x)))
21 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
22
23 inline double closestInRange(double v, double a, double b){
24   return (v<a) ? a : ((v>b)?b:v);
25 }
26
27
28 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
29  
30
31 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
32 /**  @{ */
33
34 using namespace std;
35
36 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
37
38
39 //#undef DEBUGPRINT
40 //#define DEBUGPRINT 4
41
42 static void periodicProcessControlPoints(void* ptr, double currWallTime);
43
44
45 // A pointer to this PE's controlpoint manager Proxy
46 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
47 /* readonly */ int random_seed;
48 /* readonly */ long controlPointSamplePeriod;
49 /* readonly */ int whichTuningScheme;
50 /* readonly */ bool writeDataFileAtShutdown;
51 /* readonly */ bool shouldFilterOutputData;
52 /* readonly */ bool loadDataFileAtStartup;
53 /* readonly */ bool shouldGatherMemoryUsage;
54 /* readonly */ bool shouldGatherUtilization;
55 /* readonly */ bool shouldGatherAll;
56 /* readonly */ char CPDataFilename[512];
57
58 extern bool enableCPTracing;
59
60 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
61 /// These probably come from the command line arguments, so are available only on PE 0
62 std::map<std::string, int> defaultControlPointValues;
63
64
65
66 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
67
68
69
70 void printTuningScheme(){
71   switch(whichTuningScheme){
72   case RandomSelection:
73     CkPrintf("Tuning Scheme: RandomSelection\n");
74     break;
75   case AlwaysDefaults:
76     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
77     break;
78   case SimulatedAnnealing:
79     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
80     break;
81   case ExhaustiveSearch:
82     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
83     break;
84   case CriticalPathAutoPrioritization:
85     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
86     break;
87   case UseBestKnownTiming:
88     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
89     break;
90   case UseSteering:
91     CkPrintf("Tuning Scheme: UseSteering\n");
92     break;
93   case MemoryAware:
94     CkPrintf("Tuning Scheme: MemoryAware\n");
95     break;
96   case Simplex:
97     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
98     break;
99   case DivideAndConquer:
100     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
101     break;
102   case LDBPeriod:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
104     break;
105   case LDBPeriodLinear:
106     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
107     break;
108   case LDBPeriodQuadratic:
109     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
110     break;
111   case LDBPeriodOptimal:
112     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
113     break;
114   default:
115     CkPrintf("Unknown tuning scheme\n");
116     break;
117   }
118   fflush(stdout);
119 }
120
121
122
123 /// A reduction type that combines idle time measurements (min/sum/max etc.)
124 CkReduction::reducerType idleTimeReductionType;
125 /// A reducer that combines idle time measurements (min/sum/max etc.)
126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
127   double ret[3];
128   if(nMsg > 0){
129     CkAssert(msgs[0]->getSize()==3*sizeof(double));
130     double *m=(double *)msgs[0]->getData();
131     ret[0]=m[0];
132     ret[1]=m[1];
133     ret[2]=m[2];
134   }
135   for (int i=1;i<nMsg;i++) {
136     CkAssert(msgs[i]->getSize()==3*sizeof(double));
137     double *m=(double *)msgs[i]->getData();
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141   }  
142   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
143 }
144
145
146
147 /// A reduction type that combines idle, overhead, and memory measurements
148 CkReduction::reducerType allMeasuresReductionType;
149 /// A reducer that combines idle, overhead, and memory measurements
150 #define ALL_REDUCTION_SIZE 12
151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
152   double ret[ALL_REDUCTION_SIZE];
153   if(nMsg > 0){
154     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
155     double *m=(double *)msgs[0]->getData();
156     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
157   }
158   for (int i=1;i<nMsg;i++) {
159     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
160     double *m=(double *)msgs[i]->getData();
161     // idle time (min/sum/max)
162     ret[0]=min(ret[0],m[0]);
163     ret[1]+=m[1];
164     ret[2]=max(ret[2],m[2]);
165     // overhead time (min/sum/max)
166     ret[3]=min(ret[3],m[3]);
167     ret[4]+=m[4];
168     ret[5]=max(ret[5],m[5]);
169     // mem usage (max)
170     ret[6]=max(ret[6],m[6]);
171     // bytes per invocation for two types of entry methods
172     ret[7]+=m[7];
173     ret[8]+=m[8];
174     ret[9]+=m[9];
175     ret[10]+=m[10];
176     // Grain size (avg)
177     ret[11]+=m[11];
178   }  
179   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
180 }
181
182
183 /// Registers the control point framework's reduction handlers at startup on each PE
184 /*initproc*/ void registerCPReductions(void) {
185   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
186   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
187 }
188
189
190
191
192
193
194 /// Return an integer between 0 and num-1 inclusive
195 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
197   CkAssert(num > 0);
198
199   unsigned long hash = 0;
200   unsigned int c;
201   unsigned char * str = (unsigned char*)name;
202
203   while (c = *str++){
204     unsigned int c2 = (c+64)%128;
205     unsigned int c3 = (c2*5953)%127;
206     hash = c3 + (hash << 6) + (hash << 16) - hash;
207   }
208
209   unsigned long shuffled1 = (hash*2083)%7907;
210   unsigned long shuffled2 = (seed*4297)%2017;
211
212   unsigned long shuffled3 = (random_seed*4799)%7919;
213
214   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
215
216   unsigned int result = ((namehash * 6029) % 1117) % num;
217
218   CkAssert(result >=0 && result < num);
219   return result;
220 }
221
222
223
224 controlPointManager::controlPointManager() {
225   generatedPlanForStep = -1;
226
227     exitWhenReady = false;
228     alreadyRequestedMemoryUsage = false;   
229     alreadyRequestedIdleTime = false;
230     alreadyRequestedAll = false;
231     
232     instrumentedPhase * newPhase = new instrumentedPhase();
233     allData.phases.push_back(newPhase);   
234     
235     frameworkShouldAdvancePhase = false;
236     haveControlPointChangeCallback = false;
237 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
238     
239     phase_id = 0;
240
241     if(loadDataFileAtStartup){    
242       loadDataFile();
243     }
244
245     
246     if(CkMyPe() == 0){
247       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
248     }
249
250     traceRegisterUserEvent("No currently executing message", 5000);
251     traceRegisterUserEvent("Zero time along critical path", 5010);
252     traceRegisterUserEvent("Positive total time along critical path", 5020);
253     traceRegisterUserEvent("env->setPathHistory()", 6000);
254     traceRegisterUserEvent("Critical Path", 5900);
255     traceRegisterUserEvent("Table Entry", 5901);
256
257
258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
259     traceRegisterUserEvent("registerTerminalPath", 100); 
260 #endif
261
262   }
263   
264
265  controlPointManager::~controlPointManager(){
266 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
267   }
268
269   void controlPointManager::pup(PUP::er &p)
270   {
271     CBase_controlPointManager::pup(p);
272       // FIXME: does not work when control point is actually used,
273       // just minimal pup so that it allows exit function to work (exitIfReady).
274     p|generatedPlanForStep;
275     p|exitWhenReady;
276     p|alreadyRequestedMemoryUsage;
277     p|alreadyRequestedIdleTime;
278     p|alreadyRequestedAll;
279     p|frameworkShouldAdvancePhase;
280     p|haveControlPointChangeCallback;
281     p|phase_id;
282   }
283   
284
285   /// Loads the previous run data file
286   void controlPointManager::loadDataFile(){
287     ifstream infile(CPDataFilename);
288     vector<std::string> names;
289     std::string line;
290   
291     while(getline(infile,line)){
292       if(line[0] != '#')
293         break;
294     }
295   
296     int numTimings = 0;
297     std::istringstream n(line);
298     n >> numTimings;
299   
300     while(getline(infile,line)){ 
301       if(line[0] != '#') 
302         break; 
303     }
304
305     int numControlPointNames = 0;
306     std::istringstream n2(line);
307     n2 >> numControlPointNames;
308   
309     for(int i=0; i<numControlPointNames; i++){
310       getline(infile,line);
311       names.push_back(line);
312     }
313
314     for(int i=0;i<numTimings;i++){
315       getline(infile,line);
316       while(line[0] == '#')
317         getline(infile,line); 
318
319       instrumentedPhase * ips = new instrumentedPhase();
320
321       std::istringstream iss(line);
322
323       // Read memory usage for phase
324       iss >> ips->memoryUsageMB;
325       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
326
327       // Read idle time
328       iss >> ips->idleTime.min;
329       iss >> ips->idleTime.avg;
330       iss >> ips->idleTime.max;
331
332       // Read overhead time
333       iss >> ips->overheadTime.min;
334       iss >> ips->overheadTime.avg;
335       iss >> ips->overheadTime.max;
336
337       // read bytePerInvoke
338       iss >> ips->bytesPerInvoke;
339
340       // read grain size
341       iss >> ips->grainSize;
342
343       // Read control point values
344       for(int cp=0;cp<numControlPointNames;cp++){
345         int cpvalue;
346         iss >> cpvalue;
347         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
348       }
349
350       // ignore median time
351       double mt;
352       iss >> mt;
353
354       // Read all times
355
356       double time;
357
358       while(iss >> time){
359         ips->times.push_back(time);
360 #if DEBUGPRINT > 5
361         CkPrintf("read time %lf from file\n", time);
362 #endif
363       }
364
365       allData.phases.push_back(ips);
366
367     }
368
369     infile.close();
370   }
371
372
373
374   /// Add the current data to allData and output it to a file
375   void controlPointManager::writeDataFile(){
376     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
377     ofstream outfile(CPDataFilename);
378     allData.cleanupNames();
379
380 //    string s = allData.toString();
381 //    CkPrintf("At end: \n %s\n", s.c_str());
382
383     if(shouldFilterOutputData){
384       allData.verify();
385       allData.filterOutIncompletePhases();
386     }
387
388 //    string s2 = allData.toString();
389 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
390     if(allData.toString().length() > 10){
391       outfile << allData.toString();
392     } else {
393       outfile << " No data available to save to disk " << endl;
394     }
395     outfile.close();
396   }
397
398   /// User can register a callback that is called when application should advance to next phase
399   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
400     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
401     controlPointChangeCallback = cb;
402     haveControlPointChangeCallback = true;
403   }
404
405
406 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
407 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
408   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
409 }
410
411   /// Called periodically by the runtime to handle the control points
412   /// Currently called on each PE
413   void controlPointManager::processControlPoints(){
414
415     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
416
417
418     //==========================================================================================
419     // Print the data for each phase
420
421     const int s = allData.phases.size();
422
423 #if DEBUGPRINT
424     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
425     for(int p=0;p<s;++p){
426       const instrumentedPhase &phase = allData.phases[p];
427       const idleTimeContainer &idle = phase.idleTime;
428       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
429       vector<double> const &times = phase.times;
430
431       CkPrintf("Phase %d:\n", p);
432       idle.print();
433      //   CkPrintf("critical paths: (* affected by control point)\n");
434         //   for(int i=0;i<criticalPaths.size(); i++){
435         // If affected by a control point
436         //      criticalPaths[i].print();
437
438       //        criticalPaths[i].print();
439       //        CkPrintf("\n");
440         
441
442       //   }
443       CkPrintf("Timings:\n");
444       for(int i=0;i<times.size(); i++){
445         CkPrintf("%lf ", times[i]);
446       }
447       CkPrintf("\n");
448
449     }
450     
451     CkPrintf("\n\n");
452
453
454 #endif
455
456
457
458     //==========================================================================================
459     // If this is a phase during which we try to adapt control point values based on critical path
460 #if 0
461     if( s%5 == 4) {
462
463       // Find the most recent phase with valid critical path data and idle time measurements      
464       int whichPhase=-1;
465       for(int p=0; p<s; ++p){
466         const instrumentedPhase &phase = allData.phases[p];
467         const idleTimeContainer &idle = phase.idleTime;
468         if(idle.isValid() && phase.criticalPaths.size()>0 ){
469           whichPhase = p;
470         }
471       }
472       
473       
474       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
475       const instrumentedPhase &phase = allData.phases[whichPhase];
476       const idleTimeContainer &idle = phase.idleTime;
477       
478       if(idle.min > 0.1){
479         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
480         
481         // Determine the median critical path for this phase
482         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
483         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
484         CkAssert(phase.criticalPaths.size() > 0);
485         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
486         
487         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
488         
489         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
490           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
491
492
493           CkPrintf("Finding control points related to the critical path\n");
494           int cpcount = 0;
495           std::set<std::string> controlPointsAffectingCriticalPath;
496
497           
498           for(int e=0;e<path.getNumUsed();e++){
499             if(path.getUsedCount(e)>0){
500               int ep = path.getUsedEp(e);
501
502               std::map<std::string, std::set<int> >::iterator iter;
503               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
504                 if(iter->second.count(ep)>0){
505                   controlPointsAffectingCriticalPath.insert(iter->first);
506                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
507                   cpcount++;
508                 }
509               }
510               
511             }
512           }
513           
514
515           if(cpcount==0){
516             CkPrintf("No control points are known to affect the critical path\n");
517           } else {
518             double beginTime = CmiWallTimer();
519
520             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
521             
522             newControlPoints = phase.controlPoints;
523             
524             if(frameworkShouldAdvancePhase){
525               gotoNextPhase();  
526             }
527             
528             if(haveControlPointChangeCallback){ 
529 #if DEBUGPRINT
530               CkPrintf("Calling control point change callback\n");
531 #endif
532               // Create a high priority message and send it to the callback
533               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
534               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
535               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
536               controlPointChangeCallback.send(msg);
537               
538             }
539             
540             
541             // adjust the control points that can affect the critical path
542
543             char textDescription[4096*2];
544             textDescription[0] = '\0';
545
546             std::map<std::string,int>::iterator newCP;
547             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
548               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
549                 // decrease the value (increase priority) if within range
550                 int lowerbound = controlPointSpace[newCP->first].first;
551                 if(newCP->second > lowerbound){
552                   newControlPointsAvailable = true;
553                   newCP->second --;
554                 }
555               }
556             }
557            
558             // Create a string for a projections user event
559             if(1){
560               std::map<std::string,int>::iterator newCP;
561               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
562                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
563               }
564             }
565
566             char *userEventString = new char[4096*2];
567             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
568             
569             static int userEventCounter = 20000;
570             CkPrintf("USER EVENT: %s\n", userEventString);
571             
572             traceRegisterUserEvent(userEventString, userEventCounter); 
573             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
574             userEventCounter++;
575             
576           }
577           
578         } else {
579           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
580         }
581         
582         
583       }
584       
585     } else {
586       
587       
588     }
589    
590 #endif
591
592
593
594     if(frameworkShouldAdvancePhase){
595       gotoNextPhase();  
596     }
597     
598     if(haveControlPointChangeCallback){ 
599       // Create a high priority message and send it to the callback
600       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
601       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
602       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
603       controlPointChangeCallback.send(msg);
604     }
605     
606   }
607   
608   /// Determine if any control point is known to affect an entry method
609   bool controlPointManager::controlPointAffectsThisEP(int ep){
610     std::map<std::string, std::set<int> >::iterator iter;
611     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
612       if(iter->second.count(ep)>0){
613         return true;
614       }
615     }
616     return false;    
617   }
618   
619   /// Determine if any control point is known to affect a chare array  
620   bool controlPointManager::controlPointAffectsThisArray(int array){
621     std::map<std::string, std::set<int> >::iterator iter;
622     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
623       if(iter->second.count(array)>0){
624         return true;
625       }
626     }
627     return false;   
628   }
629   
630
631   /// The data from the current phase
632   instrumentedPhase * controlPointManager::currentPhaseData(){
633     int s = allData.phases.size();
634     CkAssert(s>=1);
635     return allData.phases[s-1];
636   }
637  
638
639   /// The data from the previous phase
640   instrumentedPhase * controlPointManager::previousPhaseData(){
641     int s = allData.phases.size();
642     if(s >= 2 && phase_id > 0) {
643       return allData.phases[s-2];
644     } else {
645       return NULL;
646     }
647   }
648  
649   /// The data from two phases back
650   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
651     int s = allData.phases.size();
652     if(s >= 3 && phase_id > 0) {
653       return allData.phases[s-3];
654     } else {
655       return NULL;
656     }
657   }
658   
659
660   /// Called by either the application or the Control Point Framework to advance to the next phase  
661   void controlPointManager::gotoNextPhase(){
662     CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
663     fflush(stdout);
664       
665     if(enableCPTracing){
666       if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
667         alreadyRequestedAll = true;
668         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
669         CkPrintf("Requesting all measurements\n");
670         thisProxy.requestAll(*cb);
671         delete cb;
672         
673       } else {
674         
675         if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
676           alreadyRequestedMemoryUsage = true;
677           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
678           thisProxy.requestMemoryUsage(*cb);
679           delete cb;
680         }
681         
682         if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
683           alreadyRequestedIdleTime = true;
684           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
685           thisProxy.requestIdleTime(*cb);
686           delete cb;
687         }
688       }
689     }
690
691
692
693 #if CMK_LBDB_ON && 0
694     LBDatabase * myLBdatabase = LBDatabaseObj();
695     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
696     const CkVec<LBObj*> objs = myLBDB->getObjs();
697     const int objCount = myLBDB->getObjCount();
698     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
699     
700     LBRealType maxObjWallTime = -1.0;
701     
702     for(int i=0;i<objs.length();i++){
703       LBObj* o = objs[i];
704       const LDObjData d = o->ObjData();
705       LBRealType cpuTime = d.cpuTime;
706       LBRealType wallTime = d.wallTime;
707       // can also get object handles from the LDObjData struct
708       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
709       if(wallTime > maxObjWallTime){
710
711       }
712       
713     }
714
715     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
716     
717 #endif    
718
719
720     
721     // increment phase id
722     phase_id++;
723     
724
725     // Create new entry for the phase we are starting now
726     instrumentedPhase * newPhase = new instrumentedPhase();
727     allData.phases.push_back(newPhase);
728     
729     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
730
731   }
732
733   /// An application uses this to register an instrumented timing for this phase
734   void controlPointManager::setTiming(double time){
735     currentPhaseData()->times.push_back(time);
736
737 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
738        
739     // First we should register this currently executing message as a path, because it is likely an important one to consider.
740     //    registerTerminalEntryMethod();
741     
742     // save the critical path for this phase
743     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
744     //    maxTerminalPathHistory.reset();
745     
746     
747     // Reset the counts for the currently executing message
748     //resetThisEntryPath();
749         
750 #endif
751     
752   }
753   
754   /// Entry method called on all PEs to request CPU utilization statistics
755   void controlPointManager::requestIdleTime(CkCallback cb){
756     CkAssert(enableCPTracing);
757    
758     double i = localControlPointTracingInstance()->idleRatio();
759     double idle[3];
760     idle[0] = i;
761     idle[1] = i;
762     idle[2] = i;
763     
764     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
765     
766     localControlPointTracingInstance()->resetTimings();
767     
768     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
769   }
770   
771   /// All processors reduce their memory usages in requestIdleTime() to this method
772   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
773     CkAssert(enableCPTracing);
774
775     int size=msg->getSize() / sizeof(double);
776     CkAssert(size==3);
777     double *r=(double *) msg->getData();
778         
779     instrumentedPhase* prevPhase = previousPhaseData();
780     if(prevPhase != NULL){
781       prevPhase->idleTime.min = r[0];
782       prevPhase->idleTime.avg = r[1]/CkNumPes();
783       prevPhase->idleTime.max = r[2];
784       prevPhase->idleTime.print();
785       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
786     } else {
787       CkPrintf("There is no previous phase to store the idle time measurements\n");
788     }
789     
790     alreadyRequestedIdleTime = false;
791     checkForShutdown();
792     delete msg;
793   }
794
795
796
797
798
799
800   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
801   void controlPointManager::requestAll(CkCallback cb){
802     CkAssert(enableCPTracing);
803
804     TraceControlPoints *t = localControlPointTracingInstance();
805
806     double data[ALL_REDUCTION_SIZE];
807
808     double *idle = data;
809     double *over = data+3;
810     double *mem = data+6;
811     double *msgBytes = data+7;  
812     double *grainsize = data+11;  
813
814     const double i = t->idleRatio();
815     idle[0] = i;
816     idle[1] = i;
817     idle[2] = i;
818
819     const double o = t->overheadRatio();
820     over[0] = o;
821     over[1] = o;
822     over[2] = o;
823
824     const double m = t->memoryUsageMB();
825     mem[0] = m;
826
827     msgBytes[0] = t->b2;
828     msgBytes[1] = t->b3;
829     msgBytes[2] = t->b2mlen;
830     msgBytes[3] = t->b3mlen;
831
832     grainsize[0] = t->grainSize();
833     
834     localControlPointTracingInstance()->resetAll();
835
836     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
837   }
838   
839   /// All processors reduce their memory usages in requestIdleTime() to this method
840   void controlPointManager::gatherAll(CkReductionMsg *msg){
841     CkAssert(enableCPTracing);
842
843     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
844     int size=msg->getSize() / sizeof(double);
845     double *data=(double *) msg->getData();
846         
847     double *idle = data;
848     double *over = data+3;
849     double *mem = data+6;
850     double *msgBytes = data+7;
851     double *granularity = data+11;
852
853
854     //    std::string b = allData.toString();
855
856     instrumentedPhase* prevPhase = previousPhaseData();
857     if(prevPhase != NULL){
858       prevPhase->idleTime.min = idle[0];
859       prevPhase->idleTime.avg = idle[1]/CkNumPes();
860       prevPhase->idleTime.max = idle[2];
861       
862       prevPhase->overheadTime.min = over[0];
863       prevPhase->overheadTime.avg = over[1]/CkNumPes();
864       prevPhase->overheadTime.max = over[2];
865       
866       prevPhase->memoryUsageMB = mem[0];
867       
868       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
869
870       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
871       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
872
873       /** The average of the grain sizes on all PEs in us */
874       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
875
876       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
877       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
878
879       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
880       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
881
882       if(bytesPerInvoke2 > bytesPerInvoke3)
883         prevPhase->bytesPerInvoke = bytesPerInvoke2;
884       else
885         prevPhase->bytesPerInvoke = bytesPerInvoke3;
886
887       //      prevPhase->print();
888       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
889
890       //      std::string a = allData.toString();
891
892       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
893       
894     } else {
895       CkPrintf("There is no previous phase to store measurements\n");
896     }
897     
898     alreadyRequestedAll = false;
899     checkForShutdown();
900     delete msg;
901   }
902
903
904
905
906   void controlPointManager::checkForShutdown(){
907     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
908       doExitNow();
909     }
910   }
911
912
913   void controlPointManager::exitIfReady(){
914      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
915        //  CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
916        doExitNow();
917      } else {
918        // CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
919        exitWhenReady = true;
920      }
921   }
922
923
924
925   void controlPointManager::doExitNow(){
926           writeOutputToDisk();
927           //      CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
928           CkExit();
929   }
930
931   void controlPointManager::writeOutputToDisk(){
932           if(writeDataFileAtShutdown){
933                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
934           }
935   }
936
937
938   /// Entry method called on all PEs to request memory usage
939   void controlPointManager::requestMemoryUsage(CkCallback cb){
940     int m = CmiMaxMemoryUsage() / 1024 / 1024;
941     CmiResetMaxMemory();
942     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
943     contribute(sizeof(int),&m,CkReduction::max_int, cb);
944   }
945
946   /// All processors reduce their memory usages to this method
947   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
948     int size=msg->getSize() / sizeof(int);
949     CkAssert(size==1);
950     int *m=(int *) msg->getData();
951
952     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
953     
954     instrumentedPhase* prevPhase = previousPhaseData();
955     if(prevPhase != NULL){
956       prevPhase->memoryUsageMB = m[0];
957     } else {
958       CkPrintf("No place to store memory usage");
959     }
960
961     alreadyRequestedMemoryUsage = false;
962     checkForShutdown();
963     delete msg;
964   }
965
966
967 //   /// Inform the control point framework that a named control point affects the priorities of some array  
968 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
969 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
970     
971 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
972 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
973 //     } else {
974 //       std::set<int> s;
975 //       s.insert(groupIdx);
976 //       affectsPrioritiesArray[std::string(name)] = s;
977 //     }
978     
979 // #if DEBUGPRINT   
980 //     std::map<std::string, std::set<int> >::iterator f;
981 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
982 //       std::string name = f->first;
983 //       std::set<int> &vals = f->second;
984 //       cout << "Control point " << name << " affects arrays: ";
985 //       std::set<int>::iterator i;
986 //       for(i=vals.begin(); i!=vals.end();++i){
987 //      cout << *i << " ";
988 //       }
989 //       cout << endl;
990 //     }
991 // #endif
992     
993 //   }
994   
995 //   /// Inform the control point framework that a named control point affects the priority of some entry method
996 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
997 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
998
999 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
1000 //       affectsPrioritiesEP[std::string(name)].insert(idx);
1001 //     } else {
1002 //       std::set<int> s;
1003 //       s.insert(idx);
1004 //       affectsPrioritiesEP[std::string(name)] = s;
1005 //     }
1006     
1007 // #if DEBUGPRINT
1008 //     std::map<std::string, std::set<int> >::iterator f;
1009 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1010 //       std::string name = f->first;
1011 //       std::set<int> &vals = f->second;
1012 //       cout << "Control point " << name << " affects EP: ";
1013 //       std::set<int>::iterator i;
1014 //       for(i=vals.begin(); i!=vals.end();++i){
1015 //      cout << *i << " ";
1016 //       }
1017 //       cout << endl;
1018 //     }
1019 // #endif
1020
1021
1022 //   }
1023   
1024
1025
1026 /// An interface callable by the application.
1027 void gotoNextPhase(){
1028   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1029 }
1030
1031 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1032 {
1033   gotoNextPhase();
1034 }
1035
1036
1037 /// A mainchare that is used just to create our controlPointManager group at startup
1038 class controlPointMain : public CBase_controlPointMain {
1039 public:
1040   controlPointMain(CkArgMsg* args){
1041 #if OLDRANDSEED
1042     struct timeval tp;
1043     gettimeofday(& tp, NULL);
1044     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1045 #else
1046     double time = CmiWallTimer();
1047     int sec = (int)time;
1048     int usec = (int)((time - (double)sec)*1000000.0);
1049     random_seed =  sec ^ usec;
1050 #endif
1051     
1052     
1053     double period, periodms;
1054     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1055     bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1056     if(haveSamplePeriod){
1057       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1058       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1059     } else if(haveSamplePeriodMs){
1060       CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1061       controlPointSamplePeriod = periodms; /**< A readonly */
1062     } else {
1063       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1064     }
1065
1066   
1067     
1068     whichTuningScheme = RandomSelection;
1069
1070
1071     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1072       whichTuningScheme = RandomSelection;
1073     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1074       whichTuningScheme = ExhaustiveSearch;
1075     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1076       whichTuningScheme = AlwaysDefaults;
1077     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1078       whichTuningScheme = SimulatedAnnealing;
1079     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1080       whichTuningScheme = CriticalPathAutoPrioritization;
1081     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1082       whichTuningScheme = UseBestKnownTiming;
1083     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1084       whichTuningScheme = UseSteering;
1085     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1086       whichTuningScheme = MemoryAware;
1087     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1088       whichTuningScheme = Simplex;
1089     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1090       whichTuningScheme = DivideAndConquer;
1091     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1092       whichTuningScheme = LDBPeriod;
1093     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1094       whichTuningScheme = LDBPeriodLinear;
1095     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1096       whichTuningScheme = LDBPeriodQuadratic;
1097     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1098       whichTuningScheme = LDBPeriodOptimal;
1099     }
1100
1101     char *defValStr = NULL;
1102     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1103       CkPrintf("You specified default value string: %s\n", defValStr);
1104       
1105       // Parse the string, looking for commas
1106      
1107
1108       char *tok = strtok(defValStr, ",");
1109       while (tok) {
1110         // split on the equal sign
1111         int len = strlen(tok);
1112         char *eqsign = strchr(tok, '=');
1113         if(eqsign != NULL && eqsign != tok){
1114           *eqsign = '\0';
1115           char *cpname = tok;
1116           std::string cpName(tok);
1117           char *cpDefVal = eqsign+1;      
1118           int v=-1;
1119           if(sscanf(cpDefVal, "%d", &v) == 1){
1120             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1121             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1122             defaultControlPointValues[cpName] = v;
1123           }
1124         }
1125         tok = strtok(NULL, ",");
1126       }
1127
1128     }
1129
1130     shouldGatherAll = false;
1131     shouldGatherMemoryUsage = false;
1132     shouldGatherUtilization = false;
1133     
1134     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1135       shouldGatherAll = true;
1136     } else {
1137       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1138         shouldGatherMemoryUsage = true;
1139       }
1140       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1141         shouldGatherUtilization = true;
1142       }
1143     }
1144     
1145     writeDataFileAtShutdown = false;   
1146     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1147       writeDataFileAtShutdown = true;
1148     }
1149
1150     shouldFilterOutputData = true;
1151     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1152       shouldFilterOutputData = false;
1153     }
1154
1155
1156    loadDataFileAtStartup = false;   
1157     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1158       loadDataFileAtStartup = true;
1159     }
1160
1161     char *cpdatafile;
1162     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1163       sprintf(CPDataFilename, "%s", cpdatafile);
1164     } else {
1165       sprintf(CPDataFilename, "controlPointData.txt");
1166     }
1167
1168
1169     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1170   }
1171   ~controlPointMain(){}
1172 };
1173
1174 /// An interface callable by the application.
1175 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1176   CkAssert(CkMyPe() == 0);
1177   CkPrintf("Application has registered a control point change callback\n");
1178   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1179 }
1180
1181 /// An interface callable by the application.
1182 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1183   if(CkMyPe() == 0){
1184     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1185     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1186   }
1187 }
1188
1189 /// An interface callable by the application.
1190 void registerControlPointTiming(double time){
1191   CkAssert(CkMyPe() == 0);
1192 #if DEBUGPRINT>0
1193   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1194 #endif
1195   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1196 }
1197
1198 /// An interface callable by the application.
1199 void controlPointTimingStamp() {
1200   CkAssert(CkMyPe() == 0);
1201 #if DEBUGPRINT>0
1202   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1203 #endif
1204   
1205   static double prev_time = 0.0;
1206   double t = CmiWallTimer();
1207   double duration = t - prev_time;
1208   prev_time = t;
1209     
1210   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1211 }
1212
1213 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1214 {
1215   controlPointTimingStamp();
1216 }
1217
1218
1219 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1220 {
1221   setFrameworkAdvancePhase(*value);
1222 }
1223
1224
1225
1226
1227 /// Shutdown the control point framework, writing data to disk if necessary
1228 extern "C" void controlPointShutdown(){
1229   if(CkMyPe() == 0){
1230
1231     // wait for gathering of idle time & memory usage to complete
1232     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1233
1234   }
1235 }
1236
1237 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1238 void controlPointInitNode(){
1239 //  CkPrintf("controlPointInitNode()\n");
1240   registerExitFn(controlPointShutdown);
1241 }
1242
1243 /// Called periodically to allow control point framework to do things periodically
1244 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1245 #ifdef DEBUGPRINT
1246   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1247 #endif
1248   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1249   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1250 }
1251
1252
1253
1254
1255
1256 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1257 /// user observed characteristic to adapt specific control point values.
1258 /// This function must return valid values for newControlPoints.
1259 void controlPointManager::generatePlan() {
1260   const double startGenerateTime = CmiWallTimer();
1261   const int phase_id = this->phase_id;
1262   const int effective_phase = allData.phases.size();
1263
1264   // Only generate a plan once per phase
1265   if(generatedPlanForStep == phase_id) 
1266     return;
1267   generatedPlanForStep = phase_id;
1268  
1269   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1270   printTuningScheme();
1271
1272   // By default lets put the previous phase data into newControlPoints
1273   instrumentedPhase *prevPhase = previousPhaseData();
1274   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1275           newControlPoints[cpsIter->first] = cpsIter->second;
1276   }
1277
1278
1279   if( whichTuningScheme == RandomSelection ){
1280     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1281     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1282       const std::string &name = cpsIter->first;
1283       const std::pair<int,int> &bounds = cpsIter->second;
1284       const int lb = bounds.first;
1285       const int ub = bounds.second;
1286       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1287     }
1288   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1289     // -----------------------------------------------------------
1290     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1291     //   
1292     // This scheme will return the median value for the range 
1293     // early on, and then will transition over to the new control points
1294     // determined by the critical path adapting code
1295
1296     // This won't work until the periodic function is fixed up a bit
1297
1298     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1299     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1300       const std::string &name = cpsIter->first;
1301       const std::pair<int,int> &bounds = cpsIter->second;
1302       const int lb = bounds.first;
1303       const int ub = bounds.second;
1304       newControlPoints[name] =  (lb+ub)/2;
1305     }
1306
1307   } else if ( whichTuningScheme == MemoryAware ) {
1308
1309     // -----------------------------------------------------------
1310     //  STEERING BASED ON MEMORY USAGE
1311
1312     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1313     instrumentedPhase *prevPhase = previousPhaseData();
1314  
1315     if(phase_id%2 == 0){
1316       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1317       twoAgoPhase->print();
1318       CkPrintf("\n");
1319       fflush(stdout);
1320       
1321       // See if memory usage is low:
1322       double memUsage = twoAgoPhase->memoryUsageMB;
1323       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1324       fflush(stdout);
1325       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1326         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1327         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1328         
1329         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1330         newControlPoints = twoAgoPhase->controlPoints;
1331
1332
1333         CkPrintf("Steering (memory based) initialized plan\n");
1334         fflush(stdout);
1335
1336         // look for a possible control point knob to turn
1337         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1338         
1339         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1340         bool found = false;
1341         std::string cpName;
1342         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1343         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1344         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1345           cpName = iter->first;
1346           info = &iter->second;
1347           found = true;
1348           break;
1349         }
1350
1351         // Adapt the control point value
1352         if(found){
1353           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1354           fflush(stdout);
1355           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1356           const int maxValue = controlPointSpace[cpName].second;
1357           
1358           if(twoAgoValue+1 <= maxValue){
1359             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1360           }
1361         }
1362         
1363       }
1364     }
1365
1366     CkPrintf("Steering (memory based) done for this phase\n");
1367     fflush(stdout);
1368
1369   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1370
1371     // -----------------------------------------------------------
1372     //  USE BEST KNOWN TIME  
1373     static bool firstTime = true;
1374     if(firstTime){
1375       firstTime = false;
1376       instrumentedPhase *best = allData.findBest(); 
1377       CkPrintf("Best known phase is:\n");
1378       best->print();
1379       CkPrintf("\n");
1380       
1381       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1382       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1383         const std::string &name = cpsIter->first;
1384         newControlPoints[name] =  best->controlPoints[name];
1385       }
1386     }
1387   } else if( whichTuningScheme == LDBPeriod) {
1388     // Assume this is used in this manner:
1389     //  1) go to next phase
1390     //  2) request control point
1391     //  3) load balancing
1392     //  4) computation
1393     
1394     
1395     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1396     instrumentedPhase *prevPhase = previousPhaseData();
1397     
1398     
1399     const std::vector<double> &times = twoAgoPhase->times;
1400     const int oldNumTimings = times.size();
1401
1402
1403     const std::vector<double> &timesNew = prevPhase->times;
1404     const int newNumTimings = timesNew.size();
1405
1406
1407     if(oldNumTimings > 4 && newNumTimings > 4){
1408       
1409       // Build model of execution time based on two phases ago
1410       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1411       
1412       double oldSum = 0;
1413       
1414       for(int i=2; i<oldNumTimings; i++){
1415         oldSum += times[i];
1416       }
1417       
1418       const double oldAvg = oldSum / (oldNumTimings-2);
1419       
1420       
1421       
1422       
1423       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1424       const double expectedTotalTime = oldAvg * newNumTimings;
1425       
1426       
1427       // Measure actual time
1428       double newSum = 0.0;
1429       for(int i=2; i<newNumTimings; ++i){
1430         newSum += timesNew[i];
1431       }
1432       
1433       const double newAvg = newSum / (newNumTimings-2);
1434       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1435       
1436       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1437       
1438       // Determine load balance cost
1439       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1440       
1441       const double benefitAfterLB = benefit - lbcost;
1442     
1443     
1444       // Determine whether LB cost outweights the estimated benefit
1445       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1446     
1447     
1448       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1449       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1450         const std::string &name = cpsIter->first;
1451         const std::pair<int,int> &bounds = cpsIter->second;
1452         const int lb = bounds.first;
1453         const int ub = bounds.second;
1454       
1455         if(benefitAfterLB > 0){
1456           CkPrintf("Constant Model: Beneficial LB\n");
1457           int newval = newControlPoints[name] / 2;
1458           if(newval > lb)
1459             newControlPoints[name] = newval;
1460           else 
1461             newControlPoints[name] = lb;
1462         } else {
1463           CkPrintf("Constant Model: Detrimental LB\n");
1464           int newval = newControlPoints[name] * 2;
1465           if(newval < ub)
1466             newControlPoints[name] = newval;
1467           else
1468             newControlPoints[name] = ub;
1469         }
1470       }
1471     }
1472     
1473     
1474   }  else if( whichTuningScheme == LDBPeriodLinear) {
1475     // Assume this is used in this manner:
1476     //  1) go to next phase
1477     //  2) request control point
1478     //  3) load balancing
1479     //  4) computation
1480
1481
1482     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1483     instrumentedPhase *prevPhase = previousPhaseData();
1484     
1485     const std::vector<double> &times = twoAgoPhase->times;
1486     const int oldNumTimings = times.size();
1487
1488     const std::vector<double> &timesNew = prevPhase->times;
1489     const int newNumTimings = timesNew.size();
1490     
1491
1492     if(oldNumTimings > 4 && newNumTimings > 4){
1493
1494       // Build model of execution time based on two phases ago
1495       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1496       const int b1 = 2 + (oldNumTimings-2)/2;
1497       double s1 = 0;
1498       double s2 = 0;
1499     
1500       const double ldbStepsTime = times[0] + times[1];
1501     
1502       for(int i=2; i<b1; i++){
1503         s1 += times[i];
1504       }
1505       for(int i=b1; i<oldNumTimings; i++){
1506         s2 += times[i];
1507       }
1508       
1509       
1510       // Compute the estimated time for the last phase's data
1511     
1512       const double a1 = s1 / (double)(b1-2);
1513       const double a2 = s2 / (double)(oldNumTimings-b1);
1514       const double aold = (a1+a2)/2.0;
1515
1516       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1517         
1518     
1519       // Measure actual time
1520       double sum = 0.0;
1521       for(int i=2; i<newNumTimings; ++i){
1522         sum += timesNew[i];
1523       }
1524
1525       const double avg = sum / ((double)(newNumTimings-2));
1526       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1527
1528       const double benefit = expectedTotalTime - actualTotalTime;
1529
1530       // Determine load balance cost
1531       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1532
1533       const double benefitAfterLB = benefit - lbcost;
1534
1535     
1536       // Determine whether LB cost outweights the estimated benefit
1537       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1538     
1539     
1540     
1541       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1542       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1543         const std::string &name = cpsIter->first;
1544         const std::pair<int,int> &bounds = cpsIter->second;
1545         const int lb = bounds.first;
1546         const int ub = bounds.second;
1547       
1548         if(benefitAfterLB > 0){
1549           CkPrintf("Linear Model: Beneficial LB\n");
1550           int newval = newControlPoints[name] / 2;
1551           if(newval > lb)
1552             newControlPoints[name] = newval;
1553           else 
1554             newControlPoints[name] = lb;
1555         } else {
1556           CkPrintf("Linear Model: Detrimental LB\n");
1557           int newval = newControlPoints[name] * 2;
1558           if(newval < ub)
1559             newControlPoints[name] = newval;
1560           else 
1561             newControlPoints[name] = ub;
1562         }
1563       }
1564     }
1565
1566   }
1567
1568   else if( whichTuningScheme == LDBPeriodQuadratic) {
1569     // Assume this is used in this manner:
1570     //  1) go to next phase
1571     //  2) request control point
1572     //  3) load balancing
1573     //  4) computation
1574
1575
1576     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1577     instrumentedPhase *prevPhase = previousPhaseData();
1578         
1579     const std::vector<double> &times = twoAgoPhase->times;
1580     const int oldNumTimings = times.size();
1581
1582     const std::vector<double> &timesNew = prevPhase->times;
1583     const int newNumTimings = timesNew.size();
1584
1585     
1586     if(oldNumTimings > 4 && newNumTimings > 4){
1587
1588
1589       // Build model of execution time based on two phases ago
1590       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1591       const int b1 = 2 + (oldNumTimings-2)/3;
1592       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1593       double s1 = 0;
1594       double s2 = 0;
1595       double s3 = 0;
1596
1597       const double ldbStepsTime = times[0] + times[1];
1598     
1599       for(int i=2; i<b1; i++){
1600         s1 += times[i];
1601       }
1602       for(int i=b1; i<b2; i++){
1603         s2 += times[i];
1604       }
1605       for(int i=b2; i<oldNumTimings; i++){
1606         s3 += times[i];
1607       }
1608
1609     
1610       // Compute the estimated time for the last phase's data
1611     
1612       const double a1 = s1 / (double)(b1-2);
1613       const double a2 = s2 / (double)(b2-b1);
1614       const double a3 = s3 / (double)(oldNumTimings-b2);
1615     
1616       const double a = (a1-2.0*a2+a3)/2.0;
1617       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1618       const double c = a3;
1619     
1620       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1621       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1622       const double x2 = 0.5;
1623       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1624    
1625     
1626       // Measure actual time
1627       double sum = 0.0;
1628       for(int i=2; i<newNumTimings; ++i){
1629         sum += timesNew[i];
1630       }
1631
1632       const double avg = sum / ((double)(newNumTimings-2));
1633       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1634
1635       const double benefit = expectedTotalTime - actualTotalTime;
1636
1637       // Determine load balance cost
1638       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1639
1640       const double benefitAfterLB = benefit - lbcost;
1641
1642     
1643       // Determine whether LB cost outweights the estimated benefit
1644       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1645     
1646     
1647     
1648       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1649       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1650         const std::string &name = cpsIter->first;
1651         const std::pair<int,int> &bounds = cpsIter->second;
1652         const int lb = bounds.first;
1653         const int ub = bounds.second;
1654       
1655         if(benefitAfterLB > 0){
1656           CkPrintf("QuadraticModel: Beneficial LB\n");
1657           int newval = newControlPoints[name] / 2;
1658           if(newval > lb)
1659             newControlPoints[name] = newval;
1660           else 
1661             newControlPoints[name] = lb;
1662         } else {
1663           CkPrintf("QuadraticModel: Detrimental LB\n");
1664           int newval = newControlPoints[name] * 2;
1665           if(newval < ub)
1666             newControlPoints[name] = newval;
1667           else 
1668             newControlPoints[name] = ub;
1669         }
1670       
1671       }
1672     }
1673     
1674
1675   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1676     // Assume this is used in this manner:
1677     //  1) go to next phase
1678     //  2) request control point
1679     //  3) load balancing
1680     //  4) computation
1681
1682
1683
1684     instrumentedPhase *prevPhase = previousPhaseData();
1685     
1686     const std::vector<double> &times = prevPhase->times;
1687     const int numTimings = times.size();
1688     
1689     if( numTimings > 4){
1690
1691       const int b1 = 2 + (numTimings-2)/2;
1692       double s1 = 0;
1693       double s2 = 0;
1694     
1695     
1696       for(int i=2; i<b1; i++){
1697         s1 += times[i];
1698       }
1699       for(int i=b1; i<numTimings; i++){
1700         s2 += times[i];
1701       }
1702       
1703     
1704       const double a1 = s1 / (double)(b1-2);
1705       const double a2 = s2 / (double)(numTimings-b1);
1706       const double avg = (a1+a1) / 2.0;
1707
1708       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1709
1710       const double ldbStepsTime = times[0] + times[1];
1711       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1712       
1713
1714       int newval = roundDouble(sqrt(2.0*lbcost/m));
1715       
1716       // We don't really know what to do if m<=0, so we'll just double the period
1717       if(m<=0)
1718         newval = 2*numTimings;     
1719       
1720       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1721     
1722     
1723       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1724       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1725         // TODO: lookup only control points that are relevant instead of all of them
1726         const std::string &name = cpsIter->first;
1727         const std::pair<int,int> &bounds = cpsIter->second;
1728         const int lb = bounds.first;
1729         const int ub = bounds.second;
1730         
1731         if(newval < lb){
1732           newControlPoints[name] = lb;
1733         } else if(newval > ub){
1734           newControlPoints[name] = ub;
1735         } else {
1736           newControlPoints[name] = newval;
1737         } 
1738         
1739       }
1740       
1741       
1742     }
1743     
1744  
1745
1746   } else if ( whichTuningScheme == UseSteering ) {
1747           // -----------------------------------------------------------
1748           //  STEERING BASED ON KNOWLEDGE
1749
1750           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1751           // plans are only generated after 3 phases
1752
1753           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1754           instrumentedPhase *prevPhase = previousPhaseData();
1755
1756           if(phase_id%4 == 0){
1757                   CkPrintf("Steering based on 2 phases ago:\n");
1758                   twoAgoPhase->print();
1759                   CkPrintf("\n");
1760                   fflush(stdout);
1761
1762                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1763
1764
1765                   // ========================================= Concurrency =============================================
1766                   // See if idle time is high:
1767                   {
1768                           double idleTime = twoAgoPhase->idleTime.avg;
1769                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1770                           fflush(stdout);
1771                           if(idleTime > 0.10){
1772                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1773                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1774
1775                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1776
1777                                   bool found = false;
1778                                   std::string cpName;
1779                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1780                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1781                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1782                                           cpName = iter->first;
1783                                           info = &iter->second;
1784
1785                                           // Initialize a new plan based on two phases ago
1786                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1787
1788                                           CkPrintf("Steering found knob to turn\n");
1789                                           fflush(stdout);
1790
1791                                           if(info->first == ControlPoint::EFF_INC){
1792                                                   const int maxValue = controlPointSpace[cpName].second;
1793                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1794                                                   if(twoAgoValue+1 <= maxValue){
1795                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1796                                                   }
1797                                           } else {
1798                                                   const int minValue = controlPointSpace[cpName].second;
1799                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1800                                                   if(twoAgoValue-1 >= minValue){
1801                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1802                                                   }
1803                                           }
1804
1805                                           possibleNextStepPlans.push_back(aNewPlan);
1806
1807                                   }
1808                           }
1809                   }
1810
1811                   // ========================================= Grain Size =============================================
1812                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1813                   {
1814                           double overheadTime = twoAgoPhase->overheadTime.avg;
1815                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1816                           fflush(stdout);
1817                           if(overheadTime > 0.10){
1818                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1819                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1820
1821                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1822
1823                                   bool found = false;
1824                                   std::string cpName;
1825                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1826                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1827                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1828                                           cpName = iter->first;
1829                                           info = &iter->second;
1830
1831                                           // Initialize a new plan based on two phases ago
1832                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1833
1834
1835
1836                                           CkPrintf("Steering found knob to turn\n");
1837                                           fflush(stdout);
1838
1839                                           if(info->first == ControlPoint::EFF_INC){
1840                                                   const int maxValue = controlPointSpace[cpName].second;
1841                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1842                                                   if(twoAgoValue+1 <= maxValue){
1843                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1844                                                   }
1845                                           } else {
1846                                                   const int minValue = controlPointSpace[cpName].second;
1847                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1848                                                   if(twoAgoValue-1 >= minValue){
1849                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1850                                                   }
1851                                           }
1852
1853                                           possibleNextStepPlans.push_back(aNewPlan);
1854
1855                                   }
1856
1857                           }
1858                   }
1859                   // ========================================= GPU Offload =============================================
1860                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1861                   {
1862                           double idleTime = twoAgoPhase->idleTime.avg;
1863                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1864                           fflush(stdout);
1865                           if(idleTime > 0.10){
1866                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1867                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1868
1869                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1870
1871                                   bool found = false;
1872                                   std::string cpName;
1873                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1874                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1875                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1876                                           cpName = iter->first;
1877                                           info = &iter->second;
1878
1879                                           // Initialize a new plan based on two phases ago
1880                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1881
1882
1883                                           CkPrintf("Steering found knob to turn\n");
1884                                           fflush(stdout);
1885
1886                                           if(info->first == ControlPoint::EFF_DEC){
1887                                                   const int maxValue = controlPointSpace[cpName].second;
1888                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1889                                                   if(twoAgoValue+1 <= maxValue){
1890                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1891                                                   }
1892                                           } else {
1893                                                   const int minValue = controlPointSpace[cpName].second;
1894                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1895                                                   if(twoAgoValue-1 >= minValue){
1896                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1897                                                   }
1898                                           }
1899
1900                                           possibleNextStepPlans.push_back(aNewPlan);
1901
1902                                   }
1903
1904                           }
1905                   }
1906
1907                   // ========================================= Done =============================================
1908
1909
1910                   if(possibleNextStepPlans.size() > 0){
1911                           newControlPoints = possibleNextStepPlans[0];
1912                   }
1913
1914
1915                   CkPrintf("Steering done for this phase\n");
1916                   fflush(stdout);
1917
1918           }  else {
1919                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1920                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1921                   fflush(stdout);
1922           }
1923
1924
1925
1926   } else if( whichTuningScheme == SimulatedAnnealing ) {
1927
1928     // -----------------------------------------------------------
1929     //  SIMULATED ANNEALING
1930     //  Simulated Annealing style hill climbing method
1931     //
1932     //  Find the best search space configuration, and try something
1933     //  nearby it, with a radius decreasing as phases increase
1934
1935     CkPrintf("Finding best phase\n");
1936     instrumentedPhase *bestPhase = allData.findBest();  
1937     CkPrintf("best found:\n"); 
1938     bestPhase->print(); 
1939     CkPrintf("\n"); 
1940     
1941
1942     const int convergeByPhase = 100;
1943     // Determine from 0.0 to 1.0 how far along we are
1944     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1945
1946     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1947     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1948       const std::string &name = cpsIter->first;
1949       const std::pair<int,int> &bounds = cpsIter->second;
1950       const int minValue = bounds.first;
1951       const int maxValue = bounds.second;
1952       
1953       const int before = bestPhase->controlPoints[name];   
1954   
1955       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1956
1957       int high = min(before+range, maxValue);
1958       int low = max(before-range, minValue);
1959       
1960       newControlPoints[name] = low;
1961       if(high-low > 0){
1962         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1963       } 
1964       
1965     }
1966
1967   } else if ( whichTuningScheme == DivideAndConquer ) {
1968
1969           // -----------------------------------------------------------
1970           //  STEERING FOR Divide & Conquer Programs
1971           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1972           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1973           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1974
1975           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1976           // plans are only generated after 3 phases
1977
1978           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1979           instrumentedPhase *prevPhase = previousPhaseData();
1980
1981           if(phase_id%4 == 0){
1982                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1983                   twoAgoPhase->print();
1984                   CkPrintf("\n");
1985                   fflush(stdout);
1986
1987                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1988
1989
1990                   // ========================================= Concurrency =============================================
1991                   // See if idle time is high:
1992                   {
1993                           double idleTime = twoAgoPhase->idleTime.avg;
1994                           double overheadTime = twoAgoPhase->overheadTime.avg;
1995
1996
1997                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1998                           fflush(stdout);
1999                           if(idleTime+overheadTime > 0.10){
2000                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
2001                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
2002
2003                                   int direction = -1;
2004                                   if (idleTime>overheadTime){
2005                                           // We need to decrease the grain size, or increase the available concurrency
2006                                           direction = 1;
2007                                   }
2008
2009                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2010
2011                                   bool found = false;
2012                                   std::string cpName;
2013                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2014                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2015                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2016                                           cpName = iter->first;
2017                                           info = &iter->second;
2018                                           
2019                                           // Initialize a new plan based on two phases ago
2020                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2021
2022                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2023                                           fflush(stdout);
2024
2025                                           int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2026                                           
2027                                           if(info->first == ControlPoint::EFF_INC){
2028                                             const int minValue = controlPointSpace[cpName].first;
2029                                             const int maxValue = controlPointSpace[cpName].second;
2030                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2031                                             const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);                                          
2032                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2033                                             aNewPlan[cpName] = newVal;
2034                                           } else {
2035                                             const int minValue = controlPointSpace[cpName].first;
2036                                             const int maxValue = controlPointSpace[cpName].second;
2037                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2038                                             const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2039                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2040                                             aNewPlan[cpName] = newVal;
2041                                           }
2042                                           
2043                                           possibleNextStepPlans.push_back(aNewPlan);
2044                                   }
2045                           }
2046                   }
2047
2048                   if(possibleNextStepPlans.size() > 0){
2049                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2050                     newControlPoints = possibleNextStepPlans[0];
2051                   } else {
2052                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2053                   }
2054           }
2055
2056   } else if( whichTuningScheme == Simplex ) {
2057
2058           // -----------------------------------------------------------
2059           //  Nelder Mead Simplex Algorithm
2060           //
2061           //  A scheme that takes a simplex (n+1 points) and moves it
2062           //  toward the minimum, eventually converging there.
2063
2064           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2065
2066   } else if( whichTuningScheme == ExhaustiveSearch ) {
2067     
2068     // -----------------------------------------------------------
2069     // EXHAUSTIVE SEARCH
2070    
2071     int numDimensions = controlPointSpace.size();
2072     CkAssert(numDimensions > 0);
2073   
2074     vector<int> lowerBounds(numDimensions);
2075     vector<int> upperBounds(numDimensions); 
2076   
2077     int d=0;
2078     std::map<std::string, pair<int,int> >::iterator iter;
2079     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2080       //    CkPrintf("Examining dimension %d\n", d);
2081       lowerBounds[d] = iter->second.first;
2082       upperBounds[d] = iter->second.second;
2083       d++;
2084     }
2085    
2086     // get names for each dimension (control point)
2087     vector<std::string> names(numDimensions);
2088     d=0;
2089     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2090       names[d] = niter->first;
2091       d++;
2092     }
2093   
2094   
2095     // Create the first possible configuration
2096     vector<int> config = lowerBounds;
2097     config.push_back(0);
2098   
2099     // Increment until finding an unused configuration
2100     allData.cleanupNames(); // put -1 values in for any control points missing
2101     std::vector<instrumentedPhase*> &phases = allData.phases;     
2102
2103     while(true){
2104     
2105       std::vector<instrumentedPhase*>::iterator piter; 
2106       bool testedConfiguration = false; 
2107       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2108       
2109         // Test if the configuration matches this phase
2110         bool match = true;
2111         for(int j=0;j<numDimensions;j++){
2112           match &= (*piter)->controlPoints[names[j]] == config[j];
2113         }
2114       
2115         if(match){
2116           testedConfiguration = true; 
2117           break;
2118         } 
2119       } 
2120     
2121       if(testedConfiguration == false){ 
2122         break;
2123       } 
2124     
2125       // go to next configuration
2126       config[0] ++;
2127       // Propagate "carrys"
2128       for(int i=0;i<numDimensions;i++){
2129         if(config[i] > upperBounds[i]){
2130           config[i+1]++;
2131           config[i] = lowerBounds[i];
2132         }
2133       }
2134       // check if we have exhausted all possible values
2135       if(config[numDimensions] > 0){
2136         break;
2137       }
2138        
2139     }
2140   
2141     if(config[numDimensions] > 0){
2142       for(int i=0;i<numDimensions;i++){ 
2143         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2144       }
2145     }
2146
2147     // results are now in config[i]
2148     
2149     for(int i=0; i<numDimensions; i++){
2150       newControlPoints[names[i]] = config[i];
2151       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2152     }
2153
2154
2155   } else {
2156     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2157   }
2158
2159
2160   const double endGenerateTime = CmiWallTimer();
2161   
2162   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2163   
2164 }
2165
2166
2167
2168
2169
2170 /// Get control point value from range of integers [lb,ub]
2171 int controlPoint(const char *name, int lb, int ub){
2172   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2173   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2174   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2175   int result;
2176
2177   // if we already have control point values for phase, return them
2178   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2179     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2180     return thisPhaseData->controlPoints[std::string(name)];
2181   }
2182   
2183
2184   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2185     // For the first few phases, just use the lower bound, or the default if one was provided 
2186     // This ensures that the ranges for all the control points are known before we do anything fancy
2187     result = lb;
2188
2189     if(defaultControlPointValues.count(std::string(name)) > 0){
2190       int v = defaultControlPointValues[std::string(name)];
2191       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2192       result = v;
2193     }
2194
2195   } else if(controlPointSpace.count(std::string(name)) == 0){
2196     // if this is the first time we've seen the CP, then return the lower bound
2197     result = lb;
2198     
2199   }  else {
2200     // Generate a plan one time for each phase
2201     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2202     
2203     // Use the planned value:
2204     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2205     
2206   }
2207
2208   if(!isInRange(result,ub,lb)){
2209     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2210     fflush(stdout);
2211     fflush(stderr);
2212   }
2213   CkAssert(isInRange(result,ub,lb));
2214   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2215
2216   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2217
2218   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2219   //  thisPhaseData->print();
2220   
2221   return result;
2222 }
2223
2224
2225 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2226   CkAssert(CkMyPe() == 0);
2227   return controlPoint("FortranCP", *lb, *ub);
2228 }
2229
2230
2231
2232
2233 /// Inform the control point framework that a named control point affects the priorities of some array  
2234 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2235 //   CkGroupID aid = arraybase.ckGetArrayID();
2236 //   int groupIdx = aid.idx;
2237 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2238 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2239 // }
2240
2241
2242 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2243 // void controlPointPriorityEntry(const char *name, int idx){
2244 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2245 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2246 // }
2247
2248
2249
2250
2251
2252 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2253
2254     This function decomposes the algorithm into a state machine that allows it to
2255     evaluate one or more configurations through subsequent clls to this function.
2256     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2257
2258     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2259     so a list of them will be created and they will be evaluated, one per call.
2260
2261     Currently there is no stopping criteria, but the simplex ought to contract down
2262     to a few close configurations, and hence not much change will happen after this 
2263     point.
2264
2265  */
2266 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2267
2268         if(useBestKnown){
2269                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2270                 return;
2271         }
2272
2273
2274         if(firstSimplexPhase< 0){
2275                 firstSimplexPhase = allData.phases.size()-1;
2276                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2277         }
2278
2279         int n = controlPointSpace.size();
2280
2281         CkAssert(n>=2);
2282
2283
2284         if(simplexState == beginning){
2285                 // First we evaluate n+1 random points, then we go to a different state
2286                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2287                         CkPrintf("Simplex Tuning: chose random configuration\n");
2288
2289                         // Choose random values from the middle third of the range in each dimension
2290                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2291                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2292                                 const std::string &name = cpsIter->first;
2293                                 const std::pair<int,int> &bounds = cpsIter->second;
2294                                 const int lb = bounds.first;
2295                                 const int ub = bounds.second;
2296                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2297                         }
2298                 } else {
2299                         // Set initial simplex:
2300                         for(int i=0; i<n+1; i++){
2301                                 simplexIndices.insert(firstSimplexPhase+i);
2302                         }
2303                         CkAssert(simplexIndices.size() == n+1);
2304
2305                         // Transition to reflecting state
2306                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2307
2308                 }
2309
2310         } else if (simplexState == reflecting){
2311                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2312                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2313
2314                 // Find the highest time from other points in the simplex
2315                 double highestTimeForOthersInSimplex = 0.0;
2316                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2317                         double t = allData.phases[*iter]->medianTime();
2318                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2319                                 highestTimeForOthersInSimplex = t;
2320                         }
2321                 }
2322
2323                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2324
2325                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2326                         // if y* < yl,  transition to "expanding"
2327                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2328
2329                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2330                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2331                         CkAssert(simplexIndices.size() == n+1);
2332                         simplexIndices.erase(worstPhase);
2333                         CkAssert(simplexIndices.size() == n);
2334                         simplexIndices.insert(pPhase);
2335                         CkAssert(simplexIndices.size() == n+1);
2336                         // Transition to reflecting state
2337                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2338
2339                 } else {
2340                         // if y* > yh
2341                         if(recentPhaseTime <= worstTime){
2342                                 // replace Ph with P*
2343                                 CkAssert(simplexIndices.size() == n+1);
2344                                 simplexIndices.erase(worstPhase);
2345                                 CkAssert(simplexIndices.size() == n);
2346                                 simplexIndices.insert(pPhase);
2347                                 CkAssert(simplexIndices.size() == n+1);
2348                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2349                                 worstPhase = pPhase;
2350                                 // Just as a sanity check, make sure we don't use the non-updated values.
2351                                 worst.clear();
2352                         }
2353
2354                         // Now, form P** and do contracting phase
2355                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2356
2357                 }
2358
2359         } else if (simplexState == doneExpanding){
2360                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2361                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2362                 // A new configuration has been evaluated
2363
2364                 // Check to see if y** < y1
2365                 if(recentPhaseTime < bestTime){
2366                         // replace Ph by P**
2367                         CkAssert(simplexIndices.size() == n+1);
2368                         simplexIndices.erase(worstPhase);
2369                         CkAssert(simplexIndices.size() == n);
2370                         simplexIndices.insert(p2Phase);
2371                         CkAssert(simplexIndices.size() == n+1);
2372                 } else {
2373                         //      replace Ph by P*
2374                         CkAssert(simplexIndices.size() == n+1);
2375                         simplexIndices.erase(worstPhase);
2376                         CkAssert(simplexIndices.size() == n);
2377                         simplexIndices.insert(pPhase);
2378                         CkAssert(simplexIndices.size() == n+1);
2379                 }
2380
2381                 // Transition to reflecting state
2382                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2383
2384         }  else if (simplexState == contracting){
2385                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2386                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2387                 // A new configuration has been evaluated
2388
2389                 // Check to see if y** > yh
2390                 if(recentPhaseTime <= worstTime){
2391                         // replace Ph by P**
2392                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2393                         CkAssert(simplexIndices.size() == n+1);
2394                         simplexIndices.erase(worstPhase);
2395                         CkAssert(simplexIndices.size() == n);
2396                         simplexIndices.insert(p2Phase);
2397                         CkAssert(simplexIndices.size() == n+1);
2398                         // Transition to reflecting state
2399                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2400
2401                 } else {
2402                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2403                         simplexState = stillContracting;
2404
2405                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2406                         stillMustContractList = simplexIndices;
2407
2408                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2409                 }
2410
2411         } else if (simplexState == stillContracting){
2412                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2413
2414                 if(stillMustContractList.size()>0){
2415                         int c = *stillMustContractList.begin();
2416                         stillMustContractList.erase(c);
2417                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2418
2419                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2420
2421                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2422                         int v = 0;
2423                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2424                                 const std::string &name = cpsIter->first;
2425                                 const std::pair<int,int> &bounds = cpsIter->second;
2426                                 const int lb = bounds.first;
2427                                 const int ub = bounds.second;
2428
2429                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2430
2431                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2432                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2433                                 v++;
2434                         }
2435                 } else {
2436                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2437                         CkAssert(stillMustContractList.size() == 0);
2438                         simplexIndices.clear();
2439                         CkAssert(simplexIndices.size()==0);
2440                         for(int i=0; i<n+1; i++){
2441                                 simplexIndices.insert(allData.phases.size()-2-i);
2442                         }
2443                         CkAssert(simplexIndices.size()==n+1);
2444
2445                         // Transition to reflecting state
2446                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2447
2448                 }
2449
2450
2451         } else if (simplexState == expanding){
2452                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2453                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2454                 // A new configuration has been evaluated
2455
2456                 // determine if y** < yl
2457                 if(recentPhaseTime < bestTime){
2458                         // replace Ph by P**
2459                         CkAssert(simplexIndices.size() == n+1);
2460                         simplexIndices.erase(worstPhase);
2461                         CkAssert(simplexIndices.size() == n);
2462                         simplexIndices.insert(p2Phase);
2463                         CkAssert(simplexIndices.size() == n+1);
2464                         // Transition to reflecting state
2465                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2466
2467                 } else {
2468                         // else, replace ph with p*
2469                         CkAssert(simplexIndices.size() == n+1);
2470                         simplexIndices.erase(worstPhase);
2471                         CkAssert(simplexIndices.size() == n);
2472                         simplexIndices.insert(pPhase);
2473                         CkAssert(simplexIndices.size() == n+1);
2474                         // Transition to reflecting state
2475                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2476                 }
2477
2478
2479         } else {
2480                 CkAbort("Unknown simplexState");
2481         }
2482
2483 }
2484
2485
2486
2487 /** Replace the worst point with its reflection across the centroid. */
2488 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2489
2490         int n = controlPointSpace.size();
2491
2492         printSimplex(allData);
2493
2494         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2495
2496
2497         // Quit if the diameter of our simplex is small
2498         double maxr = 0.0;
2499         for(int i=0; i<n+1; i++){
2500                 //              Compute r^2 of this simplex point from the centroid
2501                 double r2 = 0.0;
2502                 std::vector<double> p = pointCoords(allData, i);
2503                 for(int d=0; d<p.size(); d++){
2504                         double r1 = (p[d] * centroid[d]);
2505                         r2 += r1*r1;
2506                 }
2507                 if(r2 > maxr)
2508                         maxr = r2;
2509         }
2510
2511 #if 0
2512         // At some point we quit this tuning
2513         if(maxr < 10){
2514                 useBestKnown = true;
2515                 instrumentedPhase *best = allData.findBest();
2516                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2517
2518                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2519                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2520                         const std::string &name = cpsIter->first;
2521                         newControlPoints[name] =  best->controlPoints[name];
2522                 }
2523         }
2524 #endif
2525
2526         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2527
2528         pPhase = allData.phases.size()-1;
2529         P.resize(n);
2530         for(int i=0; i<n; i++){
2531                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2532         }
2533
2534         for(int i=0; i<P.size(); i++){
2535                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2536         }
2537
2538
2539         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2540         int v = 0;
2541         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2542                 const std::string &name = cpsIter->first;
2543                 const std::pair<int,int> &bounds = cpsIter->second;
2544                 const int lb = bounds.first;
2545                 const int ub = bounds.second;
2546                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2547                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2548                 v++;
2549         }
2550
2551
2552         // Transition to "reflecting" state
2553         simplexState = reflecting;
2554         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2555
2556 }
2557
2558
2559
2560
2561 /** Replace the newly tested reflection with a further expanded version of itself. */
2562 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2563         int n = controlPointSpace.size();
2564         printSimplex(allData);
2565
2566         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2567         // I believe the equation for P** in the text on page 308 is correct.
2568
2569         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2570
2571
2572         p2Phase = allData.phases.size()-1;
2573         P2.resize(n);
2574         for(int i=0; i<n; i++){
2575                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2576         }
2577
2578         for(int i=0; i<P2.size(); i++){
2579                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2580         }
2581
2582
2583         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2584         int v = 0;
2585         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2586                 const std::string &name = cpsIter->first;
2587                 const std::pair<int,int> &bounds = cpsIter->second;
2588                 const int lb = bounds.first;
2589                 const int ub = bounds.second;
2590                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2591                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2592                 v++;
2593         }
2594
2595
2596         // Transition to "doneExpanding" state
2597         simplexState = doneExpanding;
2598         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2599
2600 }
2601
2602
2603
2604
2605 /** Replace the newly tested reflection with a further expanded version of itself. */
2606 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2607         int n = controlPointSpace.size();
2608         printSimplex(allData);
2609
2610         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2611
2612
2613         p2Phase = allData.phases.size()-1;
2614         P2.resize(n);
2615         for(int i=0; i<n; i++){
2616                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2617         }
2618
2619         for(int i=0; i<P2.size(); i++){
2620                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2621         }
2622
2623
2624         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2625         int v = 0;
2626         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2627                 const std::string &name = cpsIter->first;
2628                 const std::pair<int,int> &bounds = cpsIter->second;
2629                 const int lb = bounds.first;
2630                 const int ub = bounds.second;
2631                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2632                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2633                 v++;
2634         }
2635         
2636         
2637         // Transition to "contracting" state
2638         simplexState = contracting;
2639         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2640
2641 }
2642
2643
2644
2645
2646 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2647         int n = controlPointSpace.size();
2648
2649         // Find worst performing point in the simplex
2650         worstPhase = -1;
2651         worstTime = -1.0;
2652         bestPhase = 10000000;
2653         bestTime = 10000000;
2654         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2655                 double t = allData.phases[*iter]->medianTime();
2656                 if(t > worstTime){
2657                         worstTime = t;
2658                         worstPhase = *iter;
2659                 }
2660                 if(t < bestTime){
2661                         bestTime = t;
2662                         bestPhase = *iter;
2663                 }
2664         }
2665         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2666
2667         best = pointCoords(allData, bestPhase);
2668         CkAssert(best.size() == n);
2669
2670         worst = pointCoords(allData, worstPhase);
2671         CkAssert(worst.size() == n);
2672
2673         // Calculate centroid of the remaining points in the simplex
2674         centroid.resize(n);
2675         for(int i=0; i<n; i++){
2676                 centroid[i] = 0.0;
2677         }
2678
2679         int numPts = 0;
2680
2681         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2682                 if(*iter != worstPhase){
2683                         numPts ++;
2684                         // Accumulate into the result vector
2685                         int c = 0;
2686                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2687                                 centroid[c] += citer->second;
2688                                 c++;
2689                         }
2690
2691                 }
2692         }
2693
2694         // Now divide the sums by the number of points.
2695         for(int v = 0; v<centroid.size(); v++) {
2696                 centroid[v] /= (double)numPts;
2697         }
2698
2699         CkAssert(centroid.size() == n);
2700
2701         for(int i=0; i<centroid.size(); i++){
2702                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2703         }
2704
2705
2706 }
2707
2708
2709
2710 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2711         std::vector<double> result;
2712         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2713                 result.push_back((double)citer->second);
2714         }
2715         return result;
2716 }
2717
2718
2719
2720
2721 void ControlPointWriteOutputToDisk(){
2722         CkAssert(CkMyPe() == 0);
2723         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2724 }
2725
2726
2727
2728 /*! @} */
2729
2730 #include "ControlPoints.def.h"
2731
2732 #endif