fixed a bug in CkExit() with projections which has end_execute without matching begin...
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4
5 #include "controlPoints.h"
6 #include "trace-controlPoints.h"
7 #include "LBDatabase.h"
8 #include "controlPoints.h"
9 #include "charm++.h"
10 #include "trace-projections.h"
11 #include <pathHistory.h>
12 #include "cp_effects.h"
13 #include <iostream>
14 #include <math.h>
15 #include <climits>
16
17 #if CMK_WITH_CONTROLPOINT
18
19 #define roundDouble(x)        ((long)(x+0.5))
20 #define myAbs(x)   (((x)>=0.0)?(x):(-1.0*(x)))
21 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
22
23 inline double closestInRange(double v, double a, double b){
24   return (v<a) ? a : ((v>b)?b:v);
25 }
26
27
28 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
29  
30
31 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
32 /**  @{ */
33
34 using namespace std;
35
36 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
37
38
39 //#undef DEBUGPRINT
40 //#define DEBUGPRINT 4
41
42 static void periodicProcessControlPoints(void* ptr, double currWallTime);
43
44
45 // A pointer to this PE's controlpoint manager Proxy
46 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
47 /* readonly */ int random_seed;
48 /* readonly */ long controlPointSamplePeriod;
49 /* readonly */ int whichTuningScheme;
50 /* readonly */ bool writeDataFileAtShutdown;
51 /* readonly */ bool shouldFilterOutputData;
52 /* readonly */ bool loadDataFileAtStartup;
53 /* readonly */ bool shouldGatherMemoryUsage;
54 /* readonly */ bool shouldGatherUtilization;
55 /* readonly */ bool shouldGatherAll;
56 /* readonly */ char CPDataFilename[512];
57
58 extern bool enableCPTracing;
59
60 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
61 /// These probably come from the command line arguments, so are available only on PE 0
62 std::map<std::string, int> defaultControlPointValues;
63
64
65
66 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
67
68
69
70 void printTuningScheme(){
71   switch(whichTuningScheme){
72   case RandomSelection:
73     CkPrintf("Tuning Scheme: RandomSelection\n");
74     break;
75   case AlwaysDefaults:
76     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
77     break;
78   case SimulatedAnnealing:
79     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
80     break;
81   case ExhaustiveSearch:
82     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
83     break;
84   case CriticalPathAutoPrioritization:
85     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
86     break;
87   case UseBestKnownTiming:
88     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
89     break;
90   case UseSteering:
91     CkPrintf("Tuning Scheme: UseSteering\n");
92     break;
93   case MemoryAware:
94     CkPrintf("Tuning Scheme: MemoryAware\n");
95     break;
96   case Simplex:
97     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
98     break;
99   case DivideAndConquer:
100     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
101     break;
102   case LDBPeriod:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
104     break;
105   case LDBPeriodLinear:
106     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
107     break;
108   case LDBPeriodQuadratic:
109     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
110     break;
111   case LDBPeriodOptimal:
112     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
113     break;
114   default:
115     CkPrintf("Unknown tuning scheme\n");
116     break;
117   }
118   fflush(stdout);
119 }
120
121
122
123 /// A reduction type that combines idle time measurements (min/sum/max etc.)
124 CkReduction::reducerType idleTimeReductionType;
125 /// A reducer that combines idle time measurements (min/sum/max etc.)
126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
127   double ret[3];
128   if(nMsg > 0){
129     CkAssert(msgs[0]->getSize()==3*sizeof(double));
130     double *m=(double *)msgs[0]->getData();
131     ret[0]=m[0];
132     ret[1]=m[1];
133     ret[2]=m[2];
134   }
135   for (int i=1;i<nMsg;i++) {
136     CkAssert(msgs[i]->getSize()==3*sizeof(double));
137     double *m=(double *)msgs[i]->getData();
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141   }  
142   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
143 }
144
145
146
147 /// A reduction type that combines idle, overhead, and memory measurements
148 CkReduction::reducerType allMeasuresReductionType;
149 /// A reducer that combines idle, overhead, and memory measurements
150 #define ALL_REDUCTION_SIZE 12
151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
152   double ret[ALL_REDUCTION_SIZE];
153   if(nMsg > 0){
154     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
155     double *m=(double *)msgs[0]->getData();
156     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
157   }
158   for (int i=1;i<nMsg;i++) {
159     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
160     double *m=(double *)msgs[i]->getData();
161     // idle time (min/sum/max)
162     ret[0]=min(ret[0],m[0]);
163     ret[1]+=m[1];
164     ret[2]=max(ret[2],m[2]);
165     // overhead time (min/sum/max)
166     ret[3]=min(ret[3],m[3]);
167     ret[4]+=m[4];
168     ret[5]=max(ret[5],m[5]);
169     // mem usage (max)
170     ret[6]=max(ret[6],m[6]);
171     // bytes per invocation for two types of entry methods
172     ret[7]+=m[7];
173     ret[8]+=m[8];
174     ret[9]+=m[9];
175     ret[10]+=m[10];
176     // Grain size (avg)
177     ret[11]+=m[11];
178   }  
179   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
180 }
181
182
183 /// Registers the control point framework's reduction handlers at startup on each PE
184 /*initproc*/ void registerCPReductions(void) {
185   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
186   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
187 }
188
189
190
191
192
193
194 /// Return an integer between 0 and num-1 inclusive
195 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
197   CkAssert(num > 0);
198
199   unsigned long hash = 0;
200   unsigned int c;
201   unsigned char * str = (unsigned char*)name;
202
203   while (c = *str++){
204     unsigned int c2 = (c+64)%128;
205     unsigned int c3 = (c2*5953)%127;
206     hash = c3 + (hash << 6) + (hash << 16) - hash;
207   }
208
209   unsigned long shuffled1 = (hash*2083)%7907;
210   unsigned long shuffled2 = (seed*4297)%2017;
211
212   unsigned long shuffled3 = (random_seed*4799)%7919;
213
214   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
215
216   unsigned int result = ((namehash * 6029) % 1117) % num;
217
218   CkAssert(result >=0 && result < num);
219   return result;
220 }
221
222
223
224 controlPointManager::controlPointManager() {
225   generatedPlanForStep = -1;
226
227     exitWhenReady = false;
228     alreadyRequestedMemoryUsage = false;   
229     alreadyRequestedIdleTime = false;
230     alreadyRequestedAll = false;
231     
232     instrumentedPhase * newPhase = new instrumentedPhase();
233     allData.phases.push_back(newPhase);   
234     
235     frameworkShouldAdvancePhase = false;
236     haveControlPointChangeCallback = false;
237 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
238     
239     phase_id = 0;
240
241     if(loadDataFileAtStartup){    
242       loadDataFile();
243     }
244
245     
246     if(CkMyPe() == 0){
247       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
248     }
249
250     traceRegisterUserEvent("No currently executing message", 5000);
251     traceRegisterUserEvent("Zero time along critical path", 5010);
252     traceRegisterUserEvent("Positive total time along critical path", 5020);
253     traceRegisterUserEvent("env->setPathHistory()", 6000);
254     traceRegisterUserEvent("Critical Path", 5900);
255     traceRegisterUserEvent("Table Entry", 5901);
256
257
258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
259     traceRegisterUserEvent("registerTerminalPath", 100); 
260 #endif
261
262   }
263   
264
265  controlPointManager::~controlPointManager(){
266 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
267   }
268
269   void controlPointManager::pup(PUP::er &p)
270   {
271     CBase_controlPointManager::pup(p);
272       // FIXME: does not work when control point is actually used,
273       // just minimal pup so that it allows exit function to work (exitIfReady).
274     p|generatedPlanForStep;
275     p|exitWhenReady;
276     p|alreadyRequestedMemoryUsage;
277     p|alreadyRequestedIdleTime;
278     p|alreadyRequestedAll;
279     p|frameworkShouldAdvancePhase;
280     p|haveControlPointChangeCallback;
281     p|phase_id;
282   }
283   
284
285   /// Loads the previous run data file
286   void controlPointManager::loadDataFile(){
287     ifstream infile(CPDataFilename);
288     vector<std::string> names;
289     std::string line;
290   
291     while(getline(infile,line)){
292       if(line[0] != '#')
293         break;
294     }
295   
296     int numTimings = 0;
297     std::istringstream n(line);
298     n >> numTimings;
299   
300     while(getline(infile,line)){ 
301       if(line[0] != '#') 
302         break; 
303     }
304
305     int numControlPointNames = 0;
306     std::istringstream n2(line);
307     n2 >> numControlPointNames;
308   
309     for(int i=0; i<numControlPointNames; i++){
310       getline(infile,line);
311       names.push_back(line);
312     }
313
314     for(int i=0;i<numTimings;i++){
315       getline(infile,line);
316       while(line[0] == '#')
317         getline(infile,line); 
318
319       instrumentedPhase * ips = new instrumentedPhase();
320
321       std::istringstream iss(line);
322
323       // Read memory usage for phase
324       iss >> ips->memoryUsageMB;
325       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
326
327       // Read idle time
328       iss >> ips->idleTime.min;
329       iss >> ips->idleTime.avg;
330       iss >> ips->idleTime.max;
331
332       // Read overhead time
333       iss >> ips->overheadTime.min;
334       iss >> ips->overheadTime.avg;
335       iss >> ips->overheadTime.max;
336
337       // read bytePerInvoke
338       iss >> ips->bytesPerInvoke;
339
340       // read grain size
341       iss >> ips->grainSize;
342
343       // Read control point values
344       for(int cp=0;cp<numControlPointNames;cp++){
345         int cpvalue;
346         iss >> cpvalue;
347         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
348       }
349
350       // ignore median time
351       double mt;
352       iss >> mt;
353
354       // Read all times
355
356       double time;
357
358       while(iss >> time){
359         ips->times.push_back(time);
360 #if DEBUGPRINT > 5
361         CkPrintf("read time %lf from file\n", time);
362 #endif
363       }
364
365       allData.phases.push_back(ips);
366
367     }
368
369     infile.close();
370   }
371
372
373
374   /// Add the current data to allData and output it to a file
375   void controlPointManager::writeDataFile(){
376     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
377     ofstream outfile(CPDataFilename);
378     allData.cleanupNames();
379
380 //    string s = allData.toString();
381 //    CkPrintf("At end: \n %s\n", s.c_str());
382
383     if(shouldFilterOutputData){
384       allData.verify();
385       allData.filterOutIncompletePhases();
386     }
387
388 //    string s2 = allData.toString();
389 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
390     if(allData.toString().length() > 10){
391       outfile << allData.toString();
392     } else {
393       outfile << " No data available to save to disk " << endl;
394     }
395     outfile.close();
396   }
397
398   /// User can register a callback that is called when application should advance to next phase
399   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
400     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
401     controlPointChangeCallback = cb;
402     haveControlPointChangeCallback = true;
403   }
404
405
406 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
407 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
408   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
409 }
410
411   /// Called periodically by the runtime to handle the control points
412   /// Currently called on each PE
413   void controlPointManager::processControlPoints(){
414
415 #if DEBUGPRINT
416     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
417 #endif
418
419     //==========================================================================================
420     // Print the data for each phase
421
422     const int s = allData.phases.size();
423
424 #if DEBUGPRINT
425     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
426     for(int p=0;p<s;++p){
427       const instrumentedPhase &phase = allData.phases[p];
428       const idleTimeContainer &idle = phase.idleTime;
429       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
430       vector<double> const &times = phase.times;
431
432       CkPrintf("Phase %d:\n", p);
433       idle.print();
434      //   CkPrintf("critical paths: (* affected by control point)\n");
435         //   for(int i=0;i<criticalPaths.size(); i++){
436         // If affected by a control point
437         //      criticalPaths[i].print();
438
439       //        criticalPaths[i].print();
440       //        CkPrintf("\n");
441         
442
443       //   }
444       CkPrintf("Timings:\n");
445       for(int i=0;i<times.size(); i++){
446         CkPrintf("%lf ", times[i]);
447       }
448       CkPrintf("\n");
449
450     }
451     
452     CkPrintf("\n\n");
453
454
455 #endif
456
457
458
459     //==========================================================================================
460     // If this is a phase during which we try to adapt control point values based on critical path
461 #if 0
462     if( s%5 == 4) {
463
464       // Find the most recent phase with valid critical path data and idle time measurements      
465       int whichPhase=-1;
466       for(int p=0; p<s; ++p){
467         const instrumentedPhase &phase = allData.phases[p];
468         const idleTimeContainer &idle = phase.idleTime;
469         if(idle.isValid() && phase.criticalPaths.size()>0 ){
470           whichPhase = p;
471         }
472       }
473       
474       
475       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
476       const instrumentedPhase &phase = allData.phases[whichPhase];
477       const idleTimeContainer &idle = phase.idleTime;
478       
479       if(idle.min > 0.1){
480         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
481         
482         // Determine the median critical path for this phase
483         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
484         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
485         CkAssert(phase.criticalPaths.size() > 0);
486         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
487         
488         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
489         
490         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
491           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
492
493
494           CkPrintf("Finding control points related to the critical path\n");
495           int cpcount = 0;
496           std::set<std::string> controlPointsAffectingCriticalPath;
497
498           
499           for(int e=0;e<path.getNumUsed();e++){
500             if(path.getUsedCount(e)>0){
501               int ep = path.getUsedEp(e);
502
503               std::map<std::string, std::set<int> >::iterator iter;
504               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
505                 if(iter->second.count(ep)>0){
506                   controlPointsAffectingCriticalPath.insert(iter->first);
507                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
508                   cpcount++;
509                 }
510               }
511               
512             }
513           }
514           
515
516           if(cpcount==0){
517             CkPrintf("No control points are known to affect the critical path\n");
518           } else {
519             double beginTime = CmiWallTimer();
520
521             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
522             
523             newControlPoints = phase.controlPoints;
524             
525             if(frameworkShouldAdvancePhase){
526               gotoNextPhase();  
527             }
528             
529             if(haveControlPointChangeCallback){ 
530 #if DEBUGPRINT
531               CkPrintf("Calling control point change callback\n");
532 #endif
533               // Create a high priority message and send it to the callback
534               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
535               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
536               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
537               controlPointChangeCallback.send(msg);
538               
539             }
540             
541             
542             // adjust the control points that can affect the critical path
543
544             char textDescription[4096*2];
545             textDescription[0] = '\0';
546
547             std::map<std::string,int>::iterator newCP;
548             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
549               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
550                 // decrease the value (increase priority) if within range
551                 int lowerbound = controlPointSpace[newCP->first].first;
552                 if(newCP->second > lowerbound){
553                   newControlPointsAvailable = true;
554                   newCP->second --;
555                 }
556               }
557             }
558            
559             // Create a string for a projections user event
560             if(1){
561               std::map<std::string,int>::iterator newCP;
562               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
563                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
564               }
565             }
566
567             char *userEventString = new char[4096*2];
568             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
569             
570             static int userEventCounter = 20000;
571             CkPrintf("USER EVENT: %s\n", userEventString);
572             
573             traceRegisterUserEvent(userEventString, userEventCounter); 
574             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
575             userEventCounter++;
576             
577           }
578           
579         } else {
580           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
581         }
582         
583         
584       }
585       
586     } else {
587       
588       
589     }
590    
591 #endif
592
593
594
595     if(frameworkShouldAdvancePhase){
596       gotoNextPhase();  
597     }
598     
599     if(haveControlPointChangeCallback){ 
600       // Create a high priority message and send it to the callback
601       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
602       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
603       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
604       controlPointChangeCallback.send(msg);
605     }
606     
607   }
608   
609   /// Determine if any control point is known to affect an entry method
610   bool controlPointManager::controlPointAffectsThisEP(int ep){
611     std::map<std::string, std::set<int> >::iterator iter;
612     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
613       if(iter->second.count(ep)>0){
614         return true;
615       }
616     }
617     return false;    
618   }
619   
620   /// Determine if any control point is known to affect a chare array  
621   bool controlPointManager::controlPointAffectsThisArray(int array){
622     std::map<std::string, std::set<int> >::iterator iter;
623     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
624       if(iter->second.count(array)>0){
625         return true;
626       }
627     }
628     return false;   
629   }
630   
631
632   /// The data from the current phase
633   instrumentedPhase * controlPointManager::currentPhaseData(){
634     int s = allData.phases.size();
635     CkAssert(s>=1);
636     return allData.phases[s-1];
637   }
638  
639
640   /// The data from the previous phase
641   instrumentedPhase * controlPointManager::previousPhaseData(){
642     int s = allData.phases.size();
643     if(s >= 2 && phase_id > 0) {
644       return allData.phases[s-2];
645     } else {
646       return NULL;
647     }
648   }
649  
650   /// The data from two phases back
651   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
652     int s = allData.phases.size();
653     if(s >= 3 && phase_id > 0) {
654       return allData.phases[s-3];
655     } else {
656       return NULL;
657     }
658   }
659   
660
661   /// Called by either the application or the Control Point Framework to advance to the next phase  
662   void controlPointManager::gotoNextPhase(){
663     CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
664     fflush(stdout);
665       
666     if(enableCPTracing){
667       if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
668         alreadyRequestedAll = true;
669         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
670         CkPrintf("Requesting all measurements\n");
671         thisProxy.requestAll(*cb);
672         delete cb;
673         
674       } else {
675         
676         if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
677           alreadyRequestedMemoryUsage = true;
678           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
679           thisProxy.requestMemoryUsage(*cb);
680           delete cb;
681         }
682         
683         if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
684           alreadyRequestedIdleTime = true;
685           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
686           thisProxy.requestIdleTime(*cb);
687           delete cb;
688         }
689       }
690     }
691
692
693
694 #if CMK_LBDB_ON && 0
695     LBDatabase * myLBdatabase = LBDatabaseObj();
696     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
697     const CkVec<LBObj*> objs = myLBDB->getObjs();
698     const int objCount = myLBDB->getObjCount();
699     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
700     
701     LBRealType maxObjWallTime = -1.0;
702     
703     for(int i=0;i<objs.length();i++){
704       LBObj* o = objs[i];
705       const LDObjData d = o->ObjData();
706       LBRealType cpuTime = d.cpuTime;
707       LBRealType wallTime = d.wallTime;
708       // can also get object handles from the LDObjData struct
709       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
710       if(wallTime > maxObjWallTime){
711
712       }
713       
714     }
715
716     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
717     
718 #endif    
719
720
721     
722     // increment phase id
723     phase_id++;
724     
725
726     // Create new entry for the phase we are starting now
727     instrumentedPhase * newPhase = new instrumentedPhase();
728     allData.phases.push_back(newPhase);
729     
730     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
731
732   }
733
734   /// An application uses this to register an instrumented timing for this phase
735   void controlPointManager::setTiming(double time){
736     currentPhaseData()->times.push_back(time);
737
738 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
739        
740     // First we should register this currently executing message as a path, because it is likely an important one to consider.
741     //    registerTerminalEntryMethod();
742     
743     // save the critical path for this phase
744     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
745     //    maxTerminalPathHistory.reset();
746     
747     
748     // Reset the counts for the currently executing message
749     //resetThisEntryPath();
750         
751 #endif
752     
753   }
754   
755   /// Entry method called on all PEs to request CPU utilization statistics
756   void controlPointManager::requestIdleTime(CkCallback cb){
757     CkAssert(enableCPTracing);
758    
759     double i = localControlPointTracingInstance()->idleRatio();
760     double idle[3];
761     idle[0] = i;
762     idle[1] = i;
763     idle[2] = i;
764     
765     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
766     
767     localControlPointTracingInstance()->resetTimings();
768     
769     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
770   }
771   
772   /// All processors reduce their memory usages in requestIdleTime() to this method
773   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
774     CkAssert(enableCPTracing);
775
776     int size=msg->getSize() / sizeof(double);
777     CkAssert(size==3);
778     double *r=(double *) msg->getData();
779         
780     instrumentedPhase* prevPhase = previousPhaseData();
781     if(prevPhase != NULL){
782       prevPhase->idleTime.min = r[0];
783       prevPhase->idleTime.avg = r[1]/CkNumPes();
784       prevPhase->idleTime.max = r[2];
785       prevPhase->idleTime.print();
786       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
787     } else {
788       CkPrintf("There is no previous phase to store the idle time measurements\n");
789     }
790     
791     alreadyRequestedIdleTime = false;
792     checkForShutdown();
793     delete msg;
794   }
795
796
797
798
799
800
801   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
802   void controlPointManager::requestAll(CkCallback cb){
803     CkAssert(enableCPTracing);
804
805     TraceControlPoints *t = localControlPointTracingInstance();
806
807     double data[ALL_REDUCTION_SIZE];
808
809     double *idle = data;
810     double *over = data+3;
811     double *mem = data+6;
812     double *msgBytes = data+7;  
813     double *grainsize = data+11;  
814
815     const double i = t->idleRatio();
816     idle[0] = i;
817     idle[1] = i;
818     idle[2] = i;
819
820     const double o = t->overheadRatio();
821     over[0] = o;
822     over[1] = o;
823     over[2] = o;
824
825     const double m = t->memoryUsageMB();
826     mem[0] = m;
827
828     msgBytes[0] = t->b2;
829     msgBytes[1] = t->b3;
830     msgBytes[2] = t->b2mlen;
831     msgBytes[3] = t->b3mlen;
832
833     grainsize[0] = t->grainSize();
834     
835     localControlPointTracingInstance()->resetAll();
836
837     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
838   }
839   
840   /// All processors reduce their memory usages in requestIdleTime() to this method
841   void controlPointManager::gatherAll(CkReductionMsg *msg){
842     CkAssert(enableCPTracing);
843
844     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
845     int size=msg->getSize() / sizeof(double);
846     double *data=(double *) msg->getData();
847         
848     double *idle = data;
849     double *over = data+3;
850     double *mem = data+6;
851     double *msgBytes = data+7;
852     double *granularity = data+11;
853
854
855     //    std::string b = allData.toString();
856
857     instrumentedPhase* prevPhase = previousPhaseData();
858     if(prevPhase != NULL){
859       prevPhase->idleTime.min = idle[0];
860       prevPhase->idleTime.avg = idle[1]/CkNumPes();
861       prevPhase->idleTime.max = idle[2];
862       
863       prevPhase->overheadTime.min = over[0];
864       prevPhase->overheadTime.avg = over[1]/CkNumPes();
865       prevPhase->overheadTime.max = over[2];
866       
867       prevPhase->memoryUsageMB = mem[0];
868       
869       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
870
871       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
872       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
873
874       /** The average of the grain sizes on all PEs in us */
875       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
876
877       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
878       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
879
880       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
881       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
882
883       if(bytesPerInvoke2 > bytesPerInvoke3)
884         prevPhase->bytesPerInvoke = bytesPerInvoke2;
885       else
886         prevPhase->bytesPerInvoke = bytesPerInvoke3;
887
888       //      prevPhase->print();
889       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
890
891       //      std::string a = allData.toString();
892
893       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
894       
895     } else {
896       CkPrintf("There is no previous phase to store measurements\n");
897     }
898     
899     alreadyRequestedAll = false;
900     checkForShutdown();
901     delete msg;
902   }
903
904
905
906
907   void controlPointManager::checkForShutdown(){
908     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
909       doExitNow();
910     }
911   }
912
913
914   void controlPointManager::exitIfReady(){
915      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
916        //  CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
917        doExitNow();
918      } else {
919        // CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
920        exitWhenReady = true;
921      }
922   }
923
924
925
926   void controlPointManager::doExitNow(){
927           _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL);
928           writeOutputToDisk();
929           //      CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
930           CkExit();
931   }
932
933   void controlPointManager::writeOutputToDisk(){
934           if(writeDataFileAtShutdown){
935                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
936           }
937   }
938
939
940   /// Entry method called on all PEs to request memory usage
941   void controlPointManager::requestMemoryUsage(CkCallback cb){
942     int m = CmiMaxMemoryUsage() / 1024 / 1024;
943     CmiResetMaxMemory();
944     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
945     contribute(sizeof(int),&m,CkReduction::max_int, cb);
946   }
947
948   /// All processors reduce their memory usages to this method
949   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
950     int size=msg->getSize() / sizeof(int);
951     CkAssert(size==1);
952     int *m=(int *) msg->getData();
953
954     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
955     
956     instrumentedPhase* prevPhase = previousPhaseData();
957     if(prevPhase != NULL){
958       prevPhase->memoryUsageMB = m[0];
959     } else {
960       CkPrintf("No place to store memory usage");
961     }
962
963     alreadyRequestedMemoryUsage = false;
964     checkForShutdown();
965     delete msg;
966   }
967
968
969 //   /// Inform the control point framework that a named control point affects the priorities of some array  
970 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
971 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
972     
973 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
974 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
975 //     } else {
976 //       std::set<int> s;
977 //       s.insert(groupIdx);
978 //       affectsPrioritiesArray[std::string(name)] = s;
979 //     }
980     
981 // #if DEBUGPRINT   
982 //     std::map<std::string, std::set<int> >::iterator f;
983 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
984 //       std::string name = f->first;
985 //       std::set<int> &vals = f->second;
986 //       cout << "Control point " << name << " affects arrays: ";
987 //       std::set<int>::iterator i;
988 //       for(i=vals.begin(); i!=vals.end();++i){
989 //      cout << *i << " ";
990 //       }
991 //       cout << endl;
992 //     }
993 // #endif
994     
995 //   }
996   
997 //   /// Inform the control point framework that a named control point affects the priority of some entry method
998 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
999 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
1000
1001 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
1002 //       affectsPrioritiesEP[std::string(name)].insert(idx);
1003 //     } else {
1004 //       std::set<int> s;
1005 //       s.insert(idx);
1006 //       affectsPrioritiesEP[std::string(name)] = s;
1007 //     }
1008     
1009 // #if DEBUGPRINT
1010 //     std::map<std::string, std::set<int> >::iterator f;
1011 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1012 //       std::string name = f->first;
1013 //       std::set<int> &vals = f->second;
1014 //       cout << "Control point " << name << " affects EP: ";
1015 //       std::set<int>::iterator i;
1016 //       for(i=vals.begin(); i!=vals.end();++i){
1017 //      cout << *i << " ";
1018 //       }
1019 //       cout << endl;
1020 //     }
1021 // #endif
1022
1023
1024 //   }
1025   
1026
1027
1028 /// An interface callable by the application.
1029 void gotoNextPhase(){
1030   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1031 }
1032
1033 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1034 {
1035   gotoNextPhase();
1036 }
1037
1038
1039 /// A mainchare that is used just to create our controlPointManager group at startup
1040 class controlPointMain : public CBase_controlPointMain {
1041 public:
1042   controlPointMain(CkArgMsg* args){
1043 #if OLDRANDSEED
1044     struct timeval tp;
1045     gettimeofday(& tp, NULL);
1046     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1047 #else
1048     double time = CmiWallTimer();
1049     int sec = (int)time;
1050     int usec = (int)((time - (double)sec)*1000000.0);
1051     random_seed =  sec ^ usec;
1052 #endif
1053     
1054     
1055     double period, periodms;
1056     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1057     bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1058     if(haveSamplePeriod){
1059       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1060       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1061     } else if(haveSamplePeriodMs){
1062       CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1063       controlPointSamplePeriod = periodms; /**< A readonly */
1064     } else {
1065       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1066     }
1067
1068   
1069     
1070     whichTuningScheme = RandomSelection;
1071
1072
1073     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1074       whichTuningScheme = RandomSelection;
1075     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1076       whichTuningScheme = ExhaustiveSearch;
1077     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1078       whichTuningScheme = AlwaysDefaults;
1079     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1080       whichTuningScheme = SimulatedAnnealing;
1081     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1082       whichTuningScheme = CriticalPathAutoPrioritization;
1083     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1084       whichTuningScheme = UseBestKnownTiming;
1085     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1086       whichTuningScheme = UseSteering;
1087     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1088       whichTuningScheme = MemoryAware;
1089     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1090       whichTuningScheme = Simplex;
1091     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1092       whichTuningScheme = DivideAndConquer;
1093     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1094       whichTuningScheme = LDBPeriod;
1095     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1096       whichTuningScheme = LDBPeriodLinear;
1097     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1098       whichTuningScheme = LDBPeriodQuadratic;
1099     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1100       whichTuningScheme = LDBPeriodOptimal;
1101     }
1102
1103     char *defValStr = NULL;
1104     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1105       CkPrintf("You specified default value string: %s\n", defValStr);
1106       
1107       // Parse the string, looking for commas
1108      
1109
1110       char *tok = strtok(defValStr, ",");
1111       while (tok) {
1112         // split on the equal sign
1113         int len = strlen(tok);
1114         char *eqsign = strchr(tok, '=');
1115         if(eqsign != NULL && eqsign != tok){
1116           *eqsign = '\0';
1117           char *cpname = tok;
1118           std::string cpName(tok);
1119           char *cpDefVal = eqsign+1;      
1120           int v=-1;
1121           if(sscanf(cpDefVal, "%d", &v) == 1){
1122             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1123             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1124             defaultControlPointValues[cpName] = v;
1125           }
1126         }
1127         tok = strtok(NULL, ",");
1128       }
1129
1130     }
1131
1132     shouldGatherAll = false;
1133     shouldGatherMemoryUsage = false;
1134     shouldGatherUtilization = false;
1135     
1136     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1137       shouldGatherAll = true;
1138     } else {
1139       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1140         shouldGatherMemoryUsage = true;
1141       }
1142       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1143         shouldGatherUtilization = true;
1144       }
1145     }
1146     
1147     writeDataFileAtShutdown = false;   
1148     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1149       writeDataFileAtShutdown = true;
1150     }
1151
1152     shouldFilterOutputData = true;
1153     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1154       shouldFilterOutputData = false;
1155     }
1156
1157
1158    loadDataFileAtStartup = false;   
1159     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1160       loadDataFileAtStartup = true;
1161     }
1162
1163     char *cpdatafile;
1164     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1165       sprintf(CPDataFilename, "%s", cpdatafile);
1166     } else {
1167       sprintf(CPDataFilename, "controlPointData.txt");
1168     }
1169
1170
1171     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1172   }
1173   ~controlPointMain(){}
1174 };
1175
1176 /// An interface callable by the application.
1177 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1178   CkAssert(CkMyPe() == 0);
1179   CkPrintf("Application has registered a control point change callback\n");
1180   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1181 }
1182
1183 /// An interface callable by the application.
1184 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1185   if(CkMyPe() == 0){
1186     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1187     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1188   }
1189 }
1190
1191 /// An interface callable by the application.
1192 void registerControlPointTiming(double time){
1193   CkAssert(CkMyPe() == 0);
1194 #if DEBUGPRINT>0
1195   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1196 #endif
1197   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1198 }
1199
1200 /// An interface callable by the application.
1201 void controlPointTimingStamp() {
1202   CkAssert(CkMyPe() == 0);
1203 #if DEBUGPRINT>0
1204   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1205 #endif
1206   
1207   static double prev_time = 0.0;
1208   double t = CmiWallTimer();
1209   double duration = t - prev_time;
1210   prev_time = t;
1211     
1212   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1213 }
1214
1215 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1216 {
1217   controlPointTimingStamp();
1218 }
1219
1220
1221 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1222 {
1223   setFrameworkAdvancePhase(*value);
1224 }
1225
1226
1227
1228
1229 /// Shutdown the control point framework, writing data to disk if necessary
1230 extern "C" void controlPointShutdown(){
1231   if(CkMyPe() == 0){
1232
1233     // wait for gathering of idle time & memory usage to complete
1234     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1235
1236   }
1237 }
1238
1239 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1240 void controlPointInitNode(){
1241 //  CkPrintf("controlPointInitNode()\n");
1242   registerExitFn(controlPointShutdown);
1243 }
1244
1245 /// Called periodically to allow control point framework to do things periodically
1246 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1247 #ifdef DEBUGPRINT
1248   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1249 #endif
1250   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1251   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1252 }
1253
1254
1255
1256
1257
1258 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1259 /// user observed characteristic to adapt specific control point values.
1260 /// This function must return valid values for newControlPoints.
1261 void controlPointManager::generatePlan() {
1262   const double startGenerateTime = CmiWallTimer();
1263   const int phase_id = this->phase_id;
1264   const int effective_phase = allData.phases.size();
1265
1266   // Only generate a plan once per phase
1267   if(generatedPlanForStep == phase_id) 
1268     return;
1269   generatedPlanForStep = phase_id;
1270  
1271   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1272   printTuningScheme();
1273
1274   // By default lets put the previous phase data into newControlPoints
1275   instrumentedPhase *prevPhase = previousPhaseData();
1276   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1277           newControlPoints[cpsIter->first] = cpsIter->second;
1278   }
1279
1280
1281   if( whichTuningScheme == RandomSelection ){
1282     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1283     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1284       const std::string &name = cpsIter->first;
1285       const std::pair<int,int> &bounds = cpsIter->second;
1286       const int lb = bounds.first;
1287       const int ub = bounds.second;
1288       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1289     }
1290   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1291     // -----------------------------------------------------------
1292     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1293     //   
1294     // This scheme will return the median value for the range 
1295     // early on, and then will transition over to the new control points
1296     // determined by the critical path adapting code
1297
1298     // This won't work until the periodic function is fixed up a bit
1299
1300     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1301     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1302       const std::string &name = cpsIter->first;
1303       const std::pair<int,int> &bounds = cpsIter->second;
1304       const int lb = bounds.first;
1305       const int ub = bounds.second;
1306       newControlPoints[name] =  (lb+ub)/2;
1307     }
1308
1309   } else if ( whichTuningScheme == MemoryAware ) {
1310
1311     // -----------------------------------------------------------
1312     //  STEERING BASED ON MEMORY USAGE
1313
1314     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1315     instrumentedPhase *prevPhase = previousPhaseData();
1316  
1317     if(phase_id%2 == 0){
1318       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1319       twoAgoPhase->print();
1320       CkPrintf("\n");
1321       fflush(stdout);
1322       
1323       // See if memory usage is low:
1324       double memUsage = twoAgoPhase->memoryUsageMB;
1325       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1326       fflush(stdout);
1327       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1328         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1329         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1330         
1331         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1332         newControlPoints = twoAgoPhase->controlPoints;
1333
1334
1335         CkPrintf("Steering (memory based) initialized plan\n");
1336         fflush(stdout);
1337
1338         // look for a possible control point knob to turn
1339         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1340         
1341         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1342         bool found = false;
1343         std::string cpName;
1344         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1345         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1346         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1347           cpName = iter->first;
1348           info = &iter->second;
1349           found = true;
1350           break;
1351         }
1352
1353         // Adapt the control point value
1354         if(found){
1355           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1356           fflush(stdout);
1357           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1358           const int maxValue = controlPointSpace[cpName].second;
1359           
1360           if(twoAgoValue+1 <= maxValue){
1361             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1362           }
1363         }
1364         
1365       }
1366     }
1367
1368     CkPrintf("Steering (memory based) done for this phase\n");
1369     fflush(stdout);
1370
1371   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1372
1373     // -----------------------------------------------------------
1374     //  USE BEST KNOWN TIME  
1375     static bool firstTime = true;
1376     if(firstTime){
1377       firstTime = false;
1378       instrumentedPhase *best = allData.findBest(); 
1379       CkPrintf("Best known phase is:\n");
1380       best->print();
1381       CkPrintf("\n");
1382       
1383       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1384       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1385         const std::string &name = cpsIter->first;
1386         newControlPoints[name] =  best->controlPoints[name];
1387       }
1388     }
1389   } else if( whichTuningScheme == LDBPeriod) {
1390     // Assume this is used in this manner:
1391     //  1) go to next phase
1392     //  2) request control point
1393     //  3) load balancing
1394     //  4) computation
1395     
1396     
1397     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1398     instrumentedPhase *prevPhase = previousPhaseData();
1399     
1400     
1401     const std::vector<double> &times = twoAgoPhase->times;
1402     const int oldNumTimings = times.size();
1403
1404
1405     const std::vector<double> &timesNew = prevPhase->times;
1406     const int newNumTimings = timesNew.size();
1407
1408
1409     if(oldNumTimings > 4 && newNumTimings > 4){
1410       
1411       // Build model of execution time based on two phases ago
1412       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1413       
1414       double oldSum = 0;
1415       
1416       for(int i=2; i<oldNumTimings; i++){
1417         oldSum += times[i];
1418       }
1419       
1420       const double oldAvg = oldSum / (oldNumTimings-2);
1421       
1422       
1423       
1424       
1425       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1426       const double expectedTotalTime = oldAvg * newNumTimings;
1427       
1428       
1429       // Measure actual time
1430       double newSum = 0.0;
1431       for(int i=2; i<newNumTimings; ++i){
1432         newSum += timesNew[i];
1433       }
1434       
1435       const double newAvg = newSum / (newNumTimings-2);
1436       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1437       
1438       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1439       
1440       // Determine load balance cost
1441       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1442       
1443       const double benefitAfterLB = benefit - lbcost;
1444     
1445     
1446       // Determine whether LB cost outweights the estimated benefit
1447       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1448     
1449     
1450       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1451       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1452         const std::string &name = cpsIter->first;
1453         const std::pair<int,int> &bounds = cpsIter->second;
1454         const int lb = bounds.first;
1455         const int ub = bounds.second;
1456       
1457         if(benefitAfterLB > 0){
1458           CkPrintf("Constant Model: Beneficial LB\n");
1459           int newval = newControlPoints[name] / 2;
1460           if(newval > lb)
1461             newControlPoints[name] = newval;
1462           else 
1463             newControlPoints[name] = lb;
1464         } else {
1465           CkPrintf("Constant Model: Detrimental LB\n");
1466           int newval = newControlPoints[name] * 2;
1467           if(newval < ub)
1468             newControlPoints[name] = newval;
1469           else
1470             newControlPoints[name] = ub;
1471         }
1472       }
1473     }
1474     
1475     
1476   }  else if( whichTuningScheme == LDBPeriodLinear) {
1477     // Assume this is used in this manner:
1478     //  1) go to next phase
1479     //  2) request control point
1480     //  3) load balancing
1481     //  4) computation
1482
1483
1484     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1485     instrumentedPhase *prevPhase = previousPhaseData();
1486     
1487     const std::vector<double> &times = twoAgoPhase->times;
1488     const int oldNumTimings = times.size();
1489
1490     const std::vector<double> &timesNew = prevPhase->times;
1491     const int newNumTimings = timesNew.size();
1492     
1493
1494     if(oldNumTimings > 4 && newNumTimings > 4){
1495
1496       // Build model of execution time based on two phases ago
1497       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1498       const int b1 = 2 + (oldNumTimings-2)/2;
1499       double s1 = 0;
1500       double s2 = 0;
1501     
1502       const double ldbStepsTime = times[0] + times[1];
1503     
1504       for(int i=2; i<b1; i++){
1505         s1 += times[i];
1506       }
1507       for(int i=b1; i<oldNumTimings; i++){
1508         s2 += times[i];
1509       }
1510       
1511       
1512       // Compute the estimated time for the last phase's data
1513     
1514       const double a1 = s1 / (double)(b1-2);
1515       const double a2 = s2 / (double)(oldNumTimings-b1);
1516       const double aold = (a1+a2)/2.0;
1517
1518       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1519         
1520     
1521       // Measure actual time
1522       double sum = 0.0;
1523       for(int i=2; i<newNumTimings; ++i){
1524         sum += timesNew[i];
1525       }
1526
1527       const double avg = sum / ((double)(newNumTimings-2));
1528       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1529
1530       const double benefit = expectedTotalTime - actualTotalTime;
1531
1532       // Determine load balance cost
1533       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1534
1535       const double benefitAfterLB = benefit - lbcost;
1536
1537     
1538       // Determine whether LB cost outweights the estimated benefit
1539       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1540     
1541     
1542     
1543       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1544       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1545         const std::string &name = cpsIter->first;
1546         const std::pair<int,int> &bounds = cpsIter->second;
1547         const int lb = bounds.first;
1548         const int ub = bounds.second;
1549       
1550         if(benefitAfterLB > 0){
1551           CkPrintf("Linear Model: Beneficial LB\n");
1552           int newval = newControlPoints[name] / 2;
1553           if(newval > lb)
1554             newControlPoints[name] = newval;
1555           else 
1556             newControlPoints[name] = lb;
1557         } else {
1558           CkPrintf("Linear Model: Detrimental LB\n");
1559           int newval = newControlPoints[name] * 2;
1560           if(newval < ub)
1561             newControlPoints[name] = newval;
1562           else 
1563             newControlPoints[name] = ub;
1564         }
1565       }
1566     }
1567
1568   }
1569
1570   else if( whichTuningScheme == LDBPeriodQuadratic) {
1571     // Assume this is used in this manner:
1572     //  1) go to next phase
1573     //  2) request control point
1574     //  3) load balancing
1575     //  4) computation
1576
1577
1578     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1579     instrumentedPhase *prevPhase = previousPhaseData();
1580         
1581     const std::vector<double> &times = twoAgoPhase->times;
1582     const int oldNumTimings = times.size();
1583
1584     const std::vector<double> &timesNew = prevPhase->times;
1585     const int newNumTimings = timesNew.size();
1586
1587     
1588     if(oldNumTimings > 4 && newNumTimings > 4){
1589
1590
1591       // Build model of execution time based on two phases ago
1592       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1593       const int b1 = 2 + (oldNumTimings-2)/3;
1594       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1595       double s1 = 0;
1596       double s2 = 0;
1597       double s3 = 0;
1598
1599       const double ldbStepsTime = times[0] + times[1];
1600     
1601       for(int i=2; i<b1; i++){
1602         s1 += times[i];
1603       }
1604       for(int i=b1; i<b2; i++){
1605         s2 += times[i];
1606       }
1607       for(int i=b2; i<oldNumTimings; i++){
1608         s3 += times[i];
1609       }
1610
1611     
1612       // Compute the estimated time for the last phase's data
1613     
1614       const double a1 = s1 / (double)(b1-2);
1615       const double a2 = s2 / (double)(b2-b1);
1616       const double a3 = s3 / (double)(oldNumTimings-b2);
1617     
1618       const double a = (a1-2.0*a2+a3)/2.0;
1619       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1620       const double c = a3;
1621     
1622       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1623       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1624       const double x2 = 0.5;
1625       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1626    
1627     
1628       // Measure actual time
1629       double sum = 0.0;
1630       for(int i=2; i<newNumTimings; ++i){
1631         sum += timesNew[i];
1632       }
1633
1634       const double avg = sum / ((double)(newNumTimings-2));
1635       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1636
1637       const double benefit = expectedTotalTime - actualTotalTime;
1638
1639       // Determine load balance cost
1640       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1641
1642       const double benefitAfterLB = benefit - lbcost;
1643
1644     
1645       // Determine whether LB cost outweights the estimated benefit
1646       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1647     
1648     
1649     
1650       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1651       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1652         const std::string &name = cpsIter->first;
1653         const std::pair<int,int> &bounds = cpsIter->second;
1654         const int lb = bounds.first;
1655         const int ub = bounds.second;
1656       
1657         if(benefitAfterLB > 0){
1658           CkPrintf("QuadraticModel: Beneficial LB\n");
1659           int newval = newControlPoints[name] / 2;
1660           if(newval > lb)
1661             newControlPoints[name] = newval;
1662           else 
1663             newControlPoints[name] = lb;
1664         } else {
1665           CkPrintf("QuadraticModel: Detrimental LB\n");
1666           int newval = newControlPoints[name] * 2;
1667           if(newval < ub)
1668             newControlPoints[name] = newval;
1669           else 
1670             newControlPoints[name] = ub;
1671         }
1672       
1673       }
1674     }
1675     
1676
1677   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1678     // Assume this is used in this manner:
1679     //  1) go to next phase
1680     //  2) request control point
1681     //  3) load balancing
1682     //  4) computation
1683
1684
1685
1686     instrumentedPhase *prevPhase = previousPhaseData();
1687     
1688     const std::vector<double> &times = prevPhase->times;
1689     const int numTimings = times.size();
1690     
1691     if( numTimings > 4){
1692
1693       const int b1 = 2 + (numTimings-2)/2;
1694       double s1 = 0;
1695       double s2 = 0;
1696     
1697     
1698       for(int i=2; i<b1; i++){
1699         s1 += times[i];
1700       }
1701       for(int i=b1; i<numTimings; i++){
1702         s2 += times[i];
1703       }
1704       
1705     
1706       const double a1 = s1 / (double)(b1-2);
1707       const double a2 = s2 / (double)(numTimings-b1);
1708       const double avg = (a1+a1) / 2.0;
1709
1710       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1711
1712       const double ldbStepsTime = times[0] + times[1];
1713       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1714       
1715
1716       int newval = roundDouble(sqrt(2.0*lbcost/m));
1717       
1718       // We don't really know what to do if m<=0, so we'll just double the period
1719       if(m<=0)
1720         newval = 2*numTimings;     
1721       
1722       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1723     
1724     
1725       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1726       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1727         // TODO: lookup only control points that are relevant instead of all of them
1728         const std::string &name = cpsIter->first;
1729         const std::pair<int,int> &bounds = cpsIter->second;
1730         const int lb = bounds.first;
1731         const int ub = bounds.second;
1732         
1733         if(newval < lb){
1734           newControlPoints[name] = lb;
1735         } else if(newval > ub){
1736           newControlPoints[name] = ub;
1737         } else {
1738           newControlPoints[name] = newval;
1739         } 
1740         
1741       }
1742       
1743       
1744     }
1745     
1746  
1747
1748   } else if ( whichTuningScheme == UseSteering ) {
1749           // -----------------------------------------------------------
1750           //  STEERING BASED ON KNOWLEDGE
1751
1752           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1753           // plans are only generated after 3 phases
1754
1755           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1756           instrumentedPhase *prevPhase = previousPhaseData();
1757
1758           if(phase_id%4 == 0){
1759                   CkPrintf("Steering based on 2 phases ago:\n");
1760                   twoAgoPhase->print();
1761                   CkPrintf("\n");
1762                   fflush(stdout);
1763
1764                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1765
1766
1767                   // ========================================= Concurrency =============================================
1768                   // See if idle time is high:
1769                   {
1770                           double idleTime = twoAgoPhase->idleTime.avg;
1771                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1772                           fflush(stdout);
1773                           if(idleTime > 0.10){
1774                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1775                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1776
1777                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1778
1779                                   bool found = false;
1780                                   std::string cpName;
1781                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1782                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1783                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1784                                           cpName = iter->first;
1785                                           info = &iter->second;
1786
1787                                           // Initialize a new plan based on two phases ago
1788                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1789
1790                                           CkPrintf("Steering found knob to turn\n");
1791                                           fflush(stdout);
1792
1793                                           if(info->first == ControlPoint::EFF_INC){
1794                                                   const int maxValue = controlPointSpace[cpName].second;
1795                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1796                                                   if(twoAgoValue+1 <= maxValue){
1797                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1798                                                   }
1799                                           } else {
1800                                                   const int minValue = controlPointSpace[cpName].second;
1801                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1802                                                   if(twoAgoValue-1 >= minValue){
1803                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1804                                                   }
1805                                           }
1806
1807                                           possibleNextStepPlans.push_back(aNewPlan);
1808
1809                                   }
1810                           }
1811                   }
1812
1813                   // ========================================= Grain Size =============================================
1814                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1815                   {
1816                           double overheadTime = twoAgoPhase->overheadTime.avg;
1817                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1818                           fflush(stdout);
1819                           if(overheadTime > 0.10){
1820                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1821                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1822
1823                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1824
1825                                   bool found = false;
1826                                   std::string cpName;
1827                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1828                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1829                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1830                                           cpName = iter->first;
1831                                           info = &iter->second;
1832
1833                                           // Initialize a new plan based on two phases ago
1834                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1835
1836
1837
1838                                           CkPrintf("Steering found knob to turn\n");
1839                                           fflush(stdout);
1840
1841                                           if(info->first == ControlPoint::EFF_INC){
1842                                                   const int maxValue = controlPointSpace[cpName].second;
1843                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1844                                                   if(twoAgoValue+1 <= maxValue){
1845                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1846                                                   }
1847                                           } else {
1848                                                   const int minValue = controlPointSpace[cpName].second;
1849                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1850                                                   if(twoAgoValue-1 >= minValue){
1851                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1852                                                   }
1853                                           }
1854
1855                                           possibleNextStepPlans.push_back(aNewPlan);
1856
1857                                   }
1858
1859                           }
1860                   }
1861                   // ========================================= GPU Offload =============================================
1862                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1863                   {
1864                           double idleTime = twoAgoPhase->idleTime.avg;
1865                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1866                           fflush(stdout);
1867                           if(idleTime > 0.10){
1868                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1869                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1870
1871                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1872
1873                                   bool found = false;
1874                                   std::string cpName;
1875                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1876                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1877                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1878                                           cpName = iter->first;
1879                                           info = &iter->second;
1880
1881                                           // Initialize a new plan based on two phases ago
1882                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1883
1884
1885                                           CkPrintf("Steering found knob to turn\n");
1886                                           fflush(stdout);
1887
1888                                           if(info->first == ControlPoint::EFF_DEC){
1889                                                   const int maxValue = controlPointSpace[cpName].second;
1890                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1891                                                   if(twoAgoValue+1 <= maxValue){
1892                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1893                                                   }
1894                                           } else {
1895                                                   const int minValue = controlPointSpace[cpName].second;
1896                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1897                                                   if(twoAgoValue-1 >= minValue){
1898                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1899                                                   }
1900                                           }
1901
1902                                           possibleNextStepPlans.push_back(aNewPlan);
1903
1904                                   }
1905
1906                           }
1907                   }
1908
1909                   // ========================================= Done =============================================
1910
1911
1912                   if(possibleNextStepPlans.size() > 0){
1913                           newControlPoints = possibleNextStepPlans[0];
1914                   }
1915
1916
1917                   CkPrintf("Steering done for this phase\n");
1918                   fflush(stdout);
1919
1920           }  else {
1921                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1922                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1923                   fflush(stdout);
1924           }
1925
1926
1927
1928   } else if( whichTuningScheme == SimulatedAnnealing ) {
1929
1930     // -----------------------------------------------------------
1931     //  SIMULATED ANNEALING
1932     //  Simulated Annealing style hill climbing method
1933     //
1934     //  Find the best search space configuration, and try something
1935     //  nearby it, with a radius decreasing as phases increase
1936
1937     CkPrintf("Finding best phase\n");
1938     instrumentedPhase *bestPhase = allData.findBest();  
1939     CkPrintf("best found:\n"); 
1940     bestPhase->print(); 
1941     CkPrintf("\n"); 
1942     
1943
1944     const int convergeByPhase = 100;
1945     // Determine from 0.0 to 1.0 how far along we are
1946     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1947
1948     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1949     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1950       const std::string &name = cpsIter->first;
1951       const std::pair<int,int> &bounds = cpsIter->second;
1952       const int minValue = bounds.first;
1953       const int maxValue = bounds.second;
1954       
1955       const int before = bestPhase->controlPoints[name];   
1956   
1957       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1958
1959       int high = min(before+range, maxValue);
1960       int low = max(before-range, minValue);
1961       
1962       newControlPoints[name] = low;
1963       if(high-low > 0){
1964         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1965       } 
1966       
1967     }
1968
1969   } else if ( whichTuningScheme == DivideAndConquer ) {
1970
1971           // -----------------------------------------------------------
1972           //  STEERING FOR Divide & Conquer Programs
1973           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1974           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1975           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1976
1977           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1978           // plans are only generated after 3 phases
1979
1980           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1981           instrumentedPhase *prevPhase = previousPhaseData();
1982
1983           if(phase_id%4 == 0){
1984                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1985                   twoAgoPhase->print();
1986                   CkPrintf("\n");
1987                   fflush(stdout);
1988
1989                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1990
1991
1992                   // ========================================= Concurrency =============================================
1993                   // See if idle time is high:
1994                   {
1995                           double idleTime = twoAgoPhase->idleTime.avg;
1996                           double overheadTime = twoAgoPhase->overheadTime.avg;
1997
1998
1999                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
2000                           fflush(stdout);
2001                           if(idleTime+overheadTime > 0.10){
2002                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
2003                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
2004
2005                                   int direction = -1;
2006                                   if (idleTime>overheadTime){
2007                                           // We need to decrease the grain size, or increase the available concurrency
2008                                           direction = 1;
2009                                   }
2010
2011                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2012
2013                                   bool found = false;
2014                                   std::string cpName;
2015                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2016                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2017                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2018                                           cpName = iter->first;
2019                                           info = &iter->second;
2020                                           
2021                                           // Initialize a new plan based on two phases ago
2022                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2023
2024                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2025                                           fflush(stdout);
2026
2027                                           int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2028                                           
2029                                           if(info->first == ControlPoint::EFF_INC){
2030                                             const int minValue = controlPointSpace[cpName].first;
2031                                             const int maxValue = controlPointSpace[cpName].second;
2032                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2033                                             const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);                                          
2034                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2035                                             aNewPlan[cpName] = newVal;
2036                                           } else {
2037                                             const int minValue = controlPointSpace[cpName].first;
2038                                             const int maxValue = controlPointSpace[cpName].second;
2039                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2040                                             const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2041                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2042                                             aNewPlan[cpName] = newVal;
2043                                           }
2044                                           
2045                                           possibleNextStepPlans.push_back(aNewPlan);
2046                                   }
2047                           }
2048                   }
2049
2050                   if(possibleNextStepPlans.size() > 0){
2051                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2052                     newControlPoints = possibleNextStepPlans[0];
2053                   } else {
2054                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2055                   }
2056           }
2057
2058   } else if( whichTuningScheme == Simplex ) {
2059
2060           // -----------------------------------------------------------
2061           //  Nelder Mead Simplex Algorithm
2062           //
2063           //  A scheme that takes a simplex (n+1 points) and moves it
2064           //  toward the minimum, eventually converging there.
2065
2066           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2067
2068   } else if( whichTuningScheme == ExhaustiveSearch ) {
2069     
2070     // -----------------------------------------------------------
2071     // EXHAUSTIVE SEARCH
2072    
2073     int numDimensions = controlPointSpace.size();
2074     CkAssert(numDimensions > 0);
2075   
2076     vector<int> lowerBounds(numDimensions);
2077     vector<int> upperBounds(numDimensions); 
2078   
2079     int d=0;
2080     std::map<std::string, pair<int,int> >::iterator iter;
2081     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2082       //    CkPrintf("Examining dimension %d\n", d);
2083       lowerBounds[d] = iter->second.first;
2084       upperBounds[d] = iter->second.second;
2085       d++;
2086     }
2087    
2088     // get names for each dimension (control point)
2089     vector<std::string> names(numDimensions);
2090     d=0;
2091     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2092       names[d] = niter->first;
2093       d++;
2094     }
2095   
2096   
2097     // Create the first possible configuration
2098     vector<int> config = lowerBounds;
2099     config.push_back(0);
2100   
2101     // Increment until finding an unused configuration
2102     allData.cleanupNames(); // put -1 values in for any control points missing
2103     std::vector<instrumentedPhase*> &phases = allData.phases;     
2104
2105     while(true){
2106     
2107       std::vector<instrumentedPhase*>::iterator piter; 
2108       bool testedConfiguration = false; 
2109       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2110       
2111         // Test if the configuration matches this phase
2112         bool match = true;
2113         for(int j=0;j<numDimensions;j++){
2114           match &= (*piter)->controlPoints[names[j]] == config[j];
2115         }
2116       
2117         if(match){
2118           testedConfiguration = true; 
2119           break;
2120         } 
2121       } 
2122     
2123       if(testedConfiguration == false){ 
2124         break;
2125       } 
2126     
2127       // go to next configuration
2128       config[0] ++;
2129       // Propagate "carrys"
2130       for(int i=0;i<numDimensions;i++){
2131         if(config[i] > upperBounds[i]){
2132           config[i+1]++;
2133           config[i] = lowerBounds[i];
2134         }
2135       }
2136       // check if we have exhausted all possible values
2137       if(config[numDimensions] > 0){
2138         break;
2139       }
2140        
2141     }
2142   
2143     if(config[numDimensions] > 0){
2144       for(int i=0;i<numDimensions;i++){ 
2145         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2146       }
2147     }
2148
2149     // results are now in config[i]
2150     
2151     for(int i=0; i<numDimensions; i++){
2152       newControlPoints[names[i]] = config[i];
2153       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2154     }
2155
2156
2157   } else {
2158     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2159   }
2160
2161
2162   const double endGenerateTime = CmiWallTimer();
2163   
2164   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2165   
2166 }
2167
2168
2169
2170
2171
2172 /// Get control point value from range of integers [lb,ub]
2173 int controlPoint(const char *name, int lb, int ub){
2174   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2175   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2176   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2177   int result;
2178
2179   // if we already have control point values for phase, return them
2180   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2181     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2182     return thisPhaseData->controlPoints[std::string(name)];
2183   }
2184   
2185
2186   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2187     // For the first few phases, just use the lower bound, or the default if one was provided 
2188     // This ensures that the ranges for all the control points are known before we do anything fancy
2189     result = lb;
2190
2191     if(defaultControlPointValues.count(std::string(name)) > 0){
2192       int v = defaultControlPointValues[std::string(name)];
2193       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2194       result = v;
2195     }
2196
2197   } else if(controlPointSpace.count(std::string(name)) == 0){
2198     // if this is the first time we've seen the CP, then return the lower bound
2199     result = lb;
2200     
2201   }  else {
2202     // Generate a plan one time for each phase
2203     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2204     
2205     // Use the planned value:
2206     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2207     
2208   }
2209
2210   if(!isInRange(result,ub,lb)){
2211     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2212     fflush(stdout);
2213     fflush(stderr);
2214   }
2215   CkAssert(isInRange(result,ub,lb));
2216   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2217
2218   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2219
2220   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2221   //  thisPhaseData->print();
2222   
2223   return result;
2224 }
2225
2226
2227 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2228   CkAssert(CkMyPe() == 0);
2229   return controlPoint("FortranCP", *lb, *ub);
2230 }
2231
2232
2233
2234
2235 /// Inform the control point framework that a named control point affects the priorities of some array  
2236 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2237 //   CkGroupID aid = arraybase.ckGetArrayID();
2238 //   int groupIdx = aid.idx;
2239 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2240 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2241 // }
2242
2243
2244 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2245 // void controlPointPriorityEntry(const char *name, int idx){
2246 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2247 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2248 // }
2249
2250
2251
2252
2253
2254 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2255
2256     This function decomposes the algorithm into a state machine that allows it to
2257     evaluate one or more configurations through subsequent clls to this function.
2258     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2259
2260     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2261     so a list of them will be created and they will be evaluated, one per call.
2262
2263     Currently there is no stopping criteria, but the simplex ought to contract down
2264     to a few close configurations, and hence not much change will happen after this 
2265     point.
2266
2267  */
2268 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2269
2270         if(useBestKnown){
2271                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2272                 return;
2273         }
2274
2275
2276         if(firstSimplexPhase< 0){
2277                 firstSimplexPhase = allData.phases.size()-1;
2278                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2279         }
2280
2281         int n = controlPointSpace.size();
2282
2283         CkAssert(n>=2);
2284
2285
2286         if(simplexState == beginning){
2287                 // First we evaluate n+1 random points, then we go to a different state
2288                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2289                         CkPrintf("Simplex Tuning: chose random configuration\n");
2290
2291                         // Choose random values from the middle third of the range in each dimension
2292                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2293                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2294                                 const std::string &name = cpsIter->first;
2295                                 const std::pair<int,int> &bounds = cpsIter->second;
2296                                 const int lb = bounds.first;
2297                                 const int ub = bounds.second;
2298                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2299                         }
2300                 } else {
2301                         // Set initial simplex:
2302                         for(int i=0; i<n+1; i++){
2303                                 simplexIndices.insert(firstSimplexPhase+i);
2304                         }
2305                         CkAssert(simplexIndices.size() == n+1);
2306
2307                         // Transition to reflecting state
2308                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2309
2310                 }
2311
2312         } else if (simplexState == reflecting){
2313                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2314                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2315
2316                 // Find the highest time from other points in the simplex
2317                 double highestTimeForOthersInSimplex = 0.0;
2318                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2319                         double t = allData.phases[*iter]->medianTime();
2320                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2321                                 highestTimeForOthersInSimplex = t;
2322                         }
2323                 }
2324
2325                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2326
2327                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2328                         // if y* < yl,  transition to "expanding"
2329                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2330
2331                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2332                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2333                         CkAssert(simplexIndices.size() == n+1);
2334                         simplexIndices.erase(worstPhase);
2335                         CkAssert(simplexIndices.size() == n);
2336                         simplexIndices.insert(pPhase);
2337                         CkAssert(simplexIndices.size() == n+1);
2338                         // Transition to reflecting state
2339                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2340
2341                 } else {
2342                         // if y* > yh
2343                         if(recentPhaseTime <= worstTime){
2344                                 // replace Ph with P*
2345                                 CkAssert(simplexIndices.size() == n+1);
2346                                 simplexIndices.erase(worstPhase);
2347                                 CkAssert(simplexIndices.size() == n);
2348                                 simplexIndices.insert(pPhase);
2349                                 CkAssert(simplexIndices.size() == n+1);
2350                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2351                                 worstPhase = pPhase;
2352                                 // Just as a sanity check, make sure we don't use the non-updated values.
2353                                 worst.clear();
2354                         }
2355
2356                         // Now, form P** and do contracting phase
2357                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2358
2359                 }
2360
2361         } else if (simplexState == doneExpanding){
2362                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2363                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2364                 // A new configuration has been evaluated
2365
2366                 // Check to see if y** < y1
2367                 if(recentPhaseTime < bestTime){
2368                         // replace Ph by P**
2369                         CkAssert(simplexIndices.size() == n+1);
2370                         simplexIndices.erase(worstPhase);
2371                         CkAssert(simplexIndices.size() == n);
2372                         simplexIndices.insert(p2Phase);
2373                         CkAssert(simplexIndices.size() == n+1);
2374                 } else {
2375                         //      replace Ph by P*
2376                         CkAssert(simplexIndices.size() == n+1);
2377                         simplexIndices.erase(worstPhase);
2378                         CkAssert(simplexIndices.size() == n);
2379                         simplexIndices.insert(pPhase);
2380                         CkAssert(simplexIndices.size() == n+1);
2381                 }
2382
2383                 // Transition to reflecting state
2384                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2385
2386         }  else if (simplexState == contracting){
2387                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2388                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2389                 // A new configuration has been evaluated
2390
2391                 // Check to see if y** > yh
2392                 if(recentPhaseTime <= worstTime){
2393                         // replace Ph by P**
2394                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2395                         CkAssert(simplexIndices.size() == n+1);
2396                         simplexIndices.erase(worstPhase);
2397                         CkAssert(simplexIndices.size() == n);
2398                         simplexIndices.insert(p2Phase);
2399                         CkAssert(simplexIndices.size() == n+1);
2400                         // Transition to reflecting state
2401                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2402
2403                 } else {
2404                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2405                         simplexState = stillContracting;
2406
2407                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2408                         stillMustContractList = simplexIndices;
2409
2410                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2411                 }
2412
2413         } else if (simplexState == stillContracting){
2414                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2415
2416                 if(stillMustContractList.size()>0){
2417                         int c = *stillMustContractList.begin();
2418                         stillMustContractList.erase(c);
2419                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2420
2421                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2422
2423                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2424                         int v = 0;
2425                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2426                                 const std::string &name = cpsIter->first;
2427                                 const std::pair<int,int> &bounds = cpsIter->second;
2428                                 const int lb = bounds.first;
2429                                 const int ub = bounds.second;
2430
2431                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2432
2433                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2434                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2435                                 v++;
2436                         }
2437                 } else {
2438                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2439                         CkAssert(stillMustContractList.size() == 0);
2440                         simplexIndices.clear();
2441                         CkAssert(simplexIndices.size()==0);
2442                         for(int i=0; i<n+1; i++){
2443                                 simplexIndices.insert(allData.phases.size()-2-i);
2444                         }
2445                         CkAssert(simplexIndices.size()==n+1);
2446
2447                         // Transition to reflecting state
2448                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2449
2450                 }
2451
2452
2453         } else if (simplexState == expanding){
2454                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2455                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2456                 // A new configuration has been evaluated
2457
2458                 // determine if y** < yl
2459                 if(recentPhaseTime < bestTime){
2460                         // replace Ph by P**
2461                         CkAssert(simplexIndices.size() == n+1);
2462                         simplexIndices.erase(worstPhase);
2463                         CkAssert(simplexIndices.size() == n);
2464                         simplexIndices.insert(p2Phase);
2465                         CkAssert(simplexIndices.size() == n+1);
2466                         // Transition to reflecting state
2467                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2468
2469                 } else {
2470                         // else, replace ph with p*
2471                         CkAssert(simplexIndices.size() == n+1);
2472                         simplexIndices.erase(worstPhase);
2473                         CkAssert(simplexIndices.size() == n);
2474                         simplexIndices.insert(pPhase);
2475                         CkAssert(simplexIndices.size() == n+1);
2476                         // Transition to reflecting state
2477                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2478                 }
2479
2480
2481         } else {
2482                 CkAbort("Unknown simplexState");
2483         }
2484
2485 }
2486
2487
2488
2489 /** Replace the worst point with its reflection across the centroid. */
2490 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2491
2492         int n = controlPointSpace.size();
2493
2494         printSimplex(allData);
2495
2496         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2497
2498
2499         // Quit if the diameter of our simplex is small
2500         double maxr = 0.0;
2501         for(int i=0; i<n+1; i++){
2502                 //              Compute r^2 of this simplex point from the centroid
2503                 double r2 = 0.0;
2504                 std::vector<double> p = pointCoords(allData, i);
2505                 for(int d=0; d<p.size(); d++){
2506                         double r1 = (p[d] * centroid[d]);
2507                         r2 += r1*r1;
2508                 }
2509                 if(r2 > maxr)
2510                         maxr = r2;
2511         }
2512
2513 #if 0
2514         // At some point we quit this tuning
2515         if(maxr < 10){
2516                 useBestKnown = true;
2517                 instrumentedPhase *best = allData.findBest();
2518                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2519
2520                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2521                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2522                         const std::string &name = cpsIter->first;
2523                         newControlPoints[name] =  best->controlPoints[name];
2524                 }
2525         }
2526 #endif
2527
2528         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2529
2530         pPhase = allData.phases.size()-1;
2531         P.resize(n);
2532         for(int i=0; i<n; i++){
2533                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2534         }
2535
2536         for(int i=0; i<P.size(); i++){
2537                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2538         }
2539
2540
2541         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2542         int v = 0;
2543         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2544                 const std::string &name = cpsIter->first;
2545                 const std::pair<int,int> &bounds = cpsIter->second;
2546                 const int lb = bounds.first;
2547                 const int ub = bounds.second;
2548                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2549                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2550                 v++;
2551         }
2552
2553
2554         // Transition to "reflecting" state
2555         simplexState = reflecting;
2556         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2557
2558 }
2559
2560
2561
2562
2563 /** Replace the newly tested reflection with a further expanded version of itself. */
2564 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2565         int n = controlPointSpace.size();
2566         printSimplex(allData);
2567
2568         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2569         // I believe the equation for P** in the text on page 308 is correct.
2570
2571         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2572
2573
2574         p2Phase = allData.phases.size()-1;
2575         P2.resize(n);
2576         for(int i=0; i<n; i++){
2577                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2578         }
2579
2580         for(int i=0; i<P2.size(); i++){
2581                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2582         }
2583
2584
2585         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2586         int v = 0;
2587         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2588                 const std::string &name = cpsIter->first;
2589                 const std::pair<int,int> &bounds = cpsIter->second;
2590                 const int lb = bounds.first;
2591                 const int ub = bounds.second;
2592                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2593                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2594                 v++;
2595         }
2596
2597
2598         // Transition to "doneExpanding" state
2599         simplexState = doneExpanding;
2600         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2601
2602 }
2603
2604
2605
2606
2607 /** Replace the newly tested reflection with a further expanded version of itself. */
2608 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2609         int n = controlPointSpace.size();
2610         printSimplex(allData);
2611
2612         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2613
2614
2615         p2Phase = allData.phases.size()-1;
2616         P2.resize(n);
2617         for(int i=0; i<n; i++){
2618                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2619         }
2620
2621         for(int i=0; i<P2.size(); i++){
2622                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2623         }
2624
2625
2626         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2627         int v = 0;
2628         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2629                 const std::string &name = cpsIter->first;
2630                 const std::pair<int,int> &bounds = cpsIter->second;
2631                 const int lb = bounds.first;
2632                 const int ub = bounds.second;
2633                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2634                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2635                 v++;
2636         }
2637         
2638         
2639         // Transition to "contracting" state
2640         simplexState = contracting;
2641         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2642
2643 }
2644
2645
2646
2647
2648 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2649         int n = controlPointSpace.size();
2650
2651         // Find worst performing point in the simplex
2652         worstPhase = -1;
2653         worstTime = -1.0;
2654         bestPhase = 10000000;
2655         bestTime = 10000000;
2656         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2657                 double t = allData.phases[*iter]->medianTime();
2658                 if(t > worstTime){
2659                         worstTime = t;
2660                         worstPhase = *iter;
2661                 }
2662                 if(t < bestTime){
2663                         bestTime = t;
2664                         bestPhase = *iter;
2665                 }
2666         }
2667         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2668
2669         best = pointCoords(allData, bestPhase);
2670         CkAssert(best.size() == n);
2671
2672         worst = pointCoords(allData, worstPhase);
2673         CkAssert(worst.size() == n);
2674
2675         // Calculate centroid of the remaining points in the simplex
2676         centroid.resize(n);
2677         for(int i=0; i<n; i++){
2678                 centroid[i] = 0.0;
2679         }
2680
2681         int numPts = 0;
2682
2683         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2684                 if(*iter != worstPhase){
2685                         numPts ++;
2686                         // Accumulate into the result vector
2687                         int c = 0;
2688                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2689                                 centroid[c] += citer->second;
2690                                 c++;
2691                         }
2692
2693                 }
2694         }
2695
2696         // Now divide the sums by the number of points.
2697         for(int v = 0; v<centroid.size(); v++) {
2698                 centroid[v] /= (double)numPts;
2699         }
2700
2701         CkAssert(centroid.size() == n);
2702
2703         for(int i=0; i<centroid.size(); i++){
2704                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2705         }
2706
2707
2708 }
2709
2710
2711
2712 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2713         std::vector<double> result;
2714         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2715                 result.push_back((double)citer->second);
2716         }
2717         return result;
2718 }
2719
2720
2721
2722
2723 void ControlPointWriteOutputToDisk(){
2724         CkAssert(CkMyPe() == 0);
2725         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2726 }
2727
2728
2729
2730 /*! @} */
2731
2732 #include "ControlPoints.def.h"
2733
2734 #endif