298b81c26cbb3b05071ba479d574b9c5ba899933
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4
5 #include "controlPoints.h"
6 #include "trace-controlPoints.h"
7 #include "LBDatabase.h"
8 #include "controlPoints.h"
9 #include "charm++.h"
10 #include "trace-projections.h"
11 #include <pathHistory.h>
12 #include "cp_effects.h"
13 #include <iostream>
14 #include <math.h>
15 #include <climits>
16
17 #if CMK_WITH_CONTROLPOINT
18
19 #define roundDouble(x)        ((long)(x+0.5))
20 #define myAbs(x)   (((x)>=0.0)?(x):(-1.0*(x)))
21 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
22
23 inline double closestInRange(double v, double a, double b){
24   return (v<a) ? a : ((v>b)?b:v);
25 }
26
27
28 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
29  
30
31 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
32 /**  @{ */
33
34 using namespace std;
35
36 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
37
38
39 //#undef DEBUGPRINT
40 //#define DEBUGPRINT 4
41
42 static void periodicProcessControlPoints(void* ptr, double currWallTime);
43
44
45 // A pointer to this PE's controlpoint manager Proxy
46 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
47 /* readonly */ int random_seed;
48 /* readonly */ long controlPointSamplePeriod;
49 /* readonly */ int whichTuningScheme;
50 /* readonly */ bool writeDataFileAtShutdown;
51 /* readonly */ bool shouldFilterOutputData;
52 /* readonly */ bool loadDataFileAtStartup;
53 /* readonly */ bool shouldGatherMemoryUsage;
54 /* readonly */ bool shouldGatherUtilization;
55 /* readonly */ bool shouldGatherAll;
56 /* readonly */ char CPDataFilename[512];
57
58 extern bool enableCPTracing;
59
60 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
61 /// These probably come from the command line arguments, so are available only on PE 0
62 std::map<std::string, int> defaultControlPointValues;
63
64
65
66 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
67
68
69
70 void printTuningScheme(){
71   switch(whichTuningScheme){
72   case RandomSelection:
73     CkPrintf("Tuning Scheme: RandomSelection\n");
74     break;
75   case AlwaysDefaults:
76     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
77     break;
78   case SimulatedAnnealing:
79     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
80     break;
81   case ExhaustiveSearch:
82     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
83     break;
84   case CriticalPathAutoPrioritization:
85     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
86     break;
87   case UseBestKnownTiming:
88     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
89     break;
90   case UseSteering:
91     CkPrintf("Tuning Scheme: UseSteering\n");
92     break;
93   case MemoryAware:
94     CkPrintf("Tuning Scheme: MemoryAware\n");
95     break;
96   case Simplex:
97     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
98     break;
99   case DivideAndConquer:
100     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
101     break;
102   case LDBPeriod:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
104     break;
105   case LDBPeriodLinear:
106     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
107     break;
108   case LDBPeriodQuadratic:
109     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
110     break;
111   case LDBPeriodOptimal:
112     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
113     break;
114   default:
115     CkPrintf("Unknown tuning scheme\n");
116     break;
117   }
118   fflush(stdout);
119 }
120
121
122
123 /// A reduction type that combines idle time measurements (min/sum/max etc.)
124 CkReduction::reducerType idleTimeReductionType;
125 /// A reducer that combines idle time measurements (min/sum/max etc.)
126 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
127   double ret[3];
128   if(nMsg > 0){
129     CkAssert(msgs[0]->getSize()==3*sizeof(double));
130     double *m=(double *)msgs[0]->getData();
131     ret[0]=m[0];
132     ret[1]=m[1];
133     ret[2]=m[2];
134   }
135   for (int i=1;i<nMsg;i++) {
136     CkAssert(msgs[i]->getSize()==3*sizeof(double));
137     double *m=(double *)msgs[i]->getData();
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141   }  
142   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
143 }
144
145
146
147 /// A reduction type that combines idle, overhead, and memory measurements
148 CkReduction::reducerType allMeasuresReductionType;
149 /// A reducer that combines idle, overhead, and memory measurements
150 #define ALL_REDUCTION_SIZE 12
151 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
152   double ret[ALL_REDUCTION_SIZE];
153   if(nMsg > 0){
154     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
155     double *m=(double *)msgs[0]->getData();
156     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
157   }
158   for (int i=1;i<nMsg;i++) {
159     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
160     double *m=(double *)msgs[i]->getData();
161     // idle time (min/sum/max)
162     ret[0]=min(ret[0],m[0]);
163     ret[1]+=m[1];
164     ret[2]=max(ret[2],m[2]);
165     // overhead time (min/sum/max)
166     ret[3]=min(ret[3],m[3]);
167     ret[4]+=m[4];
168     ret[5]=max(ret[5],m[5]);
169     // mem usage (max)
170     ret[6]=max(ret[6],m[6]);
171     // bytes per invocation for two types of entry methods
172     ret[7]+=m[7];
173     ret[8]+=m[8];
174     ret[9]+=m[9];
175     ret[10]+=m[10];
176     // Grain size (avg)
177     ret[11]+=m[11];
178   }  
179   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
180 }
181
182
183 /// Registers the control point framework's reduction handlers at startup on each PE
184 /*initproc*/ void registerCPReductions(void) {
185   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
186   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
187 }
188
189
190
191
192
193
194 /// Return an integer between 0 and num-1 inclusive
195 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
196 unsigned int randInt(unsigned int num, const char* name, int seed=0){
197   CkAssert(num > 0);
198
199   unsigned long hash = 0;
200   unsigned int c;
201   unsigned char * str = (unsigned char*)name;
202
203   while (c = *str++){
204     unsigned int c2 = (c+64)%128;
205     unsigned int c3 = (c2*5953)%127;
206     hash = c3 + (hash << 6) + (hash << 16) - hash;
207   }
208
209   unsigned long shuffled1 = (hash*2083)%7907;
210   unsigned long shuffled2 = (seed*4297)%2017;
211
212   unsigned long shuffled3 = (random_seed*4799)%7919;
213
214   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
215
216   unsigned int result = ((namehash * 6029) % 1117) % num;
217
218   CkAssert(result >=0 && result < num);
219   return result;
220 }
221
222
223
224 controlPointManager::controlPointManager() {
225   generatedPlanForStep = -1;
226
227     exitWhenReady = false;
228     alreadyRequestedMemoryUsage = false;   
229     alreadyRequestedIdleTime = false;
230     alreadyRequestedAll = false;
231     
232     instrumentedPhase * newPhase = new instrumentedPhase();
233     allData.phases.push_back(newPhase);   
234     
235     frameworkShouldAdvancePhase = false;
236     haveControlPointChangeCallback = false;
237 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
238     
239     phase_id = 0;
240
241     if(loadDataFileAtStartup){    
242       loadDataFile();
243     }
244
245     
246     if(CkMyPe() == 0){
247       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
248     }
249
250     traceRegisterUserEvent("No currently executing message", 5000);
251     traceRegisterUserEvent("Zero time along critical path", 5010);
252     traceRegisterUserEvent("Positive total time along critical path", 5020);
253     traceRegisterUserEvent("env->setPathHistory()", 6000);
254     traceRegisterUserEvent("Critical Path", 5900);
255     traceRegisterUserEvent("Table Entry", 5901);
256
257
258 #if USER_EVENT_FOR_REGISTERTERMINALPATH
259     traceRegisterUserEvent("registerTerminalPath", 100); 
260 #endif
261
262   }
263   
264
265  controlPointManager::~controlPointManager(){
266 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
267   }
268
269   void controlPointManager::pup(PUP::er &p)
270   {
271       // FIXME: does not work when control point is actually used,
272       // just minimal pup so that it allows exit function to work (exitIfReady).
273     p|generatedPlanForStep;
274     p|exitWhenReady;
275     p|alreadyRequestedMemoryUsage;
276     p|alreadyRequestedIdleTime;
277     p|alreadyRequestedAll;
278     p|frameworkShouldAdvancePhase;
279     p|haveControlPointChangeCallback;
280     p|phase_id;
281   }
282   
283
284   /// Loads the previous run data file
285   void controlPointManager::loadDataFile(){
286     ifstream infile(CPDataFilename);
287     vector<std::string> names;
288     std::string line;
289   
290     while(getline(infile,line)){
291       if(line[0] != '#')
292         break;
293     }
294   
295     int numTimings = 0;
296     std::istringstream n(line);
297     n >> numTimings;
298   
299     while(getline(infile,line)){ 
300       if(line[0] != '#') 
301         break; 
302     }
303
304     int numControlPointNames = 0;
305     std::istringstream n2(line);
306     n2 >> numControlPointNames;
307   
308     for(int i=0; i<numControlPointNames; i++){
309       getline(infile,line);
310       names.push_back(line);
311     }
312
313     for(int i=0;i<numTimings;i++){
314       getline(infile,line);
315       while(line[0] == '#')
316         getline(infile,line); 
317
318       instrumentedPhase * ips = new instrumentedPhase();
319
320       std::istringstream iss(line);
321
322       // Read memory usage for phase
323       iss >> ips->memoryUsageMB;
324       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
325
326       // Read idle time
327       iss >> ips->idleTime.min;
328       iss >> ips->idleTime.avg;
329       iss >> ips->idleTime.max;
330
331       // Read overhead time
332       iss >> ips->overheadTime.min;
333       iss >> ips->overheadTime.avg;
334       iss >> ips->overheadTime.max;
335
336       // read bytePerInvoke
337       iss >> ips->bytesPerInvoke;
338
339       // read grain size
340       iss >> ips->grainSize;
341
342       // Read control point values
343       for(int cp=0;cp<numControlPointNames;cp++){
344         int cpvalue;
345         iss >> cpvalue;
346         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
347       }
348
349       // ignore median time
350       double mt;
351       iss >> mt;
352
353       // Read all times
354
355       double time;
356
357       while(iss >> time){
358         ips->times.push_back(time);
359 #if DEBUGPRINT > 5
360         CkPrintf("read time %lf from file\n", time);
361 #endif
362       }
363
364       allData.phases.push_back(ips);
365
366     }
367
368     infile.close();
369   }
370
371
372
373   /// Add the current data to allData and output it to a file
374   void controlPointManager::writeDataFile(){
375     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
376     ofstream outfile(CPDataFilename);
377     allData.cleanupNames();
378
379 //    string s = allData.toString();
380 //    CkPrintf("At end: \n %s\n", s.c_str());
381
382     if(shouldFilterOutputData){
383       allData.verify();
384       allData.filterOutIncompletePhases();
385     }
386
387 //    string s2 = allData.toString();
388 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
389     if(allData.toString().length() > 10){
390       outfile << allData.toString();
391     } else {
392       outfile << " No data available to save to disk " << endl;
393     }
394     outfile.close();
395   }
396
397   /// User can register a callback that is called when application should advance to next phase
398   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
399     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
400     controlPointChangeCallback = cb;
401     haveControlPointChangeCallback = true;
402   }
403
404
405 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
406 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
407   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
408 }
409
410   /// Called periodically by the runtime to handle the control points
411   /// Currently called on each PE
412   void controlPointManager::processControlPoints(){
413
414 #if DEBUGPRINT
415     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
416 #endif
417
418     //==========================================================================================
419     // Print the data for each phase
420
421     const int s = allData.phases.size();
422
423 #if DEBUGPRINT
424     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
425     for(int p=0;p<s;++p){
426       const instrumentedPhase &phase = allData.phases[p];
427       const idleTimeContainer &idle = phase.idleTime;
428       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
429       vector<double> const &times = phase.times;
430
431       CkPrintf("Phase %d:\n", p);
432       idle.print();
433      //   CkPrintf("critical paths: (* affected by control point)\n");
434         //   for(int i=0;i<criticalPaths.size(); i++){
435         // If affected by a control point
436         //      criticalPaths[i].print();
437
438       //        criticalPaths[i].print();
439       //        CkPrintf("\n");
440         
441
442       //   }
443       CkPrintf("Timings:\n");
444       for(int i=0;i<times.size(); i++){
445         CkPrintf("%lf ", times[i]);
446       }
447       CkPrintf("\n");
448
449     }
450     
451     CkPrintf("\n\n");
452
453
454 #endif
455
456
457
458     //==========================================================================================
459     // If this is a phase during which we try to adapt control point values based on critical path
460 #if 0
461     if( s%5 == 4) {
462
463       // Find the most recent phase with valid critical path data and idle time measurements      
464       int whichPhase=-1;
465       for(int p=0; p<s; ++p){
466         const instrumentedPhase &phase = allData.phases[p];
467         const idleTimeContainer &idle = phase.idleTime;
468         if(idle.isValid() && phase.criticalPaths.size()>0 ){
469           whichPhase = p;
470         }
471       }
472       
473       
474       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
475       const instrumentedPhase &phase = allData.phases[whichPhase];
476       const idleTimeContainer &idle = phase.idleTime;
477       
478       if(idle.min > 0.1){
479         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
480         
481         // Determine the median critical path for this phase
482         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
483         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
484         CkAssert(phase.criticalPaths.size() > 0);
485         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
486         
487         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
488         
489         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
490           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
491
492
493           CkPrintf("Finding control points related to the critical path\n");
494           int cpcount = 0;
495           std::set<std::string> controlPointsAffectingCriticalPath;
496
497           
498           for(int e=0;e<path.getNumUsed();e++){
499             if(path.getUsedCount(e)>0){
500               int ep = path.getUsedEp(e);
501
502               std::map<std::string, std::set<int> >::iterator iter;
503               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
504                 if(iter->second.count(ep)>0){
505                   controlPointsAffectingCriticalPath.insert(iter->first);
506                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
507                   cpcount++;
508                 }
509               }
510               
511             }
512           }
513           
514
515           if(cpcount==0){
516             CkPrintf("No control points are known to affect the critical path\n");
517           } else {
518             double beginTime = CmiWallTimer();
519
520             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
521             
522             newControlPoints = phase.controlPoints;
523             
524             if(frameworkShouldAdvancePhase){
525               gotoNextPhase();  
526             }
527             
528             if(haveControlPointChangeCallback){ 
529 #if DEBUGPRINT
530               CkPrintf("Calling control point change callback\n");
531 #endif
532               // Create a high priority message and send it to the callback
533               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
534               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
535               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
536               controlPointChangeCallback.send(msg);
537               
538             }
539             
540             
541             // adjust the control points that can affect the critical path
542
543             char textDescription[4096*2];
544             textDescription[0] = '\0';
545
546             std::map<std::string,int>::iterator newCP;
547             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
548               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
549                 // decrease the value (increase priority) if within range
550                 int lowerbound = controlPointSpace[newCP->first].first;
551                 if(newCP->second > lowerbound){
552                   newControlPointsAvailable = true;
553                   newCP->second --;
554                 }
555               }
556             }
557            
558             // Create a string for a projections user event
559             if(1){
560               std::map<std::string,int>::iterator newCP;
561               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
562                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
563               }
564             }
565
566             char *userEventString = new char[4096*2];
567             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
568             
569             static int userEventCounter = 20000;
570             CkPrintf("USER EVENT: %s\n", userEventString);
571             
572             traceRegisterUserEvent(userEventString, userEventCounter); 
573             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
574             userEventCounter++;
575             
576           }
577           
578         } else {
579           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
580         }
581         
582         
583       }
584       
585     } else {
586       
587       
588     }
589    
590 #endif
591
592
593
594     if(frameworkShouldAdvancePhase){
595       gotoNextPhase();  
596     }
597     
598     if(haveControlPointChangeCallback){ 
599       // Create a high priority message and send it to the callback
600       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
601       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
602       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
603       controlPointChangeCallback.send(msg);
604     }
605     
606   }
607   
608   /// Determine if any control point is known to affect an entry method
609   bool controlPointManager::controlPointAffectsThisEP(int ep){
610     std::map<std::string, std::set<int> >::iterator iter;
611     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
612       if(iter->second.count(ep)>0){
613         return true;
614       }
615     }
616     return false;    
617   }
618   
619   /// Determine if any control point is known to affect a chare array  
620   bool controlPointManager::controlPointAffectsThisArray(int array){
621     std::map<std::string, std::set<int> >::iterator iter;
622     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
623       if(iter->second.count(array)>0){
624         return true;
625       }
626     }
627     return false;   
628   }
629   
630
631   /// The data from the current phase
632   instrumentedPhase * controlPointManager::currentPhaseData(){
633     int s = allData.phases.size();
634     CkAssert(s>=1);
635     return allData.phases[s-1];
636   }
637  
638
639   /// The data from the previous phase
640   instrumentedPhase * controlPointManager::previousPhaseData(){
641     int s = allData.phases.size();
642     if(s >= 2 && phase_id > 0) {
643       return allData.phases[s-2];
644     } else {
645       return NULL;
646     }
647   }
648  
649   /// The data from two phases back
650   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
651     int s = allData.phases.size();
652     if(s >= 3 && phase_id > 0) {
653       return allData.phases[s-3];
654     } else {
655       return NULL;
656     }
657   }
658   
659
660   /// Called by either the application or the Control Point Framework to advance to the next phase  
661   void controlPointManager::gotoNextPhase(){
662     CkPrintf("gotoNextPhase shouldGatherAll=%d enableCPTracing=%d\n", (int)shouldGatherAll, (int)enableCPTracing);
663     fflush(stdout);
664       
665     if(enableCPTracing){
666       if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
667         alreadyRequestedAll = true;
668         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
669         CkPrintf("Requesting all measurements\n");
670         thisProxy.requestAll(*cb);
671         delete cb;
672         
673       } else {
674         
675         if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
676           alreadyRequestedMemoryUsage = true;
677           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
678           thisProxy.requestMemoryUsage(*cb);
679           delete cb;
680         }
681         
682         if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
683           alreadyRequestedIdleTime = true;
684           CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
685           thisProxy.requestIdleTime(*cb);
686           delete cb;
687         }
688       }
689     }
690
691
692
693 #if CMK_LBDB_ON && 0
694     LBDatabase * myLBdatabase = LBDatabaseObj();
695     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
696     const CkVec<LBObj*> objs = myLBDB->getObjs();
697     const int objCount = myLBDB->getObjCount();
698     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
699     
700     LBRealType maxObjWallTime = -1.0;
701     
702     for(int i=0;i<objs.length();i++){
703       LBObj* o = objs[i];
704       const LDObjData d = o->ObjData();
705       LBRealType cpuTime = d.cpuTime;
706       LBRealType wallTime = d.wallTime;
707       // can also get object handles from the LDObjData struct
708       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
709       if(wallTime > maxObjWallTime){
710
711       }
712       
713     }
714
715     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
716     
717 #endif    
718
719
720     
721     // increment phase id
722     phase_id++;
723     
724
725     // Create new entry for the phase we are starting now
726     instrumentedPhase * newPhase = new instrumentedPhase();
727     allData.phases.push_back(newPhase);
728     
729     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
730
731   }
732
733   /// An application uses this to register an instrumented timing for this phase
734   void controlPointManager::setTiming(double time){
735     currentPhaseData()->times.push_back(time);
736
737 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
738        
739     // First we should register this currently executing message as a path, because it is likely an important one to consider.
740     //    registerTerminalEntryMethod();
741     
742     // save the critical path for this phase
743     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
744     //    maxTerminalPathHistory.reset();
745     
746     
747     // Reset the counts for the currently executing message
748     //resetThisEntryPath();
749         
750 #endif
751     
752   }
753   
754   /// Entry method called on all PEs to request CPU utilization statistics
755   void controlPointManager::requestIdleTime(CkCallback cb){
756     CkAssert(enableCPTracing);
757    
758     double i = localControlPointTracingInstance()->idleRatio();
759     double idle[3];
760     idle[0] = i;
761     idle[1] = i;
762     idle[2] = i;
763     
764     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
765     
766     localControlPointTracingInstance()->resetTimings();
767     
768     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
769   }
770   
771   /// All processors reduce their memory usages in requestIdleTime() to this method
772   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
773     CkAssert(enableCPTracing);
774
775     int size=msg->getSize() / sizeof(double);
776     CkAssert(size==3);
777     double *r=(double *) msg->getData();
778         
779     instrumentedPhase* prevPhase = previousPhaseData();
780     if(prevPhase != NULL){
781       prevPhase->idleTime.min = r[0];
782       prevPhase->idleTime.avg = r[1]/CkNumPes();
783       prevPhase->idleTime.max = r[2];
784       prevPhase->idleTime.print();
785       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
786     } else {
787       CkPrintf("There is no previous phase to store the idle time measurements\n");
788     }
789     
790     alreadyRequestedIdleTime = false;
791     checkForShutdown();
792     delete msg;
793   }
794
795
796
797
798
799
800   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
801   void controlPointManager::requestAll(CkCallback cb){
802     CkAssert(enableCPTracing);
803
804     TraceControlPoints *t = localControlPointTracingInstance();
805
806     double data[ALL_REDUCTION_SIZE];
807
808     double *idle = data;
809     double *over = data+3;
810     double *mem = data+6;
811     double *msgBytes = data+7;  
812     double *grainsize = data+11;  
813
814     const double i = t->idleRatio();
815     idle[0] = i;
816     idle[1] = i;
817     idle[2] = i;
818
819     const double o = t->overheadRatio();
820     over[0] = o;
821     over[1] = o;
822     over[2] = o;
823
824     const double m = t->memoryUsageMB();
825     mem[0] = m;
826
827     msgBytes[0] = t->b2;
828     msgBytes[1] = t->b3;
829     msgBytes[2] = t->b2mlen;
830     msgBytes[3] = t->b3mlen;
831
832     grainsize[0] = t->grainSize();
833     
834     localControlPointTracingInstance()->resetAll();
835
836     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
837   }
838   
839   /// All processors reduce their memory usages in requestIdleTime() to this method
840   void controlPointManager::gatherAll(CkReductionMsg *msg){
841     CkAssert(enableCPTracing);
842
843     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
844     int size=msg->getSize() / sizeof(double);
845     double *data=(double *) msg->getData();
846         
847     double *idle = data;
848     double *over = data+3;
849     double *mem = data+6;
850     double *msgBytes = data+7;
851     double *granularity = data+11;
852
853
854     //    std::string b = allData.toString();
855
856     instrumentedPhase* prevPhase = previousPhaseData();
857     if(prevPhase != NULL){
858       prevPhase->idleTime.min = idle[0];
859       prevPhase->idleTime.avg = idle[1]/CkNumPes();
860       prevPhase->idleTime.max = idle[2];
861       
862       prevPhase->overheadTime.min = over[0];
863       prevPhase->overheadTime.avg = over[1]/CkNumPes();
864       prevPhase->overheadTime.max = over[2];
865       
866       prevPhase->memoryUsageMB = mem[0];
867       
868       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
869
870       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
871       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
872
873       /** The average of the grain sizes on all PEs in us */
874       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
875
876       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
877       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
878
879       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
880       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
881
882       if(bytesPerInvoke2 > bytesPerInvoke3)
883         prevPhase->bytesPerInvoke = bytesPerInvoke2;
884       else
885         prevPhase->bytesPerInvoke = bytesPerInvoke3;
886
887       //      prevPhase->print();
888       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
889
890       //      std::string a = allData.toString();
891
892       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
893       
894     } else {
895       CkPrintf("There is no previous phase to store measurements\n");
896     }
897     
898     alreadyRequestedAll = false;
899     checkForShutdown();
900     delete msg;
901   }
902
903
904
905
906   void controlPointManager::checkForShutdown(){
907     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
908       doExitNow();
909     }
910   }
911
912
913   void controlPointManager::exitIfReady(){
914      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
915        //  CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
916        doExitNow();
917      } else {
918        // CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
919        exitWhenReady = true;
920      }
921   }
922
923
924
925   void controlPointManager::doExitNow(){
926           _TRACE_BEGIN_EXECUTE_DETAILED(-1, -1, _threadEP,CkMyPe(), 0, NULL, this);
927           writeOutputToDisk();
928           //      CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
929           CkExit();
930   }
931
932   void controlPointManager::writeOutputToDisk(){
933           if(writeDataFileAtShutdown){
934                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
935           }
936   }
937
938
939   /// Entry method called on all PEs to request memory usage
940   void controlPointManager::requestMemoryUsage(CkCallback cb){
941     int m = CmiMaxMemoryUsage() / 1024 / 1024;
942     CmiResetMaxMemory();
943     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
944     contribute(sizeof(int),&m,CkReduction::max_int, cb);
945   }
946
947   /// All processors reduce their memory usages to this method
948   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
949     int size=msg->getSize() / sizeof(int);
950     CkAssert(size==1);
951     int *m=(int *) msg->getData();
952
953     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
954     
955     instrumentedPhase* prevPhase = previousPhaseData();
956     if(prevPhase != NULL){
957       prevPhase->memoryUsageMB = m[0];
958     } else {
959       CkPrintf("No place to store memory usage");
960     }
961
962     alreadyRequestedMemoryUsage = false;
963     checkForShutdown();
964     delete msg;
965   }
966
967
968 //   /// Inform the control point framework that a named control point affects the priorities of some array  
969 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
970 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
971     
972 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
973 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
974 //     } else {
975 //       std::set<int> s;
976 //       s.insert(groupIdx);
977 //       affectsPrioritiesArray[std::string(name)] = s;
978 //     }
979     
980 // #if DEBUGPRINT   
981 //     std::map<std::string, std::set<int> >::iterator f;
982 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
983 //       std::string name = f->first;
984 //       std::set<int> &vals = f->second;
985 //       cout << "Control point " << name << " affects arrays: ";
986 //       std::set<int>::iterator i;
987 //       for(i=vals.begin(); i!=vals.end();++i){
988 //      cout << *i << " ";
989 //       }
990 //       cout << endl;
991 //     }
992 // #endif
993     
994 //   }
995   
996 //   /// Inform the control point framework that a named control point affects the priority of some entry method
997 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
998 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
999
1000 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
1001 //       affectsPrioritiesEP[std::string(name)].insert(idx);
1002 //     } else {
1003 //       std::set<int> s;
1004 //       s.insert(idx);
1005 //       affectsPrioritiesEP[std::string(name)] = s;
1006 //     }
1007     
1008 // #if DEBUGPRINT
1009 //     std::map<std::string, std::set<int> >::iterator f;
1010 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1011 //       std::string name = f->first;
1012 //       std::set<int> &vals = f->second;
1013 //       cout << "Control point " << name << " affects EP: ";
1014 //       std::set<int>::iterator i;
1015 //       for(i=vals.begin(); i!=vals.end();++i){
1016 //      cout << *i << " ";
1017 //       }
1018 //       cout << endl;
1019 //     }
1020 // #endif
1021
1022
1023 //   }
1024   
1025
1026
1027 /// An interface callable by the application.
1028 void gotoNextPhase(){
1029   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1030 }
1031
1032 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1033 {
1034   gotoNextPhase();
1035 }
1036
1037
1038 /// A mainchare that is used just to create our controlPointManager group at startup
1039 class controlPointMain : public CBase_controlPointMain {
1040 public:
1041   controlPointMain(CkArgMsg* args){
1042 #if OLDRANDSEED
1043     struct timeval tp;
1044     gettimeofday(& tp, NULL);
1045     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1046 #else
1047     double time = CmiWallTimer();
1048     int sec = (int)time;
1049     int usec = (int)((time - (double)sec)*1000000.0);
1050     random_seed =  sec ^ usec;
1051 #endif
1052     
1053     
1054     double period, periodms;
1055     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1056     bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1057     if(haveSamplePeriod){
1058       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1059       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1060     } else if(haveSamplePeriodMs){
1061       CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1062       controlPointSamplePeriod = periodms; /**< A readonly */
1063     } else {
1064       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1065     }
1066
1067   
1068     
1069     whichTuningScheme = RandomSelection;
1070
1071
1072     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1073       whichTuningScheme = RandomSelection;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1075       whichTuningScheme = ExhaustiveSearch;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1077       whichTuningScheme = AlwaysDefaults;
1078     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1079       whichTuningScheme = SimulatedAnnealing;
1080     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1081       whichTuningScheme = CriticalPathAutoPrioritization;
1082     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1083       whichTuningScheme = UseBestKnownTiming;
1084     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1085       whichTuningScheme = UseSteering;
1086     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1087       whichTuningScheme = MemoryAware;
1088     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1089       whichTuningScheme = Simplex;
1090     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1091       whichTuningScheme = DivideAndConquer;
1092     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1093       whichTuningScheme = LDBPeriod;
1094     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1095       whichTuningScheme = LDBPeriodLinear;
1096     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1097       whichTuningScheme = LDBPeriodQuadratic;
1098     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1099       whichTuningScheme = LDBPeriodOptimal;
1100     }
1101
1102     char *defValStr = NULL;
1103     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1104       CkPrintf("You specified default value string: %s\n", defValStr);
1105       
1106       // Parse the string, looking for commas
1107      
1108
1109       char *tok = strtok(defValStr, ",");
1110       while (tok) {
1111         // split on the equal sign
1112         int len = strlen(tok);
1113         char *eqsign = strchr(tok, '=');
1114         if(eqsign != NULL && eqsign != tok){
1115           *eqsign = '\0';
1116           char *cpname = tok;
1117           std::string cpName(tok);
1118           char *cpDefVal = eqsign+1;      
1119           int v=-1;
1120           if(sscanf(cpDefVal, "%d", &v) == 1){
1121             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1122             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1123             defaultControlPointValues[cpName] = v;
1124           }
1125         }
1126         tok = strtok(NULL, ",");
1127       }
1128
1129     }
1130
1131     shouldGatherAll = false;
1132     shouldGatherMemoryUsage = false;
1133     shouldGatherUtilization = false;
1134     
1135     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1136       shouldGatherAll = true;
1137     } else {
1138       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1139         shouldGatherMemoryUsage = true;
1140       }
1141       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1142         shouldGatherUtilization = true;
1143       }
1144     }
1145     
1146     writeDataFileAtShutdown = false;   
1147     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1148       writeDataFileAtShutdown = true;
1149     }
1150
1151     shouldFilterOutputData = true;
1152     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1153       shouldFilterOutputData = false;
1154     }
1155
1156
1157    loadDataFileAtStartup = false;   
1158     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1159       loadDataFileAtStartup = true;
1160     }
1161
1162     char *cpdatafile;
1163     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1164       sprintf(CPDataFilename, "%s", cpdatafile);
1165     } else {
1166       sprintf(CPDataFilename, "controlPointData.txt");
1167     }
1168
1169
1170     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1171
1172     delete args;
1173   }
1174   ~controlPointMain(){}
1175 };
1176
1177 /// An interface callable by the application.
1178 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1179   CkAssert(CkMyPe() == 0);
1180   CkPrintf("Application has registered a control point change callback\n");
1181   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1182 }
1183
1184 /// An interface callable by the application.
1185 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1186   if(CkMyPe() == 0){
1187     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1188     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1189   }
1190 }
1191
1192 /// An interface callable by the application.
1193 void registerControlPointTiming(double time){
1194   CkAssert(CkMyPe() == 0);
1195 #if DEBUGPRINT>0
1196   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1197 #endif
1198   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1199 }
1200
1201 /// An interface callable by the application.
1202 void controlPointTimingStamp() {
1203   CkAssert(CkMyPe() == 0);
1204 #if DEBUGPRINT>0
1205   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1206 #endif
1207   
1208   static double prev_time = 0.0;
1209   double t = CmiWallTimer();
1210   double duration = t - prev_time;
1211   prev_time = t;
1212     
1213   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1214 }
1215
1216 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1217 {
1218   controlPointTimingStamp();
1219 }
1220
1221
1222 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1223 {
1224   setFrameworkAdvancePhase(*value);
1225 }
1226
1227
1228
1229
1230 /// Shutdown the control point framework, writing data to disk if necessary
1231 extern "C" void controlPointShutdown(){
1232   if(CkMyPe() == 0){
1233
1234     // wait for gathering of idle time & memory usage to complete
1235     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1236
1237   }
1238 }
1239
1240 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1241 void controlPointInitNode(){
1242   registerExitFn(controlPointShutdown);
1243 }
1244
1245 /// Called periodically to allow control point framework to do things periodically
1246 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1247 #ifdef DEBUGPRINT
1248   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1249 #endif
1250   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1251   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1252 }
1253
1254
1255
1256
1257
1258 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1259 /// user observed characteristic to adapt specific control point values.
1260 /// This function must return valid values for newControlPoints.
1261 void controlPointManager::generatePlan() {
1262   const double startGenerateTime = CmiWallTimer();
1263   const int phase_id = this->phase_id;
1264   const int effective_phase = allData.phases.size();
1265
1266   // Only generate a plan once per phase
1267   if(generatedPlanForStep == phase_id) 
1268     return;
1269   generatedPlanForStep = phase_id;
1270  
1271   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1272   printTuningScheme();
1273
1274   // By default lets put the previous phase data into newControlPoints
1275   instrumentedPhase *prevPhase = previousPhaseData();
1276   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1277           newControlPoints[cpsIter->first] = cpsIter->second;
1278   }
1279
1280
1281   if( whichTuningScheme == RandomSelection ){
1282     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1283     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1284       const std::string &name = cpsIter->first;
1285       const std::pair<int,int> &bounds = cpsIter->second;
1286       const int lb = bounds.first;
1287       const int ub = bounds.second;
1288       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1289     }
1290   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1291     // -----------------------------------------------------------
1292     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1293     //   
1294     // This scheme will return the median value for the range 
1295     // early on, and then will transition over to the new control points
1296     // determined by the critical path adapting code
1297
1298     // This won't work until the periodic function is fixed up a bit
1299
1300     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1301     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1302       const std::string &name = cpsIter->first;
1303       const std::pair<int,int> &bounds = cpsIter->second;
1304       const int lb = bounds.first;
1305       const int ub = bounds.second;
1306       newControlPoints[name] =  (lb+ub)/2;
1307     }
1308
1309   } else if ( whichTuningScheme == MemoryAware ) {
1310
1311     // -----------------------------------------------------------
1312     //  STEERING BASED ON MEMORY USAGE
1313
1314     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1315     instrumentedPhase *prevPhase = previousPhaseData();
1316  
1317     if(phase_id%2 == 0){
1318       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1319       twoAgoPhase->print();
1320       CkPrintf("\n");
1321       fflush(stdout);
1322       
1323       // See if memory usage is low:
1324       double memUsage = twoAgoPhase->memoryUsageMB;
1325       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1326       fflush(stdout);
1327       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1328         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1329         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1330         
1331         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1332         newControlPoints = twoAgoPhase->controlPoints;
1333
1334
1335         CkPrintf("Steering (memory based) initialized plan\n");
1336         fflush(stdout);
1337
1338         // look for a possible control point knob to turn
1339         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1340         
1341         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1342         bool found = false;
1343         std::string cpName;
1344         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1345         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1346         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1347           cpName = iter->first;
1348           info = &iter->second;
1349           found = true;
1350           break;
1351         }
1352
1353         // Adapt the control point value
1354         if(found){
1355           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1356           fflush(stdout);
1357           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1358           const int maxValue = controlPointSpace[cpName].second;
1359           
1360           if(twoAgoValue+1 <= maxValue){
1361             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1362           }
1363         }
1364         
1365       }
1366     }
1367
1368     CkPrintf("Steering (memory based) done for this phase\n");
1369     fflush(stdout);
1370
1371   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1372
1373     // -----------------------------------------------------------
1374     //  USE BEST KNOWN TIME  
1375     static bool firstTime = true;
1376     if(firstTime){
1377       firstTime = false;
1378       instrumentedPhase *best = allData.findBest(); 
1379       CkPrintf("Best known phase is:\n");
1380       best->print();
1381       CkPrintf("\n");
1382       
1383       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1384       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1385         const std::string &name = cpsIter->first;
1386         newControlPoints[name] =  best->controlPoints[name];
1387       }
1388     }
1389   } else if( whichTuningScheme == LDBPeriod) {
1390     // Assume this is used in this manner:
1391     //  1) go to next phase
1392     //  2) request control point
1393     //  3) load balancing
1394     //  4) computation
1395     
1396     
1397     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1398     instrumentedPhase *prevPhase = previousPhaseData();
1399     
1400     
1401     const std::vector<double> &times = twoAgoPhase->times;
1402     const int oldNumTimings = times.size();
1403
1404
1405     const std::vector<double> &timesNew = prevPhase->times;
1406     const int newNumTimings = timesNew.size();
1407
1408
1409     if(oldNumTimings > 4 && newNumTimings > 4){
1410       
1411       // Build model of execution time based on two phases ago
1412       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1413       
1414       double oldSum = 0;
1415       
1416       for(int i=2; i<oldNumTimings; i++){
1417         oldSum += times[i];
1418       }
1419       
1420       const double oldAvg = oldSum / (oldNumTimings-2);
1421       
1422       
1423       
1424       
1425       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1426       const double expectedTotalTime = oldAvg * newNumTimings;
1427       
1428       
1429       // Measure actual time
1430       double newSum = 0.0;
1431       for(int i=2; i<newNumTimings; ++i){
1432         newSum += timesNew[i];
1433       }
1434       
1435       const double newAvg = newSum / (newNumTimings-2);
1436       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1437       
1438       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1439       
1440       // Determine load balance cost
1441       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1442       
1443       const double benefitAfterLB = benefit - lbcost;
1444     
1445     
1446       // Determine whether LB cost outweights the estimated benefit
1447       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1448     
1449     
1450       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1451       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1452         const std::string &name = cpsIter->first;
1453         const std::pair<int,int> &bounds = cpsIter->second;
1454         const int lb = bounds.first;
1455         const int ub = bounds.second;
1456       
1457         if(benefitAfterLB > 0){
1458           CkPrintf("Constant Model: Beneficial LB\n");
1459           int newval = newControlPoints[name] / 2;
1460           if(newval > lb)
1461             newControlPoints[name] = newval;
1462           else 
1463             newControlPoints[name] = lb;
1464         } else {
1465           CkPrintf("Constant Model: Detrimental LB\n");
1466           int newval = newControlPoints[name] * 2;
1467           if(newval < ub)
1468             newControlPoints[name] = newval;
1469           else
1470             newControlPoints[name] = ub;
1471         }
1472       }
1473     }
1474     
1475     
1476   }  else if( whichTuningScheme == LDBPeriodLinear) {
1477     // Assume this is used in this manner:
1478     //  1) go to next phase
1479     //  2) request control point
1480     //  3) load balancing
1481     //  4) computation
1482
1483
1484     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1485     instrumentedPhase *prevPhase = previousPhaseData();
1486     
1487     const std::vector<double> &times = twoAgoPhase->times;
1488     const int oldNumTimings = times.size();
1489
1490     const std::vector<double> &timesNew = prevPhase->times;
1491     const int newNumTimings = timesNew.size();
1492     
1493
1494     if(oldNumTimings > 4 && newNumTimings > 4){
1495
1496       // Build model of execution time based on two phases ago
1497       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1498       const int b1 = 2 + (oldNumTimings-2)/2;
1499       double s1 = 0;
1500       double s2 = 0;
1501     
1502       const double ldbStepsTime = times[0] + times[1];
1503     
1504       for(int i=2; i<b1; i++){
1505         s1 += times[i];
1506       }
1507       for(int i=b1; i<oldNumTimings; i++){
1508         s2 += times[i];
1509       }
1510       
1511       
1512       // Compute the estimated time for the last phase's data
1513     
1514       const double a1 = s1 / (double)(b1-2);
1515       const double a2 = s2 / (double)(oldNumTimings-b1);
1516       const double aold = (a1+a2)/2.0;
1517
1518       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1519         
1520     
1521       // Measure actual time
1522       double sum = 0.0;
1523       for(int i=2; i<newNumTimings; ++i){
1524         sum += timesNew[i];
1525       }
1526
1527       const double avg = sum / ((double)(newNumTimings-2));
1528       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1529
1530       const double benefit = expectedTotalTime - actualTotalTime;
1531
1532       // Determine load balance cost
1533       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1534
1535       const double benefitAfterLB = benefit - lbcost;
1536
1537     
1538       // Determine whether LB cost outweights the estimated benefit
1539       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1540     
1541     
1542     
1543       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1544       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1545         const std::string &name = cpsIter->first;
1546         const std::pair<int,int> &bounds = cpsIter->second;
1547         const int lb = bounds.first;
1548         const int ub = bounds.second;
1549       
1550         if(benefitAfterLB > 0){
1551           CkPrintf("Linear Model: Beneficial LB\n");
1552           int newval = newControlPoints[name] / 2;
1553           if(newval > lb)
1554             newControlPoints[name] = newval;
1555           else 
1556             newControlPoints[name] = lb;
1557         } else {
1558           CkPrintf("Linear Model: Detrimental LB\n");
1559           int newval = newControlPoints[name] * 2;
1560           if(newval < ub)
1561             newControlPoints[name] = newval;
1562           else 
1563             newControlPoints[name] = ub;
1564         }
1565       }
1566     }
1567
1568   }
1569
1570   else if( whichTuningScheme == LDBPeriodQuadratic) {
1571     // Assume this is used in this manner:
1572     //  1) go to next phase
1573     //  2) request control point
1574     //  3) load balancing
1575     //  4) computation
1576
1577
1578     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1579     instrumentedPhase *prevPhase = previousPhaseData();
1580         
1581     const std::vector<double> &times = twoAgoPhase->times;
1582     const int oldNumTimings = times.size();
1583
1584     const std::vector<double> &timesNew = prevPhase->times;
1585     const int newNumTimings = timesNew.size();
1586
1587     
1588     if(oldNumTimings > 4 && newNumTimings > 4){
1589
1590
1591       // Build model of execution time based on two phases ago
1592       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1593       const int b1 = 2 + (oldNumTimings-2)/3;
1594       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1595       double s1 = 0;
1596       double s2 = 0;
1597       double s3 = 0;
1598
1599       const double ldbStepsTime = times[0] + times[1];
1600     
1601       for(int i=2; i<b1; i++){
1602         s1 += times[i];
1603       }
1604       for(int i=b1; i<b2; i++){
1605         s2 += times[i];
1606       }
1607       for(int i=b2; i<oldNumTimings; i++){
1608         s3 += times[i];
1609       }
1610
1611     
1612       // Compute the estimated time for the last phase's data
1613     
1614       const double a1 = s1 / (double)(b1-2);
1615       const double a2 = s2 / (double)(b2-b1);
1616       const double a3 = s3 / (double)(oldNumTimings-b2);
1617     
1618       const double a = (a1-2.0*a2+a3)/2.0;
1619       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1620       const double c = a3;
1621     
1622       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1623       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1624       const double x2 = 0.5;
1625       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1626    
1627     
1628       // Measure actual time
1629       double sum = 0.0;
1630       for(int i=2; i<newNumTimings; ++i){
1631         sum += timesNew[i];
1632       }
1633
1634       const double avg = sum / ((double)(newNumTimings-2));
1635       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1636
1637       const double benefit = expectedTotalTime - actualTotalTime;
1638
1639       // Determine load balance cost
1640       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1641
1642       const double benefitAfterLB = benefit - lbcost;
1643
1644     
1645       // Determine whether LB cost outweights the estimated benefit
1646       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1647     
1648     
1649     
1650       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1651       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1652         const std::string &name = cpsIter->first;
1653         const std::pair<int,int> &bounds = cpsIter->second;
1654         const int lb = bounds.first;
1655         const int ub = bounds.second;
1656       
1657         if(benefitAfterLB > 0){
1658           CkPrintf("QuadraticModel: Beneficial LB\n");
1659           int newval = newControlPoints[name] / 2;
1660           if(newval > lb)
1661             newControlPoints[name] = newval;
1662           else 
1663             newControlPoints[name] = lb;
1664         } else {
1665           CkPrintf("QuadraticModel: Detrimental LB\n");
1666           int newval = newControlPoints[name] * 2;
1667           if(newval < ub)
1668             newControlPoints[name] = newval;
1669           else 
1670             newControlPoints[name] = ub;
1671         }
1672       
1673       }
1674     }
1675     
1676
1677   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1678     // Assume this is used in this manner:
1679     //  1) go to next phase
1680     //  2) request control point
1681     //  3) load balancing
1682     //  4) computation
1683
1684
1685
1686     instrumentedPhase *prevPhase = previousPhaseData();
1687     
1688     const std::vector<double> &times = prevPhase->times;
1689     const int numTimings = times.size();
1690     
1691     if( numTimings > 4){
1692
1693       const int b1 = 2 + (numTimings-2)/2;
1694       double s1 = 0;
1695       double s2 = 0;
1696     
1697     
1698       for(int i=2; i<b1; i++){
1699         s1 += times[i];
1700       }
1701       for(int i=b1; i<numTimings; i++){
1702         s2 += times[i];
1703       }
1704       
1705     
1706       const double a1 = s1 / (double)(b1-2);
1707       const double a2 = s2 / (double)(numTimings-b1);
1708       const double avg = (a1+a1) / 2.0;
1709
1710       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1711
1712       const double ldbStepsTime = times[0] + times[1];
1713       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1714       
1715
1716       int newval = roundDouble(sqrt(2.0*lbcost/m));
1717       
1718       // We don't really know what to do if m<=0, so we'll just double the period
1719       if(m<=0)
1720         newval = 2*numTimings;     
1721       
1722       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1723     
1724     
1725       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1726       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1727         // TODO: lookup only control points that are relevant instead of all of them
1728         const std::string &name = cpsIter->first;
1729         const std::pair<int,int> &bounds = cpsIter->second;
1730         const int lb = bounds.first;
1731         const int ub = bounds.second;
1732         
1733         if(newval < lb){
1734           newControlPoints[name] = lb;
1735         } else if(newval > ub){
1736           newControlPoints[name] = ub;
1737         } else {
1738           newControlPoints[name] = newval;
1739         } 
1740         
1741       }
1742       
1743       
1744     }
1745     
1746  
1747
1748   } else if ( whichTuningScheme == UseSteering ) {
1749           // -----------------------------------------------------------
1750           //  STEERING BASED ON KNOWLEDGE
1751
1752           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1753           // plans are only generated after 3 phases
1754
1755           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1756           instrumentedPhase *prevPhase = previousPhaseData();
1757
1758           if(phase_id%4 == 0){
1759                   CkPrintf("Steering based on 2 phases ago:\n");
1760                   twoAgoPhase->print();
1761                   CkPrintf("\n");
1762                   fflush(stdout);
1763
1764                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1765
1766
1767                   // ========================================= Concurrency =============================================
1768                   // See if idle time is high:
1769                   {
1770                           double idleTime = twoAgoPhase->idleTime.avg;
1771                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1772                           fflush(stdout);
1773                           if(idleTime > 0.10){
1774                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1775                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1776
1777                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1778
1779                                   bool found = false;
1780                                   std::string cpName;
1781                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1782                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1783                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1784                                           cpName = iter->first;
1785                                           info = &iter->second;
1786
1787                                           // Initialize a new plan based on two phases ago
1788                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1789
1790                                           CkPrintf("Steering found knob to turn\n");
1791                                           fflush(stdout);
1792
1793                                           if(info->first == ControlPoint::EFF_INC){
1794                                                   const int maxValue = controlPointSpace[cpName].second;
1795                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1796                                                   if(twoAgoValue+1 <= maxValue){
1797                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1798                                                   }
1799                                           } else {
1800                                                   const int minValue = controlPointSpace[cpName].second;
1801                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1802                                                   if(twoAgoValue-1 >= minValue){
1803                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1804                                                   }
1805                                           }
1806
1807                                           possibleNextStepPlans.push_back(aNewPlan);
1808
1809                                   }
1810                           }
1811                   }
1812
1813                   // ========================================= Grain Size =============================================
1814                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1815                   {
1816                           double overheadTime = twoAgoPhase->overheadTime.avg;
1817                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1818                           fflush(stdout);
1819                           if(overheadTime > 0.10){
1820                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1821                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1822
1823                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1824
1825                                   bool found = false;
1826                                   std::string cpName;
1827                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1828                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1829                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1830                                           cpName = iter->first;
1831                                           info = &iter->second;
1832
1833                                           // Initialize a new plan based on two phases ago
1834                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1835
1836
1837
1838                                           CkPrintf("Steering found knob to turn\n");
1839                                           fflush(stdout);
1840
1841                                           if(info->first == ControlPoint::EFF_INC){
1842                                                   const int maxValue = controlPointSpace[cpName].second;
1843                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1844                                                   if(twoAgoValue+1 <= maxValue){
1845                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1846                                                   }
1847                                           } else {
1848                                                   const int minValue = controlPointSpace[cpName].second;
1849                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1850                                                   if(twoAgoValue-1 >= minValue){
1851                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1852                                                   }
1853                                           }
1854
1855                                           possibleNextStepPlans.push_back(aNewPlan);
1856
1857                                   }
1858
1859                           }
1860                   }
1861                   // ========================================= GPU Offload =============================================
1862                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1863                   {
1864                           double idleTime = twoAgoPhase->idleTime.avg;
1865                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1866                           fflush(stdout);
1867                           if(idleTime > 0.10){
1868                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1869                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1870
1871                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1872
1873                                   bool found = false;
1874                                   std::string cpName;
1875                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1876                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1877                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1878                                           cpName = iter->first;
1879                                           info = &iter->second;
1880
1881                                           // Initialize a new plan based on two phases ago
1882                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1883
1884
1885                                           CkPrintf("Steering found knob to turn\n");
1886                                           fflush(stdout);
1887
1888                                           if(info->first == ControlPoint::EFF_DEC){
1889                                                   const int maxValue = controlPointSpace[cpName].second;
1890                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1891                                                   if(twoAgoValue+1 <= maxValue){
1892                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1893                                                   }
1894                                           } else {
1895                                                   const int minValue = controlPointSpace[cpName].second;
1896                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1897                                                   if(twoAgoValue-1 >= minValue){
1898                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1899                                                   }
1900                                           }
1901
1902                                           possibleNextStepPlans.push_back(aNewPlan);
1903
1904                                   }
1905
1906                           }
1907                   }
1908
1909                   // ========================================= Done =============================================
1910
1911
1912                   if(possibleNextStepPlans.size() > 0){
1913                           newControlPoints = possibleNextStepPlans[0];
1914                   }
1915
1916
1917                   CkPrintf("Steering done for this phase\n");
1918                   fflush(stdout);
1919
1920           }  else {
1921                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1922                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1923                   fflush(stdout);
1924           }
1925
1926
1927
1928   } else if( whichTuningScheme == SimulatedAnnealing ) {
1929
1930     // -----------------------------------------------------------
1931     //  SIMULATED ANNEALING
1932     //  Simulated Annealing style hill climbing method
1933     //
1934     //  Find the best search space configuration, and try something
1935     //  nearby it, with a radius decreasing as phases increase
1936
1937     CkPrintf("Finding best phase\n");
1938     instrumentedPhase *bestPhase = allData.findBest();  
1939     CkPrintf("best found:\n"); 
1940     bestPhase->print(); 
1941     CkPrintf("\n"); 
1942     
1943
1944     const int convergeByPhase = 100;
1945     // Determine from 0.0 to 1.0 how far along we are
1946     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1947
1948     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1949     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1950       const std::string &name = cpsIter->first;
1951       const std::pair<int,int> &bounds = cpsIter->second;
1952       const int minValue = bounds.first;
1953       const int maxValue = bounds.second;
1954       
1955       const int before = bestPhase->controlPoints[name];   
1956   
1957       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1958
1959       int high = min(before+range, maxValue);
1960       int low = max(before-range, minValue);
1961       
1962       newControlPoints[name] = low;
1963       if(high-low > 0){
1964         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1965       } 
1966       
1967     }
1968
1969   } else if ( whichTuningScheme == DivideAndConquer ) {
1970
1971           // -----------------------------------------------------------
1972           //  STEERING FOR Divide & Conquer Programs
1973           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1974           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1975           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1976
1977           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1978           // plans are only generated after 3 phases
1979
1980           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1981           instrumentedPhase *prevPhase = previousPhaseData();
1982
1983           if(phase_id%4 == 0){
1984                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1985                   twoAgoPhase->print();
1986                   CkPrintf("\n");
1987                   fflush(stdout);
1988
1989                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1990
1991
1992                   // ========================================= Concurrency =============================================
1993                   // See if idle time is high:
1994                   {
1995                           double idleTime = twoAgoPhase->idleTime.avg;
1996                           double overheadTime = twoAgoPhase->overheadTime.avg;
1997
1998
1999                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
2000                           fflush(stdout);
2001                           if(idleTime+overheadTime > 0.10){
2002                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
2003                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
2004
2005                                   int direction = -1;
2006                                   if (idleTime>overheadTime){
2007                                           // We need to decrease the grain size, or increase the available concurrency
2008                                           direction = 1;
2009                                   }
2010
2011                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2012
2013                                   bool found = false;
2014                                   std::string cpName;
2015                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2016                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2017                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2018                                           cpName = iter->first;
2019                                           info = &iter->second;
2020                                           
2021                                           // Initialize a new plan based on two phases ago
2022                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2023
2024                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2025                                           fflush(stdout);
2026
2027                                           int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2028                                           
2029                                           if(info->first == ControlPoint::EFF_INC){
2030                                             const int minValue = controlPointSpace[cpName].first;
2031                                             const int maxValue = controlPointSpace[cpName].second;
2032                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2033                                             const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);                                          
2034                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2035                                             aNewPlan[cpName] = newVal;
2036                                           } else {
2037                                             const int minValue = controlPointSpace[cpName].first;
2038                                             const int maxValue = controlPointSpace[cpName].second;
2039                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2040                                             const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2041                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2042                                             aNewPlan[cpName] = newVal;
2043                                           }
2044                                           
2045                                           possibleNextStepPlans.push_back(aNewPlan);
2046                                   }
2047                           }
2048                   }
2049
2050                   if(possibleNextStepPlans.size() > 0){
2051                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2052                     newControlPoints = possibleNextStepPlans[0];
2053                   } else {
2054                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2055                   }
2056           }
2057
2058   } else if( whichTuningScheme == Simplex ) {
2059
2060           // -----------------------------------------------------------
2061           //  Nelder Mead Simplex Algorithm
2062           //
2063           //  A scheme that takes a simplex (n+1 points) and moves it
2064           //  toward the minimum, eventually converging there.
2065
2066           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2067
2068   } else if( whichTuningScheme == ExhaustiveSearch ) {
2069     
2070     // -----------------------------------------------------------
2071     // EXHAUSTIVE SEARCH
2072    
2073     int numDimensions = controlPointSpace.size();
2074     CkAssert(numDimensions > 0);
2075   
2076     vector<int> lowerBounds(numDimensions);
2077     vector<int> upperBounds(numDimensions); 
2078   
2079     int d=0;
2080     std::map<std::string, pair<int,int> >::iterator iter;
2081     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2082       //    CkPrintf("Examining dimension %d\n", d);
2083       lowerBounds[d] = iter->second.first;
2084       upperBounds[d] = iter->second.second;
2085       d++;
2086     }
2087    
2088     // get names for each dimension (control point)
2089     vector<std::string> names(numDimensions);
2090     d=0;
2091     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2092       names[d] = niter->first;
2093       d++;
2094     }
2095   
2096   
2097     // Create the first possible configuration
2098     vector<int> config = lowerBounds;
2099     config.push_back(0);
2100   
2101     // Increment until finding an unused configuration
2102     allData.cleanupNames(); // put -1 values in for any control points missing
2103     std::vector<instrumentedPhase*> &phases = allData.phases;     
2104
2105     while(true){
2106     
2107       std::vector<instrumentedPhase*>::iterator piter; 
2108       bool testedConfiguration = false; 
2109       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2110       
2111         // Test if the configuration matches this phase
2112         bool match = true;
2113         for(int j=0;j<numDimensions;j++){
2114           match &= (*piter)->controlPoints[names[j]] == config[j];
2115         }
2116       
2117         if(match){
2118           testedConfiguration = true; 
2119           break;
2120         } 
2121       } 
2122     
2123       if(testedConfiguration == false){ 
2124         break;
2125       } 
2126     
2127       // go to next configuration
2128       config[0] ++;
2129       // Propagate "carrys"
2130       for(int i=0;i<numDimensions;i++){
2131         if(config[i] > upperBounds[i]){
2132           config[i+1]++;
2133           config[i] = lowerBounds[i];
2134         }
2135       }
2136       // check if we have exhausted all possible values
2137       if(config[numDimensions] > 0){
2138         break;
2139       }
2140        
2141     }
2142   
2143     if(config[numDimensions] > 0){
2144       for(int i=0;i<numDimensions;i++){ 
2145         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2146       }
2147     }
2148
2149     // results are now in config[i]
2150     
2151     for(int i=0; i<numDimensions; i++){
2152       newControlPoints[names[i]] = config[i];
2153       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2154     }
2155
2156
2157   } else {
2158     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2159   }
2160
2161
2162   const double endGenerateTime = CmiWallTimer();
2163   
2164   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2165   
2166 }
2167
2168
2169
2170
2171
2172 /// Get control point value from range of integers [lb,ub]
2173 int controlPoint(const char *name, int lb, int ub){
2174   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2175   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2176   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2177   int result;
2178
2179   // if we already have control point values for phase, return them
2180   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2181     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2182     return thisPhaseData->controlPoints[std::string(name)];
2183   }
2184   
2185
2186   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2187     // For the first few phases, just use the lower bound, or the default if one was provided 
2188     // This ensures that the ranges for all the control points are known before we do anything fancy
2189     result = lb;
2190
2191     if(defaultControlPointValues.count(std::string(name)) > 0){
2192       int v = defaultControlPointValues[std::string(name)];
2193       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2194       result = v;
2195     }
2196
2197   } else if(controlPointSpace.count(std::string(name)) == 0){
2198     // if this is the first time we've seen the CP, then return the lower bound
2199     result = lb;
2200     
2201   }  else {
2202     // Generate a plan one time for each phase
2203     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2204     
2205     // Use the planned value:
2206     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2207     
2208   }
2209
2210   if(!isInRange(result,ub,lb)){
2211     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2212     fflush(stdout);
2213     fflush(stderr);
2214   }
2215   CkAssert(isInRange(result,ub,lb));
2216   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2217
2218   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2219
2220   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2221   //  thisPhaseData->print();
2222   
2223   return result;
2224 }
2225
2226
2227 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2228   CkAssert(CkMyPe() == 0);
2229   return controlPoint("FortranCP", *lb, *ub);
2230 }
2231
2232
2233
2234
2235 /// Inform the control point framework that a named control point affects the priorities of some array  
2236 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2237 //   CkGroupID aid = arraybase.ckGetArrayID();
2238 //   int groupIdx = aid.idx;
2239 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2240 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2241 // }
2242
2243
2244 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2245 // void controlPointPriorityEntry(const char *name, int idx){
2246 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2247 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2248 // }
2249
2250
2251
2252
2253
2254 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2255
2256     This function decomposes the algorithm into a state machine that allows it to
2257     evaluate one or more configurations through subsequent clls to this function.
2258     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2259
2260     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2261     so a list of them will be created and they will be evaluated, one per call.
2262
2263     Currently there is no stopping criteria, but the simplex ought to contract down
2264     to a few close configurations, and hence not much change will happen after this 
2265     point.
2266
2267  */
2268 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2269
2270         if(useBestKnown){
2271                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2272                 return;
2273         }
2274
2275
2276         if(firstSimplexPhase< 0){
2277                 firstSimplexPhase = allData.phases.size()-1;
2278                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2279         }
2280
2281         int n = controlPointSpace.size();
2282
2283         CkAssert(n>=2);
2284
2285
2286         if(simplexState == beginning){
2287                 // First we evaluate n+1 random points, then we go to a different state
2288                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2289                         CkPrintf("Simplex Tuning: chose random configuration\n");
2290
2291                         // Choose random values from the middle third of the range in each dimension
2292                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2293                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2294                                 const std::string &name = cpsIter->first;
2295                                 const std::pair<int,int> &bounds = cpsIter->second;
2296                                 const int lb = bounds.first;
2297                                 const int ub = bounds.second;
2298                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2299                         }
2300                 } else {
2301                         // Set initial simplex:
2302                         for(int i=0; i<n+1; i++){
2303                                 simplexIndices.insert(firstSimplexPhase+i);
2304                         }
2305                         CkAssert(simplexIndices.size() == n+1);
2306
2307                         // Transition to reflecting state
2308                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2309
2310                 }
2311
2312         } else if (simplexState == reflecting){
2313                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2314                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2315
2316                 // Find the highest time from other points in the simplex
2317                 double highestTimeForOthersInSimplex = 0.0;
2318                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2319                         double t = allData.phases[*iter]->medianTime();
2320                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2321                                 highestTimeForOthersInSimplex = t;
2322                         }
2323                 }
2324
2325                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2326
2327                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2328                         // if y* < yl,  transition to "expanding"
2329                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2330
2331                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2332                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2333                         CkAssert(simplexIndices.size() == n+1);
2334                         simplexIndices.erase(worstPhase);
2335                         CkAssert(simplexIndices.size() == n);
2336                         simplexIndices.insert(pPhase);
2337                         CkAssert(simplexIndices.size() == n+1);
2338                         // Transition to reflecting state
2339                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2340
2341                 } else {
2342                         // if y* > yh
2343                         if(recentPhaseTime <= worstTime){
2344                                 // replace Ph with P*
2345                                 CkAssert(simplexIndices.size() == n+1);
2346                                 simplexIndices.erase(worstPhase);
2347                                 CkAssert(simplexIndices.size() == n);
2348                                 simplexIndices.insert(pPhase);
2349                                 CkAssert(simplexIndices.size() == n+1);
2350                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2351                                 worstPhase = pPhase;
2352                                 // Just as a sanity check, make sure we don't use the non-updated values.
2353                                 worst.clear();
2354                         }
2355
2356                         // Now, form P** and do contracting phase
2357                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2358
2359                 }
2360
2361         } else if (simplexState == doneExpanding){
2362                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2363                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2364                 // A new configuration has been evaluated
2365
2366                 // Check to see if y** < y1
2367                 if(recentPhaseTime < bestTime){
2368                         // replace Ph by P**
2369                         CkAssert(simplexIndices.size() == n+1);
2370                         simplexIndices.erase(worstPhase);
2371                         CkAssert(simplexIndices.size() == n);
2372                         simplexIndices.insert(p2Phase);
2373                         CkAssert(simplexIndices.size() == n+1);
2374                 } else {
2375                         //      replace Ph by P*
2376                         CkAssert(simplexIndices.size() == n+1);
2377                         simplexIndices.erase(worstPhase);
2378                         CkAssert(simplexIndices.size() == n);
2379                         simplexIndices.insert(pPhase);
2380                         CkAssert(simplexIndices.size() == n+1);
2381                 }
2382
2383                 // Transition to reflecting state
2384                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2385
2386         }  else if (simplexState == contracting){
2387                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2388                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2389                 // A new configuration has been evaluated
2390
2391                 // Check to see if y** > yh
2392                 if(recentPhaseTime <= worstTime){
2393                         // replace Ph by P**
2394                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2395                         CkAssert(simplexIndices.size() == n+1);
2396                         simplexIndices.erase(worstPhase);
2397                         CkAssert(simplexIndices.size() == n);
2398                         simplexIndices.insert(p2Phase);
2399                         CkAssert(simplexIndices.size() == n+1);
2400                         // Transition to reflecting state
2401                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2402
2403                 } else {
2404                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2405                         simplexState = stillContracting;
2406
2407                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2408                         stillMustContractList = simplexIndices;
2409
2410                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2411                 }
2412
2413         } else if (simplexState == stillContracting){
2414                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2415
2416                 if(stillMustContractList.size()>0){
2417                         int c = *stillMustContractList.begin();
2418                         stillMustContractList.erase(c);
2419                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2420
2421                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2422
2423                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2424                         int v = 0;
2425                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2426                                 const std::string &name = cpsIter->first;
2427                                 const std::pair<int,int> &bounds = cpsIter->second;
2428                                 const int lb = bounds.first;
2429                                 const int ub = bounds.second;
2430
2431                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2432
2433                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2434                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2435                                 v++;
2436                         }
2437                 } else {
2438                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2439                         CkAssert(stillMustContractList.size() == 0);
2440                         simplexIndices.clear();
2441                         CkAssert(simplexIndices.size()==0);
2442                         for(int i=0; i<n+1; i++){
2443                                 simplexIndices.insert(allData.phases.size()-2-i);
2444                         }
2445                         CkAssert(simplexIndices.size()==n+1);
2446
2447                         // Transition to reflecting state
2448                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2449
2450                 }
2451
2452
2453         } else if (simplexState == expanding){
2454                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2455                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2456                 // A new configuration has been evaluated
2457
2458                 // determine if y** < yl
2459                 if(recentPhaseTime < bestTime){
2460                         // replace Ph by P**
2461                         CkAssert(simplexIndices.size() == n+1);
2462                         simplexIndices.erase(worstPhase);
2463                         CkAssert(simplexIndices.size() == n);
2464                         simplexIndices.insert(p2Phase);
2465                         CkAssert(simplexIndices.size() == n+1);
2466                         // Transition to reflecting state
2467                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2468
2469                 } else {
2470                         // else, replace ph with p*
2471                         CkAssert(simplexIndices.size() == n+1);
2472                         simplexIndices.erase(worstPhase);
2473                         CkAssert(simplexIndices.size() == n);
2474                         simplexIndices.insert(pPhase);
2475                         CkAssert(simplexIndices.size() == n+1);
2476                         // Transition to reflecting state
2477                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2478                 }
2479
2480
2481         } else {
2482                 CkAbort("Unknown simplexState");
2483         }
2484
2485 }
2486
2487
2488
2489 /** Replace the worst point with its reflection across the centroid. */
2490 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2491
2492         int n = controlPointSpace.size();
2493
2494         printSimplex(allData);
2495
2496         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2497
2498
2499         // Quit if the diameter of our simplex is small
2500         double maxr = 0.0;
2501         for(int i=0; i<n+1; i++){
2502                 //              Compute r^2 of this simplex point from the centroid
2503                 double r2 = 0.0;
2504                 std::vector<double> p = pointCoords(allData, i);
2505                 for(int d=0; d<p.size(); d++){
2506                         double r1 = (p[d] * centroid[d]);
2507                         r2 += r1*r1;
2508                 }
2509                 if(r2 > maxr)
2510                         maxr = r2;
2511         }
2512
2513 #if 0
2514         // At some point we quit this tuning
2515         if(maxr < 10){
2516                 useBestKnown = true;
2517                 instrumentedPhase *best = allData.findBest();
2518                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2519
2520                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2521                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2522                         const std::string &name = cpsIter->first;
2523                         newControlPoints[name] =  best->controlPoints[name];
2524                 }
2525         }
2526 #endif
2527
2528         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2529
2530         pPhase = allData.phases.size()-1;
2531         P.resize(n);
2532         for(int i=0; i<n; i++){
2533                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2534         }
2535
2536         for(int i=0; i<P.size(); i++){
2537                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2538         }
2539
2540
2541         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2542         int v = 0;
2543         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2544                 const std::string &name = cpsIter->first;
2545                 const std::pair<int,int> &bounds = cpsIter->second;
2546                 const int lb = bounds.first;
2547                 const int ub = bounds.second;
2548                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2549                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2550                 v++;
2551         }
2552
2553
2554         // Transition to "reflecting" state
2555         simplexState = reflecting;
2556         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2557
2558 }
2559
2560
2561
2562
2563 /** Replace the newly tested reflection with a further expanded version of itself. */
2564 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2565         int n = controlPointSpace.size();
2566         printSimplex(allData);
2567
2568         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2569         // I believe the equation for P** in the text on page 308 is correct.
2570
2571         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2572
2573
2574         p2Phase = allData.phases.size()-1;
2575         P2.resize(n);
2576         for(int i=0; i<n; i++){
2577                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2578         }
2579
2580         for(int i=0; i<P2.size(); i++){
2581                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2582         }
2583
2584
2585         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2586         int v = 0;
2587         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2588                 const std::string &name = cpsIter->first;
2589                 const std::pair<int,int> &bounds = cpsIter->second;
2590                 const int lb = bounds.first;
2591                 const int ub = bounds.second;
2592                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2593                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2594                 v++;
2595         }
2596
2597
2598         // Transition to "doneExpanding" state
2599         simplexState = doneExpanding;
2600         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2601
2602 }
2603
2604
2605
2606
2607 /** Replace the newly tested reflection with a further expanded version of itself. */
2608 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2609         int n = controlPointSpace.size();
2610         printSimplex(allData);
2611
2612         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2613
2614
2615         p2Phase = allData.phases.size()-1;
2616         P2.resize(n);
2617         for(int i=0; i<n; i++){
2618                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2619         }
2620
2621         for(int i=0; i<P2.size(); i++){
2622                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2623         }
2624
2625
2626         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2627         int v = 0;
2628         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2629                 const std::string &name = cpsIter->first;
2630                 const std::pair<int,int> &bounds = cpsIter->second;
2631                 const int lb = bounds.first;
2632                 const int ub = bounds.second;
2633                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2634                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2635                 v++;
2636         }
2637         
2638         
2639         // Transition to "contracting" state
2640         simplexState = contracting;
2641         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2642
2643 }
2644
2645
2646
2647
2648 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2649         int n = controlPointSpace.size();
2650
2651         // Find worst performing point in the simplex
2652         worstPhase = -1;
2653         worstTime = -1.0;
2654         bestPhase = 10000000;
2655         bestTime = 10000000;
2656         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2657                 double t = allData.phases[*iter]->medianTime();
2658                 if(t > worstTime){
2659                         worstTime = t;
2660                         worstPhase = *iter;
2661                 }
2662                 if(t < bestTime){
2663                         bestTime = t;
2664                         bestPhase = *iter;
2665                 }
2666         }
2667         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2668
2669         best = pointCoords(allData, bestPhase);
2670         CkAssert(best.size() == n);
2671
2672         worst = pointCoords(allData, worstPhase);
2673         CkAssert(worst.size() == n);
2674
2675         // Calculate centroid of the remaining points in the simplex
2676         centroid.resize(n);
2677         for(int i=0; i<n; i++){
2678                 centroid[i] = 0.0;
2679         }
2680
2681         int numPts = 0;
2682
2683         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2684                 if(*iter != worstPhase){
2685                         numPts ++;
2686                         // Accumulate into the result vector
2687                         int c = 0;
2688                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2689                                 centroid[c] += citer->second;
2690                                 c++;
2691                         }
2692
2693                 }
2694         }
2695
2696         // Now divide the sums by the number of points.
2697         for(int v = 0; v<centroid.size(); v++) {
2698                 centroid[v] /= (double)numPts;
2699         }
2700
2701         CkAssert(centroid.size() == n);
2702
2703         for(int i=0; i<centroid.size(); i++){
2704                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2705         }
2706
2707
2708 }
2709
2710
2711
2712 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2713         std::vector<double> result;
2714         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2715                 result.push_back((double)citer->second);
2716         }
2717         return result;
2718 }
2719
2720
2721
2722
2723 void ControlPointWriteOutputToDisk(){
2724         CkAssert(CkMyPe() == 0);
2725         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2726 }
2727
2728
2729
2730 /*! @} */
2731
2732 #include "ControlPoints.def.h"
2733
2734 #endif