Merge branch 'charm' of charmgit:charm into charm
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15 #include <math.h>
16
17
18 #include <climits>
19 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
20  
21
22 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
23 /**  @{ */
24
25 using namespace std;
26
27 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
28
29
30 //#undef DEBUGPRINT
31 //#define DEBUGPRINT 4
32
33 static void periodicProcessControlPoints(void* ptr, double currWallTime);
34
35
36 // A pointer to this PE's controlpoint manager Proxy
37 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
38 /* readonly */ int random_seed;
39 /* readonly */ long controlPointSamplePeriod;
40 /* readonly */ int whichTuningScheme;
41 /* readonly */ bool writeDataFileAtShutdown;
42 /* readonly */ bool shouldFilterOutputData;
43 /* readonly */ bool loadDataFileAtStartup;
44 /* readonly */ bool shouldGatherMemoryUsage;
45 /* readonly */ bool shouldGatherUtilization;
46 /* readonly */ bool shouldGatherAll;
47 /* readonly */ char CPDataFilename[512];
48
49
50
51 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
52 /// These probably come from the command line arguments, so are available only on PE 0
53 std::map<std::string, int> defaultControlPointValues;
54
55
56
57 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
58
59
60
61 void printTuningScheme(){
62   switch(whichTuningScheme){
63   case RandomSelection:
64     CkPrintf("Tuning Scheme: RandomSelection\n");
65     break;
66   case AlwaysDefaults:
67     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
68     break;
69   case SimulatedAnnealing:
70     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
71     break;
72   case ExhaustiveSearch:
73     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
74     break;
75   case CriticalPathAutoPrioritization:
76     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
77     break;
78   case UseBestKnownTiming:
79     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
80     break;
81   case UseSteering:
82     CkPrintf("Tuning Scheme: UseSteering\n");
83     break;
84   case MemoryAware:
85     CkPrintf("Tuning Scheme: MemoryAware\n");
86     break;
87   case Simplex:
88     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
89     break;
90   case DivideAndConquer:
91     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
92     break;
93   case LDBPeriod:
94     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
95     break;
96   case LDBPeriodLinear:
97     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
98     break;
99   case LDBPeriodQuadratic:
100     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
101     break;
102   case LDBPeriodOptimal:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
104     break;
105   default:
106     CkPrintf("Unknown tuning scheme\n");
107     break;
108   }
109   fflush(stdout);
110 }
111
112
113
114 /// A reduction type that combines idle time measurements (min/sum/max etc.)
115 CkReduction::reducerType idleTimeReductionType;
116 /// A reducer that combines idle time measurements (min/sum/max etc.)
117 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
118   double ret[3];
119   if(nMsg > 0){
120     CkAssert(msgs[0]->getSize()==3*sizeof(double));
121     double *m=(double *)msgs[0]->getData();
122     ret[0]=m[0];
123     ret[1]=m[1];
124     ret[2]=m[2];
125   }
126   for (int i=1;i<nMsg;i++) {
127     CkAssert(msgs[i]->getSize()==3*sizeof(double));
128     double *m=(double *)msgs[i]->getData();
129     ret[0]=min(ret[0],m[0]);
130     ret[1]+=m[1];
131     ret[2]=max(ret[2],m[2]);
132   }  
133   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
134 }
135
136
137
138 /// A reduction type that combines idle, overhead, and memory measurements
139 CkReduction::reducerType allMeasuresReductionType;
140 /// A reducer that combines idle, overhead, and memory measurements
141 #define ALL_REDUCTION_SIZE 12
142 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
143   double ret[ALL_REDUCTION_SIZE];
144   if(nMsg > 0){
145     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[0]->getData();
147     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
148   }
149   for (int i=1;i<nMsg;i++) {
150     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
151     double *m=(double *)msgs[i]->getData();
152     // idle time (min/sum/max)
153     ret[0]=min(ret[0],m[0]);
154     ret[1]+=m[1];
155     ret[2]=max(ret[2],m[2]);
156     // overhead time (min/sum/max)
157     ret[3]=min(ret[3],m[3]);
158     ret[4]+=m[4];
159     ret[5]=max(ret[5],m[5]);
160     // mem usage (max)
161     ret[6]=max(ret[6],m[6]);
162     // bytes per invocation for two types of entry methods
163     ret[7]+=m[7];
164     ret[8]+=m[8];
165     ret[9]+=m[9];
166     ret[10]+=m[10];
167     // Grain size (avg)
168     ret[11]+=m[11];
169   }  
170   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
171 }
172
173
174 /// Registers the control point framework's reduction handlers at startup on each PE
175 /*initproc*/ void registerCPReductions(void) {
176   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
177   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
178 }
179
180
181
182
183
184
185 /// Return an integer between 0 and num-1 inclusive
186 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
187 unsigned int randInt(unsigned int num, const char* name, int seed=0){
188   CkAssert(num > 0);
189
190   unsigned long hash = 0;
191   unsigned int c;
192   unsigned char * str = (unsigned char*)name;
193
194   while (c = *str++){
195     unsigned int c2 = (c+64)%128;
196     unsigned int c3 = (c2*5953)%127;
197     hash = c3 + (hash << 6) + (hash << 16) - hash;
198   }
199
200   unsigned long shuffled1 = (hash*2083)%7907;
201   unsigned long shuffled2 = (seed*4297)%2017;
202
203   unsigned long shuffled3 = (random_seed*4799)%7919;
204
205   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
206
207   unsigned int result = ((namehash * 6029) % 1117) % num;
208
209   CkAssert(result >=0 && result < num);
210   return result;
211 }
212
213
214
215 controlPointManager::controlPointManager() {
216   generatedPlanForStep = -1;
217
218     exitWhenReady = false;
219     alreadyRequestedMemoryUsage = false;   
220     alreadyRequestedIdleTime = false;
221     alreadyRequestedAll = false;
222     
223     instrumentedPhase * newPhase = new instrumentedPhase();
224     allData.phases.push_back(newPhase);   
225     
226     frameworkShouldAdvancePhase = false;
227     haveControlPointChangeCallback = false;
228 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
229     
230     ControlPoint::initControlPointEffects();
231
232     phase_id = 0;
233
234     if(loadDataFileAtStartup){    
235       loadDataFile();
236     }
237
238     
239     if(CkMyPe() == 0){
240       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
241     }
242
243     traceRegisterUserEvent("No currently executing message", 5000);
244     traceRegisterUserEvent("Zero time along critical path", 5010);
245     traceRegisterUserEvent("Positive total time along critical path", 5020);
246     traceRegisterUserEvent("env->setPathHistory()", 6000);
247     traceRegisterUserEvent("Critical Path", 5900);
248     traceRegisterUserEvent("Table Entry", 5901);
249
250
251 #if USER_EVENT_FOR_REGISTERTERMINALPATH
252     traceRegisterUserEvent("registerTerminalPath", 100); 
253 #endif
254
255   }
256   
257
258  controlPointManager::~controlPointManager(){
259 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
260   }
261
262
263   /// Loads the previous run data file
264   void controlPointManager::loadDataFile(){
265     ifstream infile(CPDataFilename);
266     vector<std::string> names;
267     std::string line;
268   
269     while(getline(infile,line)){
270       if(line[0] != '#')
271         break;
272     }
273   
274     int numTimings = 0;
275     std::istringstream n(line);
276     n >> numTimings;
277   
278     while(getline(infile,line)){ 
279       if(line[0] != '#') 
280         break; 
281     }
282
283     int numControlPointNames = 0;
284     std::istringstream n2(line);
285     n2 >> numControlPointNames;
286   
287     for(int i=0; i<numControlPointNames; i++){
288       getline(infile,line);
289       names.push_back(line);
290     }
291
292     for(int i=0;i<numTimings;i++){
293       getline(infile,line);
294       while(line[0] == '#')
295         getline(infile,line); 
296
297       instrumentedPhase * ips = new instrumentedPhase();
298
299       std::istringstream iss(line);
300
301       // Read memory usage for phase
302       iss >> ips->memoryUsageMB;
303       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
304
305       // Read idle time
306       iss >> ips->idleTime.min;
307       iss >> ips->idleTime.avg;
308       iss >> ips->idleTime.max;
309
310       // Read overhead time
311       iss >> ips->overheadTime.min;
312       iss >> ips->overheadTime.avg;
313       iss >> ips->overheadTime.max;
314
315       // read bytePerInvoke
316       iss >> ips->bytesPerInvoke;
317
318       // read grain size
319       iss >> ips->grainSize;
320
321       // Read control point values
322       for(int cp=0;cp<numControlPointNames;cp++){
323         int cpvalue;
324         iss >> cpvalue;
325         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
326       }
327
328       // ignore median time
329       double mt;
330       iss >> mt;
331
332       // Read all times
333
334       double time;
335
336       while(iss >> time){
337         ips->times.push_back(time);
338 #if DEBUGPRINT > 5
339         CkPrintf("read time %lf from file\n", time);
340 #endif
341       }
342
343       allData.phases.push_back(ips);
344
345     }
346
347     infile.close();
348   }
349
350
351
352   /// Add the current data to allData and output it to a file
353   void controlPointManager::writeDataFile(){
354     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
355     ofstream outfile(CPDataFilename);
356     allData.cleanupNames();
357
358 //    string s = allData.toString();
359 //    CkPrintf("At end: \n %s\n", s.c_str());
360
361     if(shouldFilterOutputData){
362       allData.verify();
363       allData.filterOutIncompletePhases();
364     }
365
366 //    string s2 = allData.toString();
367 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
368     if(allData.toString().length() > 10){
369       outfile << allData.toString();
370     } else {
371       outfile << " No data available to save to disk " << endl;
372     }
373     outfile.close();
374   }
375
376   /// User can register a callback that is called when application should advance to next phase
377   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
378     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
379     controlPointChangeCallback = cb;
380     haveControlPointChangeCallback = true;
381   }
382
383
384 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
385 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
386   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
387 }
388
389   /// Called periodically by the runtime to handle the control points
390   /// Currently called on each PE
391   void controlPointManager::processControlPoints(){
392
393     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
394
395
396     //==========================================================================================
397     // Print the data for each phase
398
399     const int s = allData.phases.size();
400
401 #if DEBUGPRINT
402     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
403     for(int p=0;p<s;++p){
404       const instrumentedPhase &phase = allData.phases[p];
405       const idleTimeContainer &idle = phase.idleTime;
406       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
407       vector<double> const &times = phase.times;
408
409       CkPrintf("Phase %d:\n", p);
410       idle.print();
411      //   CkPrintf("critical paths: (* affected by control point)\n");
412         //   for(int i=0;i<criticalPaths.size(); i++){
413         // If affected by a control point
414         //      criticalPaths[i].print();
415
416       //        criticalPaths[i].print();
417       //        CkPrintf("\n");
418         
419
420       //   }
421       CkPrintf("Timings:\n");
422       for(int i=0;i<times.size(); i++){
423         CkPrintf("%lf ", times[i]);
424       }
425       CkPrintf("\n");
426
427     }
428     
429     CkPrintf("\n\n");
430
431
432 #endif
433
434
435
436     //==========================================================================================
437     // If this is a phase during which we try to adapt control point values based on critical path
438 #if 0
439     if( s%5 == 4) {
440
441       // Find the most recent phase with valid critical path data and idle time measurements      
442       int whichPhase=-1;
443       for(int p=0; p<s; ++p){
444         const instrumentedPhase &phase = allData.phases[p];
445         const idleTimeContainer &idle = phase.idleTime;
446         if(idle.isValid() && phase.criticalPaths.size()>0 ){
447           whichPhase = p;
448         }
449       }
450       
451       
452       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
453       const instrumentedPhase &phase = allData.phases[whichPhase];
454       const idleTimeContainer &idle = phase.idleTime;
455       
456       if(idle.min > 0.1){
457         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
458         
459         // Determine the median critical path for this phase
460         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
461         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
462         CkAssert(phase.criticalPaths.size() > 0);
463         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
464         
465         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
466         
467         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
468           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
469
470
471           CkPrintf("Finding control points related to the critical path\n");
472           int cpcount = 0;
473           std::set<std::string> controlPointsAffectingCriticalPath;
474
475           
476           for(int e=0;e<path.getNumUsed();e++){
477             if(path.getUsedCount(e)>0){
478               int ep = path.getUsedEp(e);
479
480               std::map<std::string, std::set<int> >::iterator iter;
481               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
482                 if(iter->second.count(ep)>0){
483                   controlPointsAffectingCriticalPath.insert(iter->first);
484                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
485                   cpcount++;
486                 }
487               }
488               
489             }
490           }
491           
492
493           if(cpcount==0){
494             CkPrintf("No control points are known to affect the critical path\n");
495           } else {
496             double beginTime = CmiWallTimer();
497
498             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
499             
500             newControlPoints = phase.controlPoints;
501             
502             if(frameworkShouldAdvancePhase){
503               gotoNextPhase();  
504             }
505             
506             if(haveControlPointChangeCallback){ 
507 #if DEBUGPRINT
508               CkPrintf("Calling control point change callback\n");
509 #endif
510               // Create a high priority message and send it to the callback
511               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
512               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
513               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
514               controlPointChangeCallback.send(msg);
515               
516             }
517             
518             
519             // adjust the control points that can affect the critical path
520
521             char textDescription[4096*2];
522             textDescription[0] = '\0';
523
524             std::map<std::string,int>::iterator newCP;
525             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
526               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
527                 // decrease the value (increase priority) if within range
528                 int lowerbound = controlPointSpace[newCP->first].first;
529                 if(newCP->second > lowerbound){
530                   newControlPointsAvailable = true;
531                   newCP->second --;
532                 }
533               }
534             }
535            
536             // Create a string for a projections user event
537             if(1){
538               std::map<std::string,int>::iterator newCP;
539               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
540                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
541               }
542             }
543
544             char *userEventString = new char[4096*2];
545             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
546             
547             static int userEventCounter = 20000;
548             CkPrintf("USER EVENT: %s\n", userEventString);
549             
550             traceRegisterUserEvent(userEventString, userEventCounter); 
551             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
552             userEventCounter++;
553             
554           }
555           
556         } else {
557           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
558         }
559         
560         
561       }
562       
563     } else {
564       
565       
566     }
567    
568 #endif
569
570
571
572     if(frameworkShouldAdvancePhase){
573       gotoNextPhase();  
574     }
575     
576     if(haveControlPointChangeCallback){ 
577       // Create a high priority message and send it to the callback
578       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
579       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
580       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
581       controlPointChangeCallback.send(msg);
582     }
583     
584   }
585   
586   /// Determine if any control point is known to affect an entry method
587   bool controlPointManager::controlPointAffectsThisEP(int ep){
588     std::map<std::string, std::set<int> >::iterator iter;
589     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
590       if(iter->second.count(ep)>0){
591         return true;
592       }
593     }
594     return false;    
595   }
596   
597   /// Determine if any control point is known to affect a chare array  
598   bool controlPointManager::controlPointAffectsThisArray(int array){
599     std::map<std::string, std::set<int> >::iterator iter;
600     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
601       if(iter->second.count(array)>0){
602         return true;
603       }
604     }
605     return false;   
606   }
607   
608
609   /// The data from the current phase
610   instrumentedPhase * controlPointManager::currentPhaseData(){
611     int s = allData.phases.size();
612     CkAssert(s>=1);
613     return allData.phases[s-1];
614   }
615  
616
617   /// The data from the previous phase
618   instrumentedPhase * controlPointManager::previousPhaseData(){
619     int s = allData.phases.size();
620     if(s >= 2 && phase_id > 0) {
621       return allData.phases[s-2];
622     } else {
623       return NULL;
624     }
625   }
626  
627   /// The data from two phases back
628   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
629     int s = allData.phases.size();
630     if(s >= 3 && phase_id > 0) {
631       return allData.phases[s-3];
632     } else {
633       return NULL;
634     }
635   }
636   
637
638   /// Called by either the application or the Control Point Framework to advance to the next phase  
639   void controlPointManager::gotoNextPhase(){
640     
641 #ifndef CP_DISABLE_TRACING
642     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
643     fflush(stdout);
644
645     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
646       alreadyRequestedAll = true;
647       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
648       CkPrintf("Requesting all measurements\n");
649       thisProxy.requestAll(*cb);
650       delete cb;
651     
652     } else {
653       
654       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
655         alreadyRequestedMemoryUsage = true;
656         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
657         thisProxy.requestMemoryUsage(*cb);
658         delete cb;
659       }
660       
661       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
662         alreadyRequestedIdleTime = true;
663         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
664         thisProxy.requestIdleTime(*cb);
665         delete cb;
666       }
667     }
668     
669
670 #endif
671
672
673
674 #if CMK_LBDB_ON && 0
675     LBDatabase * myLBdatabase = LBDatabaseObj();
676     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
677     const CkVec<LBObj*> objs = myLBDB->getObjs();
678     const int objCount = myLBDB->getObjCount();
679     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
680     
681     floatType maxObjWallTime = -1.0;
682     
683     for(int i=0;i<objs.length();i++){
684       LBObj* o = objs[i];
685       const LDObjData d = o->ObjData();
686       floatType cpuTime = d.cpuTime;
687       floatType wallTime = d.wallTime;
688       // can also get object handles from the LDObjData struct
689       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
690       if(wallTime > maxObjWallTime){
691
692       }
693       
694     }
695
696     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
697     
698 #endif    
699
700
701     
702     // increment phase id
703     phase_id++;
704     
705
706     // Create new entry for the phase we are starting now
707     instrumentedPhase * newPhase = new instrumentedPhase();
708     allData.phases.push_back(newPhase);
709     
710     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
711
712   }
713
714   /// An application uses this to register an instrumented timing for this phase
715   void controlPointManager::setTiming(double time){
716     currentPhaseData()->times.push_back(time);
717
718 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
719        
720     // First we should register this currently executing message as a path, because it is likely an important one to consider.
721     //    registerTerminalEntryMethod();
722     
723     // save the critical path for this phase
724     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
725     //    maxTerminalPathHistory.reset();
726     
727     
728     // Reset the counts for the currently executing message
729     //resetThisEntryPath();
730         
731 #endif
732     
733   }
734   
735   /// Entry method called on all PEs to request CPU utilization statistics
736   void controlPointManager::requestIdleTime(CkCallback cb){
737 #ifndef CP_DISABLE_TRACING
738     double i = localControlPointTracingInstance()->idleRatio();
739     double idle[3];
740     idle[0] = i;
741     idle[1] = i;
742     idle[2] = i;
743     
744     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
745     
746     localControlPointTracingInstance()->resetTimings();
747
748     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
749 #else
750     CkAbort("Should not get here\n");
751 #endif
752   }
753   
754   /// All processors reduce their memory usages in requestIdleTime() to this method
755   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
756 #ifndef CP_DISABLE_TRACING
757     int size=msg->getSize() / sizeof(double);
758     CkAssert(size==3);
759     double *r=(double *) msg->getData();
760         
761     instrumentedPhase* prevPhase = previousPhaseData();
762     if(prevPhase != NULL){
763       prevPhase->idleTime.min = r[0];
764       prevPhase->idleTime.avg = r[1]/CkNumPes();
765       prevPhase->idleTime.max = r[2];
766       prevPhase->idleTime.print();
767       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
768     } else {
769       CkPrintf("There is no previous phase to store the idle time measurements\n");
770     }
771     
772     alreadyRequestedIdleTime = false;
773     checkForShutdown();
774     delete msg;
775 #else
776     CkAbort("Should not get here\n");
777 #endif
778   }
779
780
781
782
783
784
785   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
786   void controlPointManager::requestAll(CkCallback cb){
787 #ifndef CP_DISABLE_TRACING
788     TraceControlPoints *t = localControlPointTracingInstance();
789
790     double data[ALL_REDUCTION_SIZE];
791
792     double *idle = data;
793     double *over = data+3;
794     double *mem = data+6;
795     double *msgBytes = data+7;  
796     double *grainsize = data+11;  
797
798     const double i = t->idleRatio();
799     idle[0] = i;
800     idle[1] = i;
801     idle[2] = i;
802
803     const double o = t->overheadRatio();
804     over[0] = o;
805     over[1] = o;
806     over[2] = o;
807
808     const double m = t->memoryUsageMB();
809     mem[0] = m;
810
811     msgBytes[0] = t->b2;
812     msgBytes[1] = t->b3;
813     msgBytes[2] = t->b2mlen;
814     msgBytes[3] = t->b3mlen;
815
816     grainsize[0] = t->grainSize();
817     
818     localControlPointTracingInstance()->resetAll();
819
820     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
821 #else
822     CkAbort("Should not get here\n");
823 #endif
824   }
825   
826   /// All processors reduce their memory usages in requestIdleTime() to this method
827   void controlPointManager::gatherAll(CkReductionMsg *msg){
828 #ifndef CP_DISABLE_TRACING
829     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
830     int size=msg->getSize() / sizeof(double);
831     double *data=(double *) msg->getData();
832         
833     double *idle = data;
834     double *over = data+3;
835     double *mem = data+6;
836     double *msgBytes = data+7;
837     double *granularity = data+11;
838
839
840     //    std::string b = allData.toString();
841
842     instrumentedPhase* prevPhase = previousPhaseData();
843     if(prevPhase != NULL){
844       prevPhase->idleTime.min = idle[0];
845       prevPhase->idleTime.avg = idle[1]/CkNumPes();
846       prevPhase->idleTime.max = idle[2];
847       
848       prevPhase->overheadTime.min = over[0];
849       prevPhase->overheadTime.avg = over[1]/CkNumPes();
850       prevPhase->overheadTime.max = over[2];
851       
852       prevPhase->memoryUsageMB = mem[0];
853       
854       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
855
856       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
857       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
858
859       /** The average of the grain sizes on all PEs in us */
860       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
861
862       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
863       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
864
865       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
866       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
867
868       if(bytesPerInvoke2 > bytesPerInvoke3)
869         prevPhase->bytesPerInvoke = bytesPerInvoke2;
870       else
871         prevPhase->bytesPerInvoke = bytesPerInvoke3;
872
873       //      prevPhase->print();
874       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
875
876       //      std::string a = allData.toString();
877
878       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
879       
880     } else {
881       CkPrintf("There is no previous phase to store measurements\n");
882     }
883     
884     alreadyRequestedAll = false;
885     checkForShutdown();
886     delete msg;
887 #else
888     CkAbort("Should not get here\n");
889 #endif
890   }
891
892
893
894
895   void controlPointManager::checkForShutdown(){
896     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
897       doExitNow();
898     }
899   }
900
901
902   void controlPointManager::exitIfReady(){
903      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
904        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
905        doExitNow();
906      } else {
907        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
908        exitWhenReady = true;
909      }
910   }
911
912
913
914   void controlPointManager::doExitNow(){
915           writeOutputToDisk();
916           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
917           CkExit();
918   }
919
920   void controlPointManager::writeOutputToDisk(){
921           if(writeDataFileAtShutdown){
922                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
923           }
924   }
925
926
927   /// Entry method called on all PEs to request memory usage
928   void controlPointManager::requestMemoryUsage(CkCallback cb){
929     int m = CmiMaxMemoryUsage() / 1024 / 1024;
930     CmiResetMaxMemory();
931     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
932     contribute(sizeof(int),&m,CkReduction::max_int, cb);
933   }
934
935   /// All processors reduce their memory usages to this method
936   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
937     int size=msg->getSize() / sizeof(int);
938     CkAssert(size==1);
939     int *m=(int *) msg->getData();
940
941     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
942     
943     instrumentedPhase* prevPhase = previousPhaseData();
944     if(prevPhase != NULL){
945       prevPhase->memoryUsageMB = m[0];
946     } else {
947       CkPrintf("No place to store memory usage");
948     }
949
950     alreadyRequestedMemoryUsage = false;
951     checkForShutdown();
952     delete msg;
953   }
954
955
956 //   /// Inform the control point framework that a named control point affects the priorities of some array  
957 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
958 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
959     
960 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
961 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
962 //     } else {
963 //       std::set<int> s;
964 //       s.insert(groupIdx);
965 //       affectsPrioritiesArray[std::string(name)] = s;
966 //     }
967     
968 // #if DEBUGPRINT   
969 //     std::map<std::string, std::set<int> >::iterator f;
970 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
971 //       std::string name = f->first;
972 //       std::set<int> &vals = f->second;
973 //       cout << "Control point " << name << " affects arrays: ";
974 //       std::set<int>::iterator i;
975 //       for(i=vals.begin(); i!=vals.end();++i){
976 //      cout << *i << " ";
977 //       }
978 //       cout << endl;
979 //     }
980 // #endif
981     
982 //   }
983   
984 //   /// Inform the control point framework that a named control point affects the priority of some entry method
985 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
986 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
987
988 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
989 //       affectsPrioritiesEP[std::string(name)].insert(idx);
990 //     } else {
991 //       std::set<int> s;
992 //       s.insert(idx);
993 //       affectsPrioritiesEP[std::string(name)] = s;
994 //     }
995     
996 // #if DEBUGPRINT
997 //     std::map<std::string, std::set<int> >::iterator f;
998 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
999 //       std::string name = f->first;
1000 //       std::set<int> &vals = f->second;
1001 //       cout << "Control point " << name << " affects EP: ";
1002 //       std::set<int>::iterator i;
1003 //       for(i=vals.begin(); i!=vals.end();++i){
1004 //      cout << *i << " ";
1005 //       }
1006 //       cout << endl;
1007 //     }
1008 // #endif
1009
1010
1011 //   }
1012   
1013
1014
1015 /// An interface callable by the application.
1016 void gotoNextPhase(){
1017   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1018 }
1019
1020 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1021 {
1022   gotoNextPhase();
1023 }
1024
1025
1026 /// A mainchare that is used just to create our controlPointManager group at startup
1027 class controlPointMain : public CBase_controlPointMain {
1028 public:
1029   controlPointMain(CkArgMsg* args){
1030 #if OLDRANDSEED
1031     struct timeval tp;
1032     gettimeofday(& tp, NULL);
1033     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1034 #else
1035     double time = CmiWallTimer();
1036     int sec = (int)time;
1037     int usec = (int)((time - (double)sec)*1000000.0);
1038     random_seed =  sec ^ usec;
1039 #endif
1040     
1041     
1042     double period;
1043     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1044     if(haveSamplePeriod){
1045       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1046       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1047     } else {
1048       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1049     }
1050     
1051     
1052     
1053     whichTuningScheme = RandomSelection;
1054
1055
1056     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1057       whichTuningScheme = RandomSelection;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1059       whichTuningScheme = ExhaustiveSearch;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1061       whichTuningScheme = AlwaysDefaults;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1063       whichTuningScheme = SimulatedAnnealing;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1065       whichTuningScheme = CriticalPathAutoPrioritization;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1067       whichTuningScheme = UseBestKnownTiming;
1068     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1069       whichTuningScheme = UseSteering;
1070     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1071       whichTuningScheme = MemoryAware;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1073       whichTuningScheme = Simplex;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1075       whichTuningScheme = DivideAndConquer;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1077       whichTuningScheme = LDBPeriod;
1078     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1079       whichTuningScheme = LDBPeriodLinear;
1080     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1081       whichTuningScheme = LDBPeriodQuadratic;
1082     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1083       whichTuningScheme = LDBPeriodOptimal;
1084     }
1085
1086     char *defValStr = NULL;
1087     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1088       CkPrintf("You specified default value string: %s\n", defValStr);
1089       
1090       // Parse the string, looking for commas
1091      
1092
1093       char *tok = strtok(defValStr, ",");
1094       while (tok) {
1095         // split on the equal sign
1096         int len = strlen(tok);
1097         char *eqsign = strchr(tok, '=');
1098         if(eqsign != NULL && eqsign != tok){
1099           *eqsign = '\0';
1100           char *cpname = tok;
1101           std::string cpName(tok);
1102           char *cpDefVal = eqsign+1;      
1103           int v=-1;
1104           if(sscanf(cpDefVal, "%d", &v) == 1){
1105             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1106             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1107             defaultControlPointValues[cpName] = v;
1108           }
1109         }
1110         tok = strtok(NULL, ",");
1111       }
1112
1113     }
1114
1115     shouldGatherAll = false;
1116     shouldGatherMemoryUsage = false;
1117     shouldGatherUtilization = false;
1118     
1119     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1120       shouldGatherAll = true;
1121     } else {
1122       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1123         shouldGatherMemoryUsage = true;
1124       }
1125       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1126         shouldGatherUtilization = true;
1127       }
1128     }
1129     
1130     writeDataFileAtShutdown = false;   
1131     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1132       writeDataFileAtShutdown = true;
1133     }
1134
1135     shouldFilterOutputData = true;
1136     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1137       shouldFilterOutputData = false;
1138     }
1139
1140
1141    loadDataFileAtStartup = false;   
1142     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1143       loadDataFileAtStartup = true;
1144     }
1145
1146     char *cpdatafile;
1147     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1148       sprintf(CPDataFilename, "%s", cpdatafile);
1149     } else {
1150       sprintf(CPDataFilename, "controlPointData.txt");
1151     }
1152
1153
1154     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1155   }
1156   ~controlPointMain(){}
1157 };
1158
1159 /// An interface callable by the application.
1160 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1161   CkAssert(CkMyPe() == 0);
1162   CkPrintf("Application has registered a control point change callback\n");
1163   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1164 }
1165
1166 /// An interface callable by the application.
1167 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1168   if(CkMyPe() == 0){
1169     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1170     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1171   }
1172 }
1173
1174 /// An interface callable by the application.
1175 void registerControlPointTiming(double time){
1176   CkAssert(CkMyPe() == 0);
1177 #if DEBUGPRINT>0
1178   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1179 #endif
1180   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1181 }
1182
1183 /// An interface callable by the application.
1184 void controlPointTimingStamp() {
1185   CkAssert(CkMyPe() == 0);
1186 #if DEBUGPRINT>0
1187   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1188 #endif
1189   
1190   static double prev_time = 0.0;
1191   double t = CmiWallTimer();
1192   double duration = t - prev_time;
1193   prev_time = t;
1194     
1195   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1196 }
1197
1198 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1199 {
1200   controlPointTimingStamp();
1201 }
1202
1203
1204 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1205 {
1206   setFrameworkAdvancePhase(*value);
1207 }
1208
1209
1210
1211
1212 /// Shutdown the control point framework, writing data to disk if necessary
1213 extern "C" void controlPointShutdown(){
1214   if(CkMyPe() == 0){
1215
1216     // wait for gathering of idle time & memory usage to complete
1217     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1218
1219   }
1220 }
1221
1222 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1223 void controlPointInitNode(){
1224 //  CkPrintf("controlPointInitNode()\n");
1225   registerExitFn(controlPointShutdown);
1226 }
1227
1228 /// Called periodically to allow control point framework to do things periodically
1229 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1230 #ifdef DEBUGPRINT
1231   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1232 #endif
1233   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1234   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1235 }
1236
1237
1238
1239
1240
1241 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1242 /// user observed characteristic to adapt specific control point values.
1243 /// This function must return valid values for newControlPoints.
1244 void controlPointManager::generatePlan() {
1245   const double startGenerateTime = CmiWallTimer();
1246   const int phase_id = this->phase_id;
1247   const int effective_phase = allData.phases.size();
1248
1249   // Only generate a plan once per phase
1250   if(generatedPlanForStep == phase_id) 
1251     return;
1252   generatedPlanForStep = phase_id;
1253  
1254   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1255   printTuningScheme();
1256
1257   // By default lets put the previous phase data into newControlPoints
1258   instrumentedPhase *prevPhase = previousPhaseData();
1259   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1260           newControlPoints[cpsIter->first] = cpsIter->second;
1261   }
1262
1263
1264   if( whichTuningScheme == RandomSelection ){
1265     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1266     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1267       const std::string &name = cpsIter->first;
1268       const std::pair<int,int> &bounds = cpsIter->second;
1269       const int lb = bounds.first;
1270       const int ub = bounds.second;
1271       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1272     }
1273   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1274     // -----------------------------------------------------------
1275     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1276     //   
1277     // This scheme will return the median value for the range 
1278     // early on, and then will transition over to the new control points
1279     // determined by the critical path adapting code
1280
1281     // This won't work until the periodic function is fixed up a bit
1282
1283     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1284     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1285       const std::string &name = cpsIter->first;
1286       const std::pair<int,int> &bounds = cpsIter->second;
1287       const int lb = bounds.first;
1288       const int ub = bounds.second;
1289       newControlPoints[name] =  (lb+ub)/2;
1290     }
1291
1292   } else if ( whichTuningScheme == MemoryAware ) {
1293
1294     // -----------------------------------------------------------
1295     //  STEERING BASED ON MEMORY USAGE
1296
1297     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1298     instrumentedPhase *prevPhase = previousPhaseData();
1299  
1300     if(phase_id%2 == 0){
1301       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1302       twoAgoPhase->print();
1303       CkPrintf("\n");
1304       fflush(stdout);
1305       
1306       // See if memory usage is low:
1307       double memUsage = twoAgoPhase->memoryUsageMB;
1308       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1309       fflush(stdout);
1310       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1311         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1312         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1313         
1314         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1315         newControlPoints = twoAgoPhase->controlPoints;
1316
1317
1318         CkPrintf("Steering (memory based) initialized plan\n");
1319         fflush(stdout);
1320
1321         // look for a possible control point knob to turn
1322         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1323         
1324         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1325         bool found = false;
1326         std::string cpName;
1327         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1328         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1329         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1330           cpName = iter->first;
1331           info = &iter->second;
1332           found = true;
1333           break;
1334         }
1335
1336         // Adapt the control point value
1337         if(found){
1338           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1339           fflush(stdout);
1340           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1341           const int maxValue = controlPointSpace[cpName].second;
1342           
1343           if(twoAgoValue+1 <= maxValue){
1344             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1345           }
1346         }
1347         
1348       }
1349     }
1350
1351     CkPrintf("Steering (memory based) done for this phase\n");
1352     fflush(stdout);
1353
1354   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1355
1356     // -----------------------------------------------------------
1357     //  USE BEST KNOWN TIME  
1358     static bool firstTime = true;
1359     if(firstTime){
1360       firstTime = false;
1361       instrumentedPhase *best = allData.findBest(); 
1362       CkPrintf("Best known phase is:\n");
1363       best->print();
1364       CkPrintf("\n");
1365       
1366       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1367       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1368         const std::string &name = cpsIter->first;
1369         newControlPoints[name] =  best->controlPoints[name];
1370       }
1371     }
1372   } else if( whichTuningScheme == LDBPeriod) {
1373     // Assume this is used in this manner:
1374     //  1) go to next phase
1375     //  2) request control point
1376     //  3) load balancing
1377     //  4) computation
1378     
1379     
1380     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1381     instrumentedPhase *prevPhase = previousPhaseData();
1382     
1383     
1384     const std::vector<double> &times = twoAgoPhase->times;
1385     const int oldNumTimings = times.size();
1386
1387
1388     const std::vector<double> &timesNew = prevPhase->times;
1389     const int newNumTimings = timesNew.size();
1390
1391
1392     if(oldNumTimings > 4 && newNumTimings > 4){
1393       
1394       // Build model of execution time based on two phases ago
1395       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1396       
1397       double oldSum = 0;
1398       
1399       for(int i=2; i<oldNumTimings; i++){
1400         oldSum += times[i];
1401       }
1402       
1403       const double oldAvg = oldSum / (oldNumTimings-2);
1404       
1405       
1406       
1407       
1408       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1409       const double expectedTotalTime = oldAvg * newNumTimings;
1410       
1411       
1412       // Measure actual time
1413       double newSum = 0.0;
1414       for(int i=2; i<newNumTimings; ++i){
1415         newSum += timesNew[i];
1416       }
1417       
1418       const double newAvg = newSum / (newNumTimings-2);
1419       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1420       
1421       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1422       
1423       // Determine load balance cost
1424       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1425       
1426       const double benefitAfterLB = benefit - lbcost;
1427     
1428     
1429       // Determine whether LB cost outweights the estimated benefit
1430       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1431     
1432     
1433       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1434       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1435         const std::string &name = cpsIter->first;
1436         const std::pair<int,int> &bounds = cpsIter->second;
1437         const int lb = bounds.first;
1438         const int ub = bounds.second;
1439       
1440         if(benefitAfterLB > 0){
1441           CkPrintf("Constant Model: Beneficial LB\n");
1442           int newval = newControlPoints[name] / 2;
1443           if(newval > lb)
1444             newControlPoints[name] = newval;
1445           else 
1446             newControlPoints[name] = lb;
1447         } else {
1448           CkPrintf("Constant Model: Detrimental LB\n");
1449           int newval = newControlPoints[name] * 2;
1450           if(newval < ub)
1451             newControlPoints[name] = newval;
1452           else
1453             newControlPoints[name] = ub;
1454         }
1455       }
1456     }
1457     
1458     
1459   }  else if( whichTuningScheme == LDBPeriodLinear) {
1460     // Assume this is used in this manner:
1461     //  1) go to next phase
1462     //  2) request control point
1463     //  3) load balancing
1464     //  4) computation
1465
1466
1467     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1468     instrumentedPhase *prevPhase = previousPhaseData();
1469     
1470     const std::vector<double> &times = twoAgoPhase->times;
1471     const int oldNumTimings = times.size();
1472
1473     const std::vector<double> &timesNew = prevPhase->times;
1474     const int newNumTimings = timesNew.size();
1475     
1476
1477     if(oldNumTimings > 4 && newNumTimings > 4){
1478
1479       // Build model of execution time based on two phases ago
1480       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1481       const int b1 = 2 + (oldNumTimings-2)/2;
1482       double s1 = 0;
1483       double s2 = 0;
1484     
1485       const double ldbStepsTime = times[0] + times[1];
1486     
1487       for(int i=2; i<b1; i++){
1488         s1 += times[i];
1489       }
1490       for(int i=b1; i<oldNumTimings; i++){
1491         s2 += times[i];
1492       }
1493       
1494       
1495       // Compute the estimated time for the last phase's data
1496     
1497       const double a1 = s1 / (double)(b1-2);
1498       const double a2 = s2 / (double)(oldNumTimings-b1);
1499       const double aold = (a1+a2)/2.0;
1500
1501       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1502         
1503     
1504       // Measure actual time
1505       double sum = 0.0;
1506       for(int i=2; i<newNumTimings; ++i){
1507         sum += timesNew[i];
1508       }
1509
1510       const double avg = sum / ((double)(newNumTimings-2));
1511       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1512
1513       const double benefit = expectedTotalTime - actualTotalTime;
1514
1515       // Determine load balance cost
1516       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1517
1518       const double benefitAfterLB = benefit - lbcost;
1519
1520     
1521       // Determine whether LB cost outweights the estimated benefit
1522       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1523     
1524     
1525     
1526       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1527       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1528         const std::string &name = cpsIter->first;
1529         const std::pair<int,int> &bounds = cpsIter->second;
1530         const int lb = bounds.first;
1531         const int ub = bounds.second;
1532       
1533         if(benefitAfterLB > 0){
1534           CkPrintf("Linear Model: Beneficial LB\n");
1535           int newval = newControlPoints[name] / 2;
1536           if(newval > lb)
1537             newControlPoints[name] = newval;
1538           else 
1539             newControlPoints[name] = lb;
1540         } else {
1541           CkPrintf("Linear Model: Detrimental LB\n");
1542           int newval = newControlPoints[name] * 2;
1543           if(newval < ub)
1544             newControlPoints[name] = newval;
1545           else 
1546             newControlPoints[name] = ub;
1547         }
1548       }
1549     }
1550
1551   }
1552
1553   else if( whichTuningScheme == LDBPeriodQuadratic) {
1554     // Assume this is used in this manner:
1555     //  1) go to next phase
1556     //  2) request control point
1557     //  3) load balancing
1558     //  4) computation
1559
1560
1561     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1562     instrumentedPhase *prevPhase = previousPhaseData();
1563         
1564     const std::vector<double> &times = twoAgoPhase->times;
1565     const int oldNumTimings = times.size();
1566
1567     const std::vector<double> &timesNew = prevPhase->times;
1568     const int newNumTimings = timesNew.size();
1569
1570     
1571     if(oldNumTimings > 4 && newNumTimings > 4){
1572
1573
1574       // Build model of execution time based on two phases ago
1575       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1576       const int b1 = 2 + (oldNumTimings-2)/3;
1577       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1578       double s1 = 0;
1579       double s2 = 0;
1580       double s3 = 0;
1581
1582       const double ldbStepsTime = times[0] + times[1];
1583     
1584       for(int i=2; i<b1; i++){
1585         s1 += times[i];
1586       }
1587       for(int i=b1; i<b2; i++){
1588         s2 += times[i];
1589       }
1590       for(int i=b2; i<oldNumTimings; i++){
1591         s3 += times[i];
1592       }
1593
1594     
1595       // Compute the estimated time for the last phase's data
1596     
1597       const double a1 = s1 / (double)(b1-2);
1598       const double a2 = s2 / (double)(b2-b1);
1599       const double a3 = s3 / (double)(oldNumTimings-b2);
1600     
1601       const double a = (a1-2.0*a2+a3)/2.0;
1602       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1603       const double c = a3;
1604     
1605       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1606       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1607       const double x2 = 0.5;
1608       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1609    
1610     
1611       // Measure actual time
1612       double sum = 0.0;
1613       for(int i=2; i<newNumTimings; ++i){
1614         sum += timesNew[i];
1615       }
1616
1617       const double avg = sum / ((double)(newNumTimings-2));
1618       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1619
1620       const double benefit = expectedTotalTime - actualTotalTime;
1621
1622       // Determine load balance cost
1623       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1624
1625       const double benefitAfterLB = benefit - lbcost;
1626
1627     
1628       // Determine whether LB cost outweights the estimated benefit
1629       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1630     
1631     
1632     
1633       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1634       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1635         const std::string &name = cpsIter->first;
1636         const std::pair<int,int> &bounds = cpsIter->second;
1637         const int lb = bounds.first;
1638         const int ub = bounds.second;
1639       
1640         if(benefitAfterLB > 0){
1641           CkPrintf("QuadraticModel: Beneficial LB\n");
1642           int newval = newControlPoints[name] / 2;
1643           if(newval > lb)
1644             newControlPoints[name] = newval;
1645           else 
1646             newControlPoints[name] = lb;
1647         } else {
1648           CkPrintf("QuadraticModel: Detrimental LB\n");
1649           int newval = newControlPoints[name] * 2;
1650           if(newval < ub)
1651             newControlPoints[name] = newval;
1652           else 
1653             newControlPoints[name] = ub;
1654         }
1655       
1656       }
1657     }
1658     
1659
1660   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1661     // Assume this is used in this manner:
1662     //  1) go to next phase
1663     //  2) request control point
1664     //  3) load balancing
1665     //  4) computation
1666
1667
1668
1669     instrumentedPhase *prevPhase = previousPhaseData();
1670     
1671     const std::vector<double> &times = prevPhase->times;
1672     const int numTimings = times.size();
1673     
1674     if( numTimings > 4){
1675
1676       const int b1 = 2 + (numTimings-2)/2;
1677       double s1 = 0;
1678       double s2 = 0;
1679     
1680     
1681       for(int i=2; i<b1; i++){
1682         s1 += times[i];
1683       }
1684       for(int i=b1; i<numTimings; i++){
1685         s2 += times[i];
1686       }
1687       
1688     
1689       const double a1 = s1 / (double)(b1-2);
1690       const double a2 = s2 / (double)(numTimings-b1);
1691       const double avg = (a1+a1) / 2.0;
1692
1693       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1694
1695       const double ldbStepsTime = times[0] + times[1];
1696       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1697       
1698       int newval = lround(sqrt(2.0*lbcost/m));
1699       
1700
1701       CkPrintf("Optimal Model: lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1702     
1703     
1704       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1705       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1706         // TODO: lookup only control points that are relevant instead of all of them
1707         const std::string &name = cpsIter->first;
1708         const std::pair<int,int> &bounds = cpsIter->second;
1709         const int lb = bounds.first;
1710         const int ub = bounds.second;
1711         
1712         if(newval < lb){
1713           newControlPoints[name] = lb;
1714         } else if(newval > ub){
1715           newControlPoints[name] = ub;
1716         } else {
1717           newControlPoints[name] = newval;
1718         } 
1719         
1720       }
1721       
1722       
1723     }
1724     
1725  
1726
1727   } else if ( whichTuningScheme == UseSteering ) {
1728           // -----------------------------------------------------------
1729           //  STEERING BASED ON KNOWLEDGE
1730
1731           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1732           // plans are only generated after 3 phases
1733
1734           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1735           instrumentedPhase *prevPhase = previousPhaseData();
1736
1737           if(phase_id%4 == 0){
1738                   CkPrintf("Steering based on 2 phases ago:\n");
1739                   twoAgoPhase->print();
1740                   CkPrintf("\n");
1741                   fflush(stdout);
1742
1743                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1744
1745
1746                   // ========================================= Concurrency =============================================
1747                   // See if idle time is high:
1748                   {
1749                           double idleTime = twoAgoPhase->idleTime.avg;
1750                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1751                           fflush(stdout);
1752                           if(idleTime > 0.10){
1753                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1754                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1755
1756                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1757
1758                                   bool found = false;
1759                                   std::string cpName;
1760                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1761                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1762                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1763                                           cpName = iter->first;
1764                                           info = &iter->second;
1765
1766                                           // Initialize a new plan based on two phases ago
1767                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1768
1769                                           CkPrintf("Steering found knob to turn\n");
1770                                           fflush(stdout);
1771
1772                                           if(info->first == ControlPoint::EFF_INC){
1773                                                   const int maxValue = controlPointSpace[cpName].second;
1774                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1775                                                   if(twoAgoValue+1 <= maxValue){
1776                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1777                                                   }
1778                                           } else {
1779                                                   const int minValue = controlPointSpace[cpName].second;
1780                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1781                                                   if(twoAgoValue-1 >= minValue){
1782                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1783                                                   }
1784                                           }
1785
1786                                           possibleNextStepPlans.push_back(aNewPlan);
1787
1788                                   }
1789                           }
1790                   }
1791
1792                   // ========================================= Grain Size =============================================
1793                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1794                   {
1795                           double overheadTime = twoAgoPhase->overheadTime.avg;
1796                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1797                           fflush(stdout);
1798                           if(overheadTime > 0.10){
1799                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1800                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1801
1802                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1803
1804                                   bool found = false;
1805                                   std::string cpName;
1806                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1807                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1808                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1809                                           cpName = iter->first;
1810                                           info = &iter->second;
1811
1812                                           // Initialize a new plan based on two phases ago
1813                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1814
1815
1816
1817                                           CkPrintf("Steering found knob to turn\n");
1818                                           fflush(stdout);
1819
1820                                           if(info->first == ControlPoint::EFF_INC){
1821                                                   const int maxValue = controlPointSpace[cpName].second;
1822                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1823                                                   if(twoAgoValue+1 <= maxValue){
1824                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1825                                                   }
1826                                           } else {
1827                                                   const int minValue = controlPointSpace[cpName].second;
1828                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1829                                                   if(twoAgoValue-1 >= minValue){
1830                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1831                                                   }
1832                                           }
1833
1834                                           possibleNextStepPlans.push_back(aNewPlan);
1835
1836                                   }
1837
1838                           }
1839                   }
1840                   // ========================================= GPU Offload =============================================
1841                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1842                   {
1843                           double idleTime = twoAgoPhase->idleTime.avg;
1844                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1845                           fflush(stdout);
1846                           if(idleTime > 0.10){
1847                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1848                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1849
1850                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1851
1852                                   bool found = false;
1853                                   std::string cpName;
1854                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1855                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1856                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1857                                           cpName = iter->first;
1858                                           info = &iter->second;
1859
1860                                           // Initialize a new plan based on two phases ago
1861                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1862
1863
1864                                           CkPrintf("Steering found knob to turn\n");
1865                                           fflush(stdout);
1866
1867                                           if(info->first == ControlPoint::EFF_DEC){
1868                                                   const int maxValue = controlPointSpace[cpName].second;
1869                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1870                                                   if(twoAgoValue+1 <= maxValue){
1871                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1872                                                   }
1873                                           } else {
1874                                                   const int minValue = controlPointSpace[cpName].second;
1875                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1876                                                   if(twoAgoValue-1 >= minValue){
1877                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1878                                                   }
1879                                           }
1880
1881                                           possibleNextStepPlans.push_back(aNewPlan);
1882
1883                                   }
1884
1885                           }
1886                   }
1887
1888                   // ========================================= Done =============================================
1889
1890
1891                   if(possibleNextStepPlans.size() > 0){
1892                           newControlPoints = possibleNextStepPlans[0];
1893                   }
1894
1895
1896                   CkPrintf("Steering done for this phase\n");
1897                   fflush(stdout);
1898
1899           }  else {
1900                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1901                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1902                   fflush(stdout);
1903           }
1904
1905
1906
1907   } else if( whichTuningScheme == SimulatedAnnealing ) {
1908
1909     // -----------------------------------------------------------
1910     //  SIMULATED ANNEALING
1911     //  Simulated Annealing style hill climbing method
1912     //
1913     //  Find the best search space configuration, and try something
1914     //  nearby it, with a radius decreasing as phases increase
1915
1916     CkPrintf("Finding best phase\n");
1917     instrumentedPhase *bestPhase = allData.findBest();  
1918     CkPrintf("best found:\n"); 
1919     bestPhase->print(); 
1920     CkPrintf("\n"); 
1921     
1922
1923     const int convergeByPhase = 100;
1924     // Determine from 0.0 to 1.0 how far along we are
1925     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1926
1927     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1928     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1929       const std::string &name = cpsIter->first;
1930       const std::pair<int,int> &bounds = cpsIter->second;
1931       const int minValue = bounds.first;
1932       const int maxValue = bounds.second;
1933       
1934       const int before = bestPhase->controlPoints[name];   
1935   
1936       const int range = (maxValue-minValue+1)*(1.0-progress);
1937
1938       int high = min(before+range, maxValue);
1939       int low = max(before-range, minValue);
1940       
1941       newControlPoints[name] = low;
1942       if(high-low > 0){
1943         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1944       } 
1945       
1946     }
1947
1948   } else if ( whichTuningScheme == DivideAndConquer ) {
1949
1950           // -----------------------------------------------------------
1951           //  STEERING FOR Divide & Conquer Programs
1952           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1953           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1954           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1955
1956           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1957           // plans are only generated after 3 phases
1958
1959           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1960           instrumentedPhase *prevPhase = previousPhaseData();
1961
1962           if(phase_id%4 == 0){
1963                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1964                   twoAgoPhase->print();
1965                   CkPrintf("\n");
1966                   fflush(stdout);
1967
1968                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1969
1970
1971                   // ========================================= Concurrency =============================================
1972                   // See if idle time is high:
1973                   {
1974                           double idleTime = twoAgoPhase->idleTime.avg;
1975                           double overheadTime = twoAgoPhase->overheadTime.avg;
1976
1977
1978                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1979                           fflush(stdout);
1980                           if(idleTime+overheadTime > 0.10){
1981                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1982                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1983
1984                                   int direction = -1;
1985                                   if (idleTime>overheadTime){
1986                                           // We need to decrease the grain size, or increase the available concurrency
1987                                           direction = 1;
1988                                   }
1989
1990                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1991
1992                                   bool found = false;
1993                                   std::string cpName;
1994                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1995                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1996                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1997                                           cpName = iter->first;
1998                                           info = &iter->second;
1999                                           
2000                                           // Initialize a new plan based on two phases ago
2001                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2002                                           bool newPlanExists = false;
2003
2004                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2005                                           fflush(stdout);
2006
2007
2008
2009                                           if(info->first == ControlPoint::EFF_INC){
2010                                                   const int minValue = controlPointSpace[cpName].first;
2011                                                   const int maxValue = controlPointSpace[cpName].second;
2012                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2013                                                   const int newVal = twoAgoValue+1*direction;
2014                                                   if(newVal <= maxValue && newVal >= minValue){
2015                                                     aNewPlan[cpName] = newVal;
2016                                                     newPlanExists = true;
2017                                                   } else {
2018                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2019                                                   }
2020                                           } else {
2021                                                   const int minValue = controlPointSpace[cpName].first;
2022                                                   const int maxValue = controlPointSpace[cpName].second;
2023                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2024                                                   const int newVal = twoAgoValue-1*direction;
2025                                                   if(newVal <= maxValue && newVal >= minValue){
2026                                                           aNewPlan[cpName] = newVal;
2027                                                           newPlanExists = true;
2028                                                   } else {
2029                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2030                                                   }
2031                                           }
2032
2033                                           if(newPlanExists){
2034                                             possibleNextStepPlans.push_back(aNewPlan);
2035                                           }
2036                                   }
2037                           }
2038                   }
2039
2040                   if(possibleNextStepPlans.size() > 0){
2041                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2042                     newControlPoints = possibleNextStepPlans[0];
2043                   } else {
2044                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2045                   }
2046           }
2047
2048   } else if( whichTuningScheme == Simplex ) {
2049
2050           // -----------------------------------------------------------
2051           //  Nelder Mead Simplex Algorithm
2052           //
2053           //  A scheme that takes a simplex (n+1 points) and moves it
2054           //  toward the minimum, eventually converging there.
2055
2056           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2057
2058   } else if( whichTuningScheme == ExhaustiveSearch ) {
2059     
2060     // -----------------------------------------------------------
2061     // EXHAUSTIVE SEARCH
2062    
2063     int numDimensions = controlPointSpace.size();
2064     CkAssert(numDimensions > 0);
2065   
2066     vector<int> lowerBounds(numDimensions);
2067     vector<int> upperBounds(numDimensions); 
2068   
2069     int d=0;
2070     std::map<std::string, pair<int,int> >::iterator iter;
2071     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2072       //    CkPrintf("Examining dimension %d\n", d);
2073       lowerBounds[d] = iter->second.first;
2074       upperBounds[d] = iter->second.second;
2075       d++;
2076     }
2077    
2078     // get names for each dimension (control point)
2079     vector<std::string> names(numDimensions);
2080     d=0;
2081     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2082       names[d] = niter->first;
2083       d++;
2084     }
2085   
2086   
2087     // Create the first possible configuration
2088     vector<int> config = lowerBounds;
2089     config.push_back(0);
2090   
2091     // Increment until finding an unused configuration
2092     allData.cleanupNames(); // put -1 values in for any control points missing
2093     std::vector<instrumentedPhase*> &phases = allData.phases;     
2094
2095     while(true){
2096     
2097       std::vector<instrumentedPhase*>::iterator piter; 
2098       bool testedConfiguration = false; 
2099       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2100       
2101         // Test if the configuration matches this phase
2102         bool match = true;
2103         for(int j=0;j<numDimensions;j++){
2104           match &= (*piter)->controlPoints[names[j]] == config[j];
2105         }
2106       
2107         if(match){
2108           testedConfiguration = true; 
2109           break;
2110         } 
2111       } 
2112     
2113       if(testedConfiguration == false){ 
2114         break;
2115       } 
2116     
2117       // go to next configuration
2118       config[0] ++;
2119       // Propagate "carrys"
2120       for(int i=0;i<numDimensions;i++){
2121         if(config[i] > upperBounds[i]){
2122           config[i+1]++;
2123           config[i] = lowerBounds[i];
2124         }
2125       }
2126       // check if we have exhausted all possible values
2127       if(config[numDimensions] > 0){
2128         break;
2129       }
2130        
2131     }
2132   
2133     if(config[numDimensions] > 0){
2134       for(int i=0;i<numDimensions;i++){ 
2135         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2136       }
2137     }
2138
2139     // results are now in config[i]
2140     
2141     for(int i=0; i<numDimensions; i++){
2142       newControlPoints[names[i]] = config[i];
2143       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2144     }
2145
2146
2147   } else {
2148     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2149   }
2150
2151
2152   const double endGenerateTime = CmiWallTimer();
2153   
2154   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2155   
2156 }
2157
2158
2159
2160
2161 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2162
2163
2164 /// Get control point value from range of integers [lb,ub]
2165 int controlPoint(const char *name, int lb, int ub){
2166   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2167   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2168   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2169   int result;
2170
2171   // if we already have control point values for phase, return them
2172   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2173     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2174     return thisPhaseData->controlPoints[std::string(name)];
2175   }
2176   
2177
2178   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2179     // For the first few phases, just use the lower bound, or the default if one was provided 
2180     // This ensures that the ranges for all the control points are known before we do anything fancy
2181     result = lb;
2182
2183     if(defaultControlPointValues.count(std::string(name)) > 0){
2184       int v = defaultControlPointValues[std::string(name)];
2185       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2186       result = v;
2187     }
2188
2189   } else if(controlPointSpace.count(std::string(name)) == 0){
2190     // if this is the first time we've seen the CP, then return the lower bound
2191     result = lb;
2192     
2193   }  else {
2194     // Generate a plan one time for each phase
2195     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2196     
2197     // Use the planned value:
2198     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2199     
2200   }
2201
2202   if(!isInRange(result,ub,lb)){
2203     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2204     fflush(stdout);
2205     fflush(stderr);
2206   }
2207   CkAssert(isInRange(result,ub,lb));
2208   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2209
2210   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2211
2212   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2213   //  thisPhaseData->print();
2214   
2215   return result;
2216 }
2217
2218
2219 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2220   CkAssert(CkMyPe() == 0);
2221   return controlPoint("FortranCP", *lb, *ub);
2222 }
2223
2224
2225
2226
2227 /// Inform the control point framework that a named control point affects the priorities of some array  
2228 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2229 //   CkGroupID aid = arraybase.ckGetArrayID();
2230 //   int groupIdx = aid.idx;
2231 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2232 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2233 // }
2234
2235
2236 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2237 // void controlPointPriorityEntry(const char *name, int idx){
2238 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2239 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2240 // }
2241
2242
2243
2244
2245
2246 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2247
2248     This function decomposes the algorithm into a state machine that allows it to
2249     evaluate one or more configurations through subsequent clls to this function.
2250     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2251
2252     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2253     so a list of them will be created and they will be evaluated, one per call.
2254
2255     Currently there is no stopping criteria, but the simplex ought to contract down
2256     to a few close configurations, and hence not much change will happen after this 
2257     point.
2258
2259  */
2260 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2261
2262         if(useBestKnown){
2263                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2264                 return;
2265         }
2266
2267
2268         if(firstSimplexPhase< 0){
2269                 firstSimplexPhase = allData.phases.size()-1;
2270                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2271         }
2272
2273         int n = controlPointSpace.size();
2274
2275         CkAssert(n>=2);
2276
2277
2278         if(simplexState == beginning){
2279                 // First we evaluate n+1 random points, then we go to a different state
2280                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2281                         CkPrintf("Simplex Tuning: chose random configuration\n");
2282
2283                         // Choose random values from the middle third of the range in each dimension
2284                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2285                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2286                                 const std::string &name = cpsIter->first;
2287                                 const std::pair<int,int> &bounds = cpsIter->second;
2288                                 const int lb = bounds.first;
2289                                 const int ub = bounds.second;
2290                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2291                         }
2292                 } else {
2293                         // Set initial simplex:
2294                         for(int i=0; i<n+1; i++){
2295                                 simplexIndices.insert(firstSimplexPhase+i);
2296                         }
2297                         CkAssert(simplexIndices.size() == n+1);
2298
2299                         // Transition to reflecting state
2300                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2301
2302                 }
2303
2304         } else if (simplexState == reflecting){
2305                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2306                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2307
2308                 // Find the highest time from other points in the simplex
2309                 double highestTimeForOthersInSimplex = 0.0;
2310                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2311                         double t = allData.phases[*iter]->medianTime();
2312                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2313                                 highestTimeForOthersInSimplex = t;
2314                         }
2315                 }
2316
2317                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2318
2319                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2320                         // if y* < yl,  transition to "expanding"
2321                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2322
2323                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2324                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2325                         CkAssert(simplexIndices.size() == n+1);
2326                         simplexIndices.erase(worstPhase);
2327                         CkAssert(simplexIndices.size() == n);
2328                         simplexIndices.insert(pPhase);
2329                         CkAssert(simplexIndices.size() == n+1);
2330                         // Transition to reflecting state
2331                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2332
2333                 } else {
2334                         // if y* > yh
2335                         if(recentPhaseTime <= worstTime){
2336                                 // replace Ph with P*
2337                                 CkAssert(simplexIndices.size() == n+1);
2338                                 simplexIndices.erase(worstPhase);
2339                                 CkAssert(simplexIndices.size() == n);
2340                                 simplexIndices.insert(pPhase);
2341                                 CkAssert(simplexIndices.size() == n+1);
2342                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2343                                 worstPhase = pPhase;
2344                                 // Just as a sanity check, make sure we don't use the non-updated values.
2345                                 worst.clear();
2346                         }
2347
2348                         // Now, form P** and do contracting phase
2349                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2350
2351                 }
2352
2353         } else if (simplexState == doneExpanding){
2354                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2355                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2356                 // A new configuration has been evaluated
2357
2358                 // Check to see if y** < y1
2359                 if(recentPhaseTime < bestTime){
2360                         // replace Ph by P**
2361                         CkAssert(simplexIndices.size() == n+1);
2362                         simplexIndices.erase(worstPhase);
2363                         CkAssert(simplexIndices.size() == n);
2364                         simplexIndices.insert(p2Phase);
2365                         CkAssert(simplexIndices.size() == n+1);
2366                 } else {
2367                         //      replace Ph by P*
2368                         CkAssert(simplexIndices.size() == n+1);
2369                         simplexIndices.erase(worstPhase);
2370                         CkAssert(simplexIndices.size() == n);
2371                         simplexIndices.insert(pPhase);
2372                         CkAssert(simplexIndices.size() == n+1);
2373                 }
2374
2375                 // Transition to reflecting state
2376                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2377
2378         }  else if (simplexState == contracting){
2379                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2380                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2381                 // A new configuration has been evaluated
2382
2383                 // Check to see if y** > yh
2384                 if(recentPhaseTime <= worstTime){
2385                         // replace Ph by P**
2386                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2387                         CkAssert(simplexIndices.size() == n+1);
2388                         simplexIndices.erase(worstPhase);
2389                         CkAssert(simplexIndices.size() == n);
2390                         simplexIndices.insert(p2Phase);
2391                         CkAssert(simplexIndices.size() == n+1);
2392                         // Transition to reflecting state
2393                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2394
2395                 } else {
2396                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2397                         simplexState = stillContracting;
2398
2399                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2400                         stillMustContractList = simplexIndices;
2401
2402                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2403                 }
2404
2405         } else if (simplexState == stillContracting){
2406                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2407
2408                 if(stillMustContractList.size()>0){
2409                         int c = *stillMustContractList.begin();
2410                         stillMustContractList.erase(c);
2411                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2412
2413                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2414
2415                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2416                         int v = 0;
2417                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2418                                 const std::string &name = cpsIter->first;
2419                                 const std::pair<int,int> &bounds = cpsIter->second;
2420                                 const int lb = bounds.first;
2421                                 const int ub = bounds.second;
2422
2423                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2424
2425                                 newControlPoints[name] = keepInRange(val,lb,ub);
2426                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2427                                 v++;
2428                         }
2429                 } else {
2430                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2431                         CkAssert(stillMustContractList.size() == 0);
2432                         simplexIndices.clear();
2433                         CkAssert(simplexIndices.size()==0);
2434                         for(int i=0; i<n+1; i++){
2435                                 simplexIndices.insert(allData.phases.size()-2-i);
2436                         }
2437                         CkAssert(simplexIndices.size()==n+1);
2438
2439                         // Transition to reflecting state
2440                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2441
2442                 }
2443
2444
2445         } else if (simplexState == expanding){
2446                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2447                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2448                 // A new configuration has been evaluated
2449
2450                 // determine if y** < yl
2451                 if(recentPhaseTime < bestTime){
2452                         // replace Ph by P**
2453                         CkAssert(simplexIndices.size() == n+1);
2454                         simplexIndices.erase(worstPhase);
2455                         CkAssert(simplexIndices.size() == n);
2456                         simplexIndices.insert(p2Phase);
2457                         CkAssert(simplexIndices.size() == n+1);
2458                         // Transition to reflecting state
2459                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2460
2461                 } else {
2462                         // else, replace ph with p*
2463                         CkAssert(simplexIndices.size() == n+1);
2464                         simplexIndices.erase(worstPhase);
2465                         CkAssert(simplexIndices.size() == n);
2466                         simplexIndices.insert(pPhase);
2467                         CkAssert(simplexIndices.size() == n+1);
2468                         // Transition to reflecting state
2469                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2470                 }
2471
2472
2473         } else {
2474                 CkAbort("Unknown simplexState");
2475         }
2476
2477 }
2478
2479
2480
2481 /** Replace the worst point with its reflection across the centroid. */
2482 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2483
2484         int n = controlPointSpace.size();
2485
2486         printSimplex(allData);
2487
2488         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2489
2490
2491         // Quit if the diameter of our simplex is small
2492         double maxr = 0.0;
2493         for(int i=0; i<n+1; i++){
2494                 //              Compute r^2 of this simplex point from the centroid
2495                 double r2 = 0.0;
2496                 std::vector<double> p = pointCoords(allData, i);
2497                 for(int d=0; d<p.size(); d++){
2498                         double r1 = (p[d] * centroid[d]);
2499                         r2 += r1*r1;
2500                 }
2501                 if(r2 > maxr)
2502                         maxr = r2;
2503         }
2504
2505 #if 0
2506         // At some point we quit this tuning
2507         if(maxr < 10){
2508                 useBestKnown = true;
2509                 instrumentedPhase *best = allData.findBest();
2510                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2511
2512                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2513                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2514                         const std::string &name = cpsIter->first;
2515                         newControlPoints[name] =  best->controlPoints[name];
2516                 }
2517         }
2518 #endif
2519
2520         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2521
2522         pPhase = allData.phases.size()-1;
2523         P.resize(n);
2524         for(int i=0; i<n; i++){
2525                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2526         }
2527
2528         for(int i=0; i<P.size(); i++){
2529                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2530         }
2531
2532
2533         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2534         int v = 0;
2535         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2536                 const std::string &name = cpsIter->first;
2537                 const std::pair<int,int> &bounds = cpsIter->second;
2538                 const int lb = bounds.first;
2539                 const int ub = bounds.second;
2540                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2541                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2542                 v++;
2543         }
2544
2545
2546         // Transition to "reflecting" state
2547         simplexState = reflecting;
2548         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2549
2550 }
2551
2552
2553
2554
2555 /** Replace the newly tested reflection with a further expanded version of itself. */
2556 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2557         int n = controlPointSpace.size();
2558         printSimplex(allData);
2559
2560         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2561         // I believe the equation for P** in the text on page 308 is correct.
2562
2563         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2564
2565
2566         p2Phase = allData.phases.size()-1;
2567         P2.resize(n);
2568         for(int i=0; i<n; i++){
2569                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2570         }
2571
2572         for(int i=0; i<P2.size(); i++){
2573                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2574         }
2575
2576
2577         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2578         int v = 0;
2579         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2580                 const std::string &name = cpsIter->first;
2581                 const std::pair<int,int> &bounds = cpsIter->second;
2582                 const int lb = bounds.first;
2583                 const int ub = bounds.second;
2584                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2585                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2586                 v++;
2587         }
2588
2589
2590         // Transition to "doneExpanding" state
2591         simplexState = doneExpanding;
2592         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2593
2594 }
2595
2596
2597
2598
2599 /** Replace the newly tested reflection with a further expanded version of itself. */
2600 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2601         int n = controlPointSpace.size();
2602         printSimplex(allData);
2603
2604         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2605
2606
2607         p2Phase = allData.phases.size()-1;
2608         P2.resize(n);
2609         for(int i=0; i<n; i++){
2610                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2611         }
2612
2613         for(int i=0; i<P2.size(); i++){
2614                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2615         }
2616
2617
2618         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2619         int v = 0;
2620         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2621                 const std::string &name = cpsIter->first;
2622                 const std::pair<int,int> &bounds = cpsIter->second;
2623                 const int lb = bounds.first;
2624                 const int ub = bounds.second;
2625                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2626                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2627                 v++;
2628         }
2629         
2630         
2631         // Transition to "contracting" state
2632         simplexState = contracting;
2633         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2634
2635 }
2636
2637
2638
2639
2640 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2641         int n = controlPointSpace.size();
2642
2643         // Find worst performing point in the simplex
2644         worstPhase = -1;
2645         worstTime = -1.0;
2646         bestPhase = 10000000;
2647         bestTime = 10000000;
2648         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2649                 double t = allData.phases[*iter]->medianTime();
2650                 if(t > worstTime){
2651                         worstTime = t;
2652                         worstPhase = *iter;
2653                 }
2654                 if(t < bestTime){
2655                         bestTime = t;
2656                         bestPhase = *iter;
2657                 }
2658         }
2659         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2660
2661         best = pointCoords(allData, bestPhase);
2662         CkAssert(best.size() == n);
2663
2664         worst = pointCoords(allData, worstPhase);
2665         CkAssert(worst.size() == n);
2666
2667         // Calculate centroid of the remaining points in the simplex
2668         centroid.resize(n);
2669         for(int i=0; i<n; i++){
2670                 centroid[i] = 0.0;
2671         }
2672
2673         int numPts = 0;
2674
2675         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2676                 if(*iter != worstPhase){
2677                         numPts ++;
2678                         // Accumulate into the result vector
2679                         int c = 0;
2680                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2681                                 centroid[c] += citer->second;
2682                                 c++;
2683                         }
2684
2685                 }
2686         }
2687
2688         // Now divide the sums by the number of points.
2689         for(int v = 0; v<centroid.size(); v++) {
2690                 centroid[v] /= (double)numPts;
2691         }
2692
2693         CkAssert(centroid.size() == n);
2694
2695         for(int i=0; i<centroid.size(); i++){
2696                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2697         }
2698
2699
2700 }
2701
2702
2703
2704 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2705         std::vector<double> result;
2706         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2707                 result.push_back((double)citer->second);
2708         }
2709         return result;
2710 }
2711
2712
2713
2714
2715 void ControlPointWriteOutputToDisk(){
2716         CkAssert(CkMyPe() == 0);
2717         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2718 }
2719
2720
2721
2722 /*! @} */
2723
2724 #ifdef CP_DISABLE_TRACING
2725 #include "ControlPointsNoTrace.def.h"
2726 #else
2727 #include "ControlPoints.def.h"
2728 #endif