Merge branch 'minas' into charm
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15 #include <math.h>
16
17
18 #include <climits>
19 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
20  
21
22 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
23 /**  @{ */
24
25 using namespace std;
26
27 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
28
29
30 //#undef DEBUGPRINT
31 //#define DEBUGPRINT 4
32
33 static void periodicProcessControlPoints(void* ptr, double currWallTime);
34
35
36 // A pointer to this PE's controlpoint manager Proxy
37 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
38 /* readonly */ int random_seed;
39 /* readonly */ long controlPointSamplePeriod;
40 /* readonly */ int whichTuningScheme;
41 /* readonly */ bool writeDataFileAtShutdown;
42 /* readonly */ bool shouldFilterOutputData;
43 /* readonly */ bool loadDataFileAtStartup;
44 /* readonly */ bool shouldGatherMemoryUsage;
45 /* readonly */ bool shouldGatherUtilization;
46 /* readonly */ bool shouldGatherAll;
47 /* readonly */ char CPDataFilename[512];
48
49
50
51 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
52 /// These probably come from the command line arguments, so are available only on PE 0
53 std::map<std::string, int> defaultControlPointValues;
54
55
56
57 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
58
59
60
61 void printTuningScheme(){
62   switch(whichTuningScheme){
63   case RandomSelection:
64     CkPrintf("Tuning Scheme: RandomSelection\n");
65     break;
66   case AlwaysDefaults:
67     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
68     break;
69   case SimulatedAnnealing:
70     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
71     break;
72   case ExhaustiveSearch:
73     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
74     break;
75   case CriticalPathAutoPrioritization:
76     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
77     break;
78   case UseBestKnownTiming:
79     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
80     break;
81   case UseSteering:
82     CkPrintf("Tuning Scheme: UseSteering\n");
83     break;
84   case MemoryAware:
85     CkPrintf("Tuning Scheme: MemoryAware\n");
86     break;
87   case Simplex:
88     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
89     break;
90   case DivideAndConquer:
91     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
92     break;
93   case LDBPeriod:
94     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
95     break;
96   case LDBPeriodLinear:
97     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
98     break;
99   case LDBPeriodQuadratic:
100     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
101     break;
102   case LDBPeriodOptimal:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
104     break;
105   default:
106     CkPrintf("Unknown tuning scheme\n");
107     break;
108   }
109   fflush(stdout);
110 }
111
112
113
114 /// A reduction type that combines idle time measurements (min/sum/max etc.)
115 CkReduction::reducerType idleTimeReductionType;
116 /// A reducer that combines idle time measurements (min/sum/max etc.)
117 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
118   double ret[3];
119   if(nMsg > 0){
120     CkAssert(msgs[0]->getSize()==3*sizeof(double));
121     double *m=(double *)msgs[0]->getData();
122     ret[0]=m[0];
123     ret[1]=m[1];
124     ret[2]=m[2];
125   }
126   for (int i=1;i<nMsg;i++) {
127     CkAssert(msgs[i]->getSize()==3*sizeof(double));
128     double *m=(double *)msgs[i]->getData();
129     ret[0]=min(ret[0],m[0]);
130     ret[1]+=m[1];
131     ret[2]=max(ret[2],m[2]);
132   }  
133   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
134 }
135
136
137
138 /// A reduction type that combines idle, overhead, and memory measurements
139 CkReduction::reducerType allMeasuresReductionType;
140 /// A reducer that combines idle, overhead, and memory measurements
141 #define ALL_REDUCTION_SIZE 12
142 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
143   double ret[ALL_REDUCTION_SIZE];
144   if(nMsg > 0){
145     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[0]->getData();
147     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
148   }
149   for (int i=1;i<nMsg;i++) {
150     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
151     double *m=(double *)msgs[i]->getData();
152     // idle time (min/sum/max)
153     ret[0]=min(ret[0],m[0]);
154     ret[1]+=m[1];
155     ret[2]=max(ret[2],m[2]);
156     // overhead time (min/sum/max)
157     ret[3]=min(ret[3],m[3]);
158     ret[4]+=m[4];
159     ret[5]=max(ret[5],m[5]);
160     // mem usage (max)
161     ret[6]=max(ret[6],m[6]);
162     // bytes per invocation for two types of entry methods
163     ret[7]+=m[7];
164     ret[8]+=m[8];
165     ret[9]+=m[9];
166     ret[10]+=m[10];
167     // Grain size (avg)
168     ret[11]+=m[11];
169   }  
170   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
171 }
172
173
174 /// Registers the control point framework's reduction handlers at startup on each PE
175 /*initproc*/ void registerCPReductions(void) {
176   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
177   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
178 }
179
180
181
182
183
184
185 /// Return an integer between 0 and num-1 inclusive
186 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
187 unsigned int randInt(unsigned int num, const char* name, int seed=0){
188   CkAssert(num > 0);
189   CkAssert(num < 1000);
190
191   unsigned long hash = 0;
192   unsigned int c;
193   unsigned char * str = (unsigned char*)name;
194
195   while (c = *str++){
196     unsigned int c2 = (c+64)%128;
197     unsigned int c3 = (c2*5953)%127;
198     hash = c3 + (hash << 6) + (hash << 16) - hash;
199   }
200
201   unsigned long shuffled1 = (hash*2083)%7907;
202   unsigned long shuffled2 = (seed*4297)%2017;
203
204   unsigned long shuffled3 = (random_seed*4799)%7919;
205
206   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
207
208   unsigned int result = ((namehash * 6029) % 1117) % num;
209
210   CkAssert(result >=0 && result < num);
211   return result;
212 }
213
214
215
216 controlPointManager::controlPointManager() {
217   generatedPlanForStep = -1;
218
219     exitWhenReady = false;
220     alreadyRequestedMemoryUsage = false;   
221     alreadyRequestedIdleTime = false;
222     alreadyRequestedAll = false;
223     
224     instrumentedPhase * newPhase = new instrumentedPhase();
225     allData.phases.push_back(newPhase);   
226     
227     frameworkShouldAdvancePhase = false;
228     haveControlPointChangeCallback = false;
229 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
230     
231     ControlPoint::initControlPointEffects();
232
233     phase_id = 0;
234
235     if(loadDataFileAtStartup){    
236       loadDataFile();
237     }
238
239     
240     if(CkMyPe() == 0){
241       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
242     }
243
244     traceRegisterUserEvent("No currently executing message", 5000);
245     traceRegisterUserEvent("Zero time along critical path", 5010);
246     traceRegisterUserEvent("Positive total time along critical path", 5020);
247     traceRegisterUserEvent("env->setPathHistory()", 6000);
248     traceRegisterUserEvent("Critical Path", 5900);
249     traceRegisterUserEvent("Table Entry", 5901);
250
251
252 #if USER_EVENT_FOR_REGISTERTERMINALPATH
253     traceRegisterUserEvent("registerTerminalPath", 100); 
254 #endif
255
256   }
257   
258
259  controlPointManager::~controlPointManager(){
260 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
261   }
262
263
264   /// Loads the previous run data file
265   void controlPointManager::loadDataFile(){
266     ifstream infile(CPDataFilename);
267     vector<std::string> names;
268     std::string line;
269   
270     while(getline(infile,line)){
271       if(line[0] != '#')
272         break;
273     }
274   
275     int numTimings = 0;
276     std::istringstream n(line);
277     n >> numTimings;
278   
279     while(getline(infile,line)){ 
280       if(line[0] != '#') 
281         break; 
282     }
283
284     int numControlPointNames = 0;
285     std::istringstream n2(line);
286     n2 >> numControlPointNames;
287   
288     for(int i=0; i<numControlPointNames; i++){
289       getline(infile,line);
290       names.push_back(line);
291     }
292
293     for(int i=0;i<numTimings;i++){
294       getline(infile,line);
295       while(line[0] == '#')
296         getline(infile,line); 
297
298       instrumentedPhase * ips = new instrumentedPhase();
299
300       std::istringstream iss(line);
301
302       // Read memory usage for phase
303       iss >> ips->memoryUsageMB;
304       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
305
306       // Read idle time
307       iss >> ips->idleTime.min;
308       iss >> ips->idleTime.avg;
309       iss >> ips->idleTime.max;
310
311       // Read overhead time
312       iss >> ips->overheadTime.min;
313       iss >> ips->overheadTime.avg;
314       iss >> ips->overheadTime.max;
315
316       // read bytePerInvoke
317       iss >> ips->bytesPerInvoke;
318
319       // read grain size
320       iss >> ips->grainSize;
321
322       // Read control point values
323       for(int cp=0;cp<numControlPointNames;cp++){
324         int cpvalue;
325         iss >> cpvalue;
326         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
327       }
328
329       // ignore median time
330       double mt;
331       iss >> mt;
332
333       // Read all times
334
335       double time;
336
337       while(iss >> time){
338         ips->times.push_back(time);
339 #if DEBUGPRINT > 5
340         CkPrintf("read time %lf from file\n", time);
341 #endif
342       }
343
344       allData.phases.push_back(ips);
345
346     }
347
348     infile.close();
349   }
350
351
352
353   /// Add the current data to allData and output it to a file
354   void controlPointManager::writeDataFile(){
355     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
356     ofstream outfile(CPDataFilename);
357     allData.cleanupNames();
358
359 //    string s = allData.toString();
360 //    CkPrintf("At end: \n %s\n", s.c_str());
361
362     if(shouldFilterOutputData){
363       allData.verify();
364       allData.filterOutIncompletePhases();
365     }
366
367 //    string s2 = allData.toString();
368 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
369     if(allData.toString().length() > 10){
370       outfile << allData.toString();
371     } else {
372       outfile << " No data available to save to disk " << endl;
373     }
374     outfile.close();
375   }
376
377   /// User can register a callback that is called when application should advance to next phase
378   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
379     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
380     controlPointChangeCallback = cb;
381     haveControlPointChangeCallback = true;
382   }
383
384
385 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
386 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
387   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
388 }
389
390   /// Called periodically by the runtime to handle the control points
391   /// Currently called on each PE
392   void controlPointManager::processControlPoints(){
393
394     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
395
396
397     //==========================================================================================
398     // Print the data for each phase
399
400     const int s = allData.phases.size();
401
402 #if DEBUGPRINT
403     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
404     for(int p=0;p<s;++p){
405       const instrumentedPhase &phase = allData.phases[p];
406       const idleTimeContainer &idle = phase.idleTime;
407       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
408       vector<double> const &times = phase.times;
409
410       CkPrintf("Phase %d:\n", p);
411       idle.print();
412      //   CkPrintf("critical paths: (* affected by control point)\n");
413         //   for(int i=0;i<criticalPaths.size(); i++){
414         // If affected by a control point
415         //      criticalPaths[i].print();
416
417       //        criticalPaths[i].print();
418       //        CkPrintf("\n");
419         
420
421       //   }
422       CkPrintf("Timings:\n");
423       for(int i=0;i<times.size(); i++){
424         CkPrintf("%lf ", times[i]);
425       }
426       CkPrintf("\n");
427
428     }
429     
430     CkPrintf("\n\n");
431
432
433 #endif
434
435
436
437     //==========================================================================================
438     // If this is a phase during which we try to adapt control point values based on critical path
439 #if 0
440     if( s%5 == 4) {
441
442       // Find the most recent phase with valid critical path data and idle time measurements      
443       int whichPhase=-1;
444       for(int p=0; p<s; ++p){
445         const instrumentedPhase &phase = allData.phases[p];
446         const idleTimeContainer &idle = phase.idleTime;
447         if(idle.isValid() && phase.criticalPaths.size()>0 ){
448           whichPhase = p;
449         }
450       }
451       
452       
453       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
454       const instrumentedPhase &phase = allData.phases[whichPhase];
455       const idleTimeContainer &idle = phase.idleTime;
456       
457       if(idle.min > 0.1){
458         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
459         
460         // Determine the median critical path for this phase
461         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
462         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
463         CkAssert(phase.criticalPaths.size() > 0);
464         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
465         
466         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
467         
468         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
469           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
470
471
472           CkPrintf("Finding control points related to the critical path\n");
473           int cpcount = 0;
474           std::set<std::string> controlPointsAffectingCriticalPath;
475
476           
477           for(int e=0;e<path.getNumUsed();e++){
478             if(path.getUsedCount(e)>0){
479               int ep = path.getUsedEp(e);
480
481               std::map<std::string, std::set<int> >::iterator iter;
482               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
483                 if(iter->second.count(ep)>0){
484                   controlPointsAffectingCriticalPath.insert(iter->first);
485                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
486                   cpcount++;
487                 }
488               }
489               
490             }
491           }
492           
493
494           if(cpcount==0){
495             CkPrintf("No control points are known to affect the critical path\n");
496           } else {
497             double beginTime = CmiWallTimer();
498
499             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
500             
501             newControlPoints = phase.controlPoints;
502             
503             if(frameworkShouldAdvancePhase){
504               gotoNextPhase();  
505             }
506             
507             if(haveControlPointChangeCallback){ 
508 #if DEBUGPRINT
509               CkPrintf("Calling control point change callback\n");
510 #endif
511               // Create a high priority message and send it to the callback
512               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
513               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
514               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
515               controlPointChangeCallback.send(msg);
516               
517             }
518             
519             
520             // adjust the control points that can affect the critical path
521
522             char textDescription[4096*2];
523             textDescription[0] = '\0';
524
525             std::map<std::string,int>::iterator newCP;
526             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
527               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
528                 // decrease the value (increase priority) if within range
529                 int lowerbound = controlPointSpace[newCP->first].first;
530                 if(newCP->second > lowerbound){
531                   newControlPointsAvailable = true;
532                   newCP->second --;
533                 }
534               }
535             }
536            
537             // Create a string for a projections user event
538             if(1){
539               std::map<std::string,int>::iterator newCP;
540               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
541                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
542               }
543             }
544
545             char *userEventString = new char[4096*2];
546             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
547             
548             static int userEventCounter = 20000;
549             CkPrintf("USER EVENT: %s\n", userEventString);
550             
551             traceRegisterUserEvent(userEventString, userEventCounter); 
552             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
553             userEventCounter++;
554             
555           }
556           
557         } else {
558           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
559         }
560         
561         
562       }
563       
564     } else {
565       
566       
567     }
568    
569 #endif
570
571
572
573     if(frameworkShouldAdvancePhase){
574       gotoNextPhase();  
575     }
576     
577     if(haveControlPointChangeCallback){ 
578       // Create a high priority message and send it to the callback
579       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
580       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
581       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
582       controlPointChangeCallback.send(msg);
583     }
584     
585   }
586   
587   /// Determine if any control point is known to affect an entry method
588   bool controlPointManager::controlPointAffectsThisEP(int ep){
589     std::map<std::string, std::set<int> >::iterator iter;
590     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
591       if(iter->second.count(ep)>0){
592         return true;
593       }
594     }
595     return false;    
596   }
597   
598   /// Determine if any control point is known to affect a chare array  
599   bool controlPointManager::controlPointAffectsThisArray(int array){
600     std::map<std::string, std::set<int> >::iterator iter;
601     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
602       if(iter->second.count(array)>0){
603         return true;
604       }
605     }
606     return false;   
607   }
608   
609
610   /// The data from the current phase
611   instrumentedPhase * controlPointManager::currentPhaseData(){
612     int s = allData.phases.size();
613     CkAssert(s>=1);
614     return allData.phases[s-1];
615   }
616  
617
618   /// The data from the previous phase
619   instrumentedPhase * controlPointManager::previousPhaseData(){
620     int s = allData.phases.size();
621     if(s >= 2 && phase_id > 0) {
622       return allData.phases[s-2];
623     } else {
624       return NULL;
625     }
626   }
627  
628   /// The data from two phases back
629   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
630     int s = allData.phases.size();
631     if(s >= 3 && phase_id > 0) {
632       return allData.phases[s-3];
633     } else {
634       return NULL;
635     }
636   }
637   
638
639   /// Called by either the application or the Control Point Framework to advance to the next phase  
640   void controlPointManager::gotoNextPhase(){
641     
642 #ifndef CP_DISABLE_TRACING
643     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
644     fflush(stdout);
645
646     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
647       alreadyRequestedAll = true;
648       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
649       CkPrintf("Requesting all measurements\n");
650       thisProxy.requestAll(*cb);
651       delete cb;
652     
653     } else {
654       
655       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
656         alreadyRequestedMemoryUsage = true;
657         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
658         thisProxy.requestMemoryUsage(*cb);
659         delete cb;
660       }
661       
662       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
663         alreadyRequestedIdleTime = true;
664         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
665         thisProxy.requestIdleTime(*cb);
666         delete cb;
667       }
668     }
669     
670
671 #endif
672
673
674
675 #if CMK_LBDB_ON && 0
676     LBDatabase * myLBdatabase = LBDatabaseObj();
677     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
678     const CkVec<LBObj*> objs = myLBDB->getObjs();
679     const int objCount = myLBDB->getObjCount();
680     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
681     
682     floatType maxObjWallTime = -1.0;
683     
684     for(int i=0;i<objs.length();i++){
685       LBObj* o = objs[i];
686       const LDObjData d = o->ObjData();
687       floatType cpuTime = d.cpuTime;
688       floatType wallTime = d.wallTime;
689       // can also get object handles from the LDObjData struct
690       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
691       if(wallTime > maxObjWallTime){
692
693       }
694       
695     }
696
697     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
698     
699 #endif    
700
701
702     
703     // increment phase id
704     phase_id++;
705     
706
707     // Create new entry for the phase we are starting now
708     instrumentedPhase * newPhase = new instrumentedPhase();
709     allData.phases.push_back(newPhase);
710     
711     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
712
713   }
714
715   /// An application uses this to register an instrumented timing for this phase
716   void controlPointManager::setTiming(double time){
717     currentPhaseData()->times.push_back(time);
718
719 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
720        
721     // First we should register this currently executing message as a path, because it is likely an important one to consider.
722     //    registerTerminalEntryMethod();
723     
724     // save the critical path for this phase
725     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
726     //    maxTerminalPathHistory.reset();
727     
728     
729     // Reset the counts for the currently executing message
730     //resetThisEntryPath();
731         
732 #endif
733     
734   }
735   
736   /// Entry method called on all PEs to request CPU utilization statistics
737   void controlPointManager::requestIdleTime(CkCallback cb){
738 #ifndef CP_DISABLE_TRACING
739     double i = localControlPointTracingInstance()->idleRatio();
740     double idle[3];
741     idle[0] = i;
742     idle[1] = i;
743     idle[2] = i;
744     
745     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
746     
747     localControlPointTracingInstance()->resetTimings();
748
749     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
750 #else
751     CkAbort("Should not get here\n");
752 #endif
753   }
754   
755   /// All processors reduce their memory usages in requestIdleTime() to this method
756   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
757 #ifndef CP_DISABLE_TRACING
758     int size=msg->getSize() / sizeof(double);
759     CkAssert(size==3);
760     double *r=(double *) msg->getData();
761         
762     instrumentedPhase* prevPhase = previousPhaseData();
763     if(prevPhase != NULL){
764       prevPhase->idleTime.min = r[0];
765       prevPhase->idleTime.avg = r[1]/CkNumPes();
766       prevPhase->idleTime.max = r[2];
767       prevPhase->idleTime.print();
768       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
769     } else {
770       CkPrintf("There is no previous phase to store the idle time measurements\n");
771     }
772     
773     alreadyRequestedIdleTime = false;
774     checkForShutdown();
775     delete msg;
776 #else
777     CkAbort("Should not get here\n");
778 #endif
779   }
780
781
782
783
784
785
786   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
787   void controlPointManager::requestAll(CkCallback cb){
788 #ifndef CP_DISABLE_TRACING
789     TraceControlPoints *t = localControlPointTracingInstance();
790
791     double data[ALL_REDUCTION_SIZE];
792
793     double *idle = data;
794     double *over = data+3;
795     double *mem = data+6;
796     double *msgBytes = data+7;  
797     double *grainsize = data+11;  
798
799     const double i = t->idleRatio();
800     idle[0] = i;
801     idle[1] = i;
802     idle[2] = i;
803
804     const double o = t->overheadRatio();
805     over[0] = o;
806     over[1] = o;
807     over[2] = o;
808
809     const double m = t->memoryUsageMB();
810     mem[0] = m;
811
812     msgBytes[0] = t->b2;
813     msgBytes[1] = t->b3;
814     msgBytes[2] = t->b2mlen;
815     msgBytes[3] = t->b3mlen;
816
817     grainsize[0] = t->grainSize();
818     
819     localControlPointTracingInstance()->resetAll();
820
821     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
822 #else
823     CkAbort("Should not get here\n");
824 #endif
825   }
826   
827   /// All processors reduce their memory usages in requestIdleTime() to this method
828   void controlPointManager::gatherAll(CkReductionMsg *msg){
829 #ifndef CP_DISABLE_TRACING
830     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
831     int size=msg->getSize() / sizeof(double);
832     double *data=(double *) msg->getData();
833         
834     double *idle = data;
835     double *over = data+3;
836     double *mem = data+6;
837     double *msgBytes = data+7;
838     double *granularity = data+11;
839
840
841     //    std::string b = allData.toString();
842
843     instrumentedPhase* prevPhase = previousPhaseData();
844     if(prevPhase != NULL){
845       prevPhase->idleTime.min = idle[0];
846       prevPhase->idleTime.avg = idle[1]/CkNumPes();
847       prevPhase->idleTime.max = idle[2];
848       
849       prevPhase->overheadTime.min = over[0];
850       prevPhase->overheadTime.avg = over[1]/CkNumPes();
851       prevPhase->overheadTime.max = over[2];
852       
853       prevPhase->memoryUsageMB = mem[0];
854       
855       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
856
857       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
858       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
859
860       /** The average of the grain sizes on all PEs in us */
861       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
862
863       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
864       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
865
866       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
867       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
868
869       if(bytesPerInvoke2 > bytesPerInvoke3)
870         prevPhase->bytesPerInvoke = bytesPerInvoke2;
871       else
872         prevPhase->bytesPerInvoke = bytesPerInvoke3;
873
874       //      prevPhase->print();
875       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
876
877       //      std::string a = allData.toString();
878
879       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
880       
881     } else {
882       CkPrintf("There is no previous phase to store measurements\n");
883     }
884     
885     alreadyRequestedAll = false;
886     checkForShutdown();
887     delete msg;
888 #else
889     CkAbort("Should not get here\n");
890 #endif
891   }
892
893
894
895
896   void controlPointManager::checkForShutdown(){
897     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
898       doExitNow();
899     }
900   }
901
902
903   void controlPointManager::exitIfReady(){
904      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
905        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
906        doExitNow();
907      } else {
908        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
909        exitWhenReady = true;
910      }
911   }
912
913
914
915   void controlPointManager::doExitNow(){
916           writeOutputToDisk();
917           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
918           CkExit();
919   }
920
921   void controlPointManager::writeOutputToDisk(){
922           if(writeDataFileAtShutdown){
923                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
924           }
925   }
926
927
928   /// Entry method called on all PEs to request memory usage
929   void controlPointManager::requestMemoryUsage(CkCallback cb){
930     int m = CmiMaxMemoryUsage() / 1024 / 1024;
931     CmiResetMaxMemory();
932     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
933     contribute(sizeof(int),&m,CkReduction::max_int, cb);
934   }
935
936   /// All processors reduce their memory usages to this method
937   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
938     int size=msg->getSize() / sizeof(int);
939     CkAssert(size==1);
940     int *m=(int *) msg->getData();
941
942     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
943     
944     instrumentedPhase* prevPhase = previousPhaseData();
945     if(prevPhase != NULL){
946       prevPhase->memoryUsageMB = m[0];
947     } else {
948       CkPrintf("No place to store memory usage");
949     }
950
951     alreadyRequestedMemoryUsage = false;
952     checkForShutdown();
953     delete msg;
954   }
955
956
957 //   /// Inform the control point framework that a named control point affects the priorities of some array  
958 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
959 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
960     
961 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
962 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
963 //     } else {
964 //       std::set<int> s;
965 //       s.insert(groupIdx);
966 //       affectsPrioritiesArray[std::string(name)] = s;
967 //     }
968     
969 // #if DEBUGPRINT   
970 //     std::map<std::string, std::set<int> >::iterator f;
971 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
972 //       std::string name = f->first;
973 //       std::set<int> &vals = f->second;
974 //       cout << "Control point " << name << " affects arrays: ";
975 //       std::set<int>::iterator i;
976 //       for(i=vals.begin(); i!=vals.end();++i){
977 //      cout << *i << " ";
978 //       }
979 //       cout << endl;
980 //     }
981 // #endif
982     
983 //   }
984   
985 //   /// Inform the control point framework that a named control point affects the priority of some entry method
986 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
987 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
988
989 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
990 //       affectsPrioritiesEP[std::string(name)].insert(idx);
991 //     } else {
992 //       std::set<int> s;
993 //       s.insert(idx);
994 //       affectsPrioritiesEP[std::string(name)] = s;
995 //     }
996     
997 // #if DEBUGPRINT
998 //     std::map<std::string, std::set<int> >::iterator f;
999 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1000 //       std::string name = f->first;
1001 //       std::set<int> &vals = f->second;
1002 //       cout << "Control point " << name << " affects EP: ";
1003 //       std::set<int>::iterator i;
1004 //       for(i=vals.begin(); i!=vals.end();++i){
1005 //      cout << *i << " ";
1006 //       }
1007 //       cout << endl;
1008 //     }
1009 // #endif
1010
1011
1012 //   }
1013   
1014
1015
1016 /// An interface callable by the application.
1017 void gotoNextPhase(){
1018   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1019 }
1020
1021 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1022 {
1023   gotoNextPhase();
1024 }
1025
1026
1027 /// A mainchare that is used just to create our controlPointManager group at startup
1028 class controlPointMain : public CBase_controlPointMain {
1029 public:
1030   controlPointMain(CkArgMsg* args){
1031 #if OLDRANDSEED
1032     struct timeval tp;
1033     gettimeofday(& tp, NULL);
1034     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1035 #else
1036     double time = CmiWallTimer();
1037     int sec = (int)time;
1038     int usec = (int)((time - (double)sec)*1000000.0);
1039     random_seed =  sec ^ usec;
1040 #endif
1041     
1042     
1043     double period;
1044     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1045     if(haveSamplePeriod){
1046       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1047       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1048     } else {
1049       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1050     }
1051     
1052     
1053     
1054     whichTuningScheme = RandomSelection;
1055
1056
1057     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1058       whichTuningScheme = RandomSelection;
1059     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1060       whichTuningScheme = ExhaustiveSearch;
1061     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1062       whichTuningScheme = AlwaysDefaults;
1063     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1064       whichTuningScheme = SimulatedAnnealing;
1065     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1066       whichTuningScheme = CriticalPathAutoPrioritization;
1067     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1068       whichTuningScheme = UseBestKnownTiming;
1069     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1070       whichTuningScheme = UseSteering;
1071     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1072       whichTuningScheme = MemoryAware;
1073     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1074       whichTuningScheme = Simplex;
1075     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1076       whichTuningScheme = DivideAndConquer;
1077     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1078       whichTuningScheme = LDBPeriod;
1079     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1080       whichTuningScheme = LDBPeriodLinear;
1081     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1082       whichTuningScheme = LDBPeriodQuadratic;
1083     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1084       whichTuningScheme = LDBPeriodOptimal;
1085     }
1086
1087     char *defValStr = NULL;
1088     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1089       CkPrintf("You specified default value string: %s\n", defValStr);
1090       
1091       // Parse the string, looking for commas
1092      
1093
1094       char *tok = strtok(defValStr, ",");
1095       while (tok) {
1096         // split on the equal sign
1097         int len = strlen(tok);
1098         char *eqsign = strchr(tok, '=');
1099         if(eqsign != NULL && eqsign != tok){
1100           *eqsign = '\0';
1101           char *cpname = tok;
1102           std::string cpName(tok);
1103           char *cpDefVal = eqsign+1;      
1104           int v=-1;
1105           if(sscanf(cpDefVal, "%d", &v) == 1){
1106             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1107             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1108             defaultControlPointValues[cpName] = v;
1109           }
1110         }
1111         tok = strtok(NULL, ",");
1112       }
1113
1114     }
1115
1116     shouldGatherAll = false;
1117     shouldGatherMemoryUsage = false;
1118     shouldGatherUtilization = false;
1119     
1120     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1121       shouldGatherAll = true;
1122     } else {
1123       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1124         shouldGatherMemoryUsage = true;
1125       }
1126       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1127         shouldGatherUtilization = true;
1128       }
1129     }
1130     
1131     writeDataFileAtShutdown = false;   
1132     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1133       writeDataFileAtShutdown = true;
1134     }
1135
1136     shouldFilterOutputData = true;
1137     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1138       shouldFilterOutputData = false;
1139     }
1140
1141
1142    loadDataFileAtStartup = false;   
1143     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1144       loadDataFileAtStartup = true;
1145     }
1146
1147     char *cpdatafile;
1148     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1149       sprintf(CPDataFilename, "%s", cpdatafile);
1150     } else {
1151       sprintf(CPDataFilename, "controlPointData.txt");
1152     }
1153
1154
1155     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1156   }
1157   ~controlPointMain(){}
1158 };
1159
1160 /// An interface callable by the application.
1161 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1162   CkAssert(CkMyPe() == 0);
1163   CkPrintf("Application has registered a control point change callback\n");
1164   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1165 }
1166
1167 /// An interface callable by the application.
1168 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1169   if(CkMyPe() == 0){
1170     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1171     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1172   }
1173 }
1174
1175 /// An interface callable by the application.
1176 void registerControlPointTiming(double time){
1177   CkAssert(CkMyPe() == 0);
1178 #if DEBUGPRINT>0
1179   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1180 #endif
1181   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1182 }
1183
1184 /// An interface callable by the application.
1185 void controlPointTimingStamp() {
1186   CkAssert(CkMyPe() == 0);
1187 #if DEBUGPRINT>0
1188   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1189 #endif
1190   
1191   static double prev_time = 0.0;
1192   double t = CmiWallTimer();
1193   double duration = t - prev_time;
1194   prev_time = t;
1195     
1196   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1197 }
1198
1199 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1200 {
1201   controlPointTimingStamp();
1202 }
1203
1204
1205
1206
1207
1208 /// Shutdown the control point framework, writing data to disk if necessary
1209 extern "C" void controlPointShutdown(){
1210   if(CkMyPe() == 0){
1211
1212     // wait for gathering of idle time & memory usage to complete
1213     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1214
1215   }
1216 }
1217
1218 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1219 void controlPointInitNode(){
1220 //  CkPrintf("controlPointInitNode()\n");
1221   registerExitFn(controlPointShutdown);
1222 }
1223
1224 /// Called periodically to allow control point framework to do things periodically
1225 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1226 #ifdef DEBUGPRINT
1227   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1228 #endif
1229   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1230   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1231 }
1232
1233
1234
1235
1236
1237 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1238 /// user observed characteristic to adapt specific control point values.
1239 /// This function must return valid values for newControlPoints.
1240 void controlPointManager::generatePlan() {
1241   const double startGenerateTime = CmiWallTimer();
1242   const int phase_id = this->phase_id;
1243   const int effective_phase = allData.phases.size();
1244
1245   // Only generate a plan once per phase
1246   if(generatedPlanForStep == phase_id) 
1247     return;
1248   generatedPlanForStep = phase_id;
1249  
1250   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1251   printTuningScheme();
1252
1253   // By default lets put the previous phase data into newControlPoints
1254   instrumentedPhase *prevPhase = previousPhaseData();
1255   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1256           newControlPoints[cpsIter->first] = cpsIter->second;
1257   }
1258
1259
1260   if( whichTuningScheme == RandomSelection ){
1261     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1262     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1263       const std::string &name = cpsIter->first;
1264       const std::pair<int,int> &bounds = cpsIter->second;
1265       const int lb = bounds.first;
1266       const int ub = bounds.second;
1267       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1268     }
1269   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1270     // -----------------------------------------------------------
1271     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1272     //   
1273     // This scheme will return the median value for the range 
1274     // early on, and then will transition over to the new control points
1275     // determined by the critical path adapting code
1276
1277     // This won't work until the periodic function is fixed up a bit
1278
1279     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1280     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1281       const std::string &name = cpsIter->first;
1282       const std::pair<int,int> &bounds = cpsIter->second;
1283       const int lb = bounds.first;
1284       const int ub = bounds.second;
1285       newControlPoints[name] =  (lb+ub)/2;
1286     }
1287
1288   } else if ( whichTuningScheme == MemoryAware ) {
1289
1290     // -----------------------------------------------------------
1291     //  STEERING BASED ON MEMORY USAGE
1292
1293     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1294     instrumentedPhase *prevPhase = previousPhaseData();
1295  
1296     if(phase_id%2 == 0){
1297       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1298       twoAgoPhase->print();
1299       CkPrintf("\n");
1300       fflush(stdout);
1301       
1302       // See if memory usage is low:
1303       double memUsage = twoAgoPhase->memoryUsageMB;
1304       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1305       fflush(stdout);
1306       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1307         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1308         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1309         
1310         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1311         newControlPoints = twoAgoPhase->controlPoints;
1312
1313
1314         CkPrintf("Steering (memory based) initialized plan\n");
1315         fflush(stdout);
1316
1317         // look for a possible control point knob to turn
1318         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1319         
1320         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1321         bool found = false;
1322         std::string cpName;
1323         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1324         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1325         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1326           cpName = iter->first;
1327           info = &iter->second;
1328           found = true;
1329           break;
1330         }
1331
1332         // Adapt the control point value
1333         if(found){
1334           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1335           fflush(stdout);
1336           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1337           const int maxValue = controlPointSpace[cpName].second;
1338           
1339           if(twoAgoValue+1 <= maxValue){
1340             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1341           }
1342         }
1343         
1344       }
1345     }
1346
1347     CkPrintf("Steering (memory based) done for this phase\n");
1348     fflush(stdout);
1349
1350   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1351
1352     // -----------------------------------------------------------
1353     //  USE BEST KNOWN TIME  
1354     static bool firstTime = true;
1355     if(firstTime){
1356       firstTime = false;
1357       instrumentedPhase *best = allData.findBest(); 
1358       CkPrintf("Best known phase is:\n");
1359       best->print();
1360       CkPrintf("\n");
1361       
1362       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1363       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1364         const std::string &name = cpsIter->first;
1365         newControlPoints[name] =  best->controlPoints[name];
1366       }
1367     }
1368   } else if( whichTuningScheme == LDBPeriod) {
1369     // Assume this is used in this manner:
1370     //  1) go to next phase
1371     //  2) request control point
1372     //  3) load balancing
1373     //  4) computation
1374     
1375     
1376     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1377     instrumentedPhase *prevPhase = previousPhaseData();
1378     
1379     
1380     const std::vector<double> &times = twoAgoPhase->times;
1381     const int oldNumTimings = times.size();
1382
1383
1384     const std::vector<double> &timesNew = prevPhase->times;
1385     const int newNumTimings = timesNew.size();
1386
1387
1388     if(oldNumTimings > 4 && newNumTimings > 4){
1389       
1390       // Build model of execution time based on two phases ago
1391       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1392       
1393       double oldSum = 0;
1394       
1395       for(int i=2; i<oldNumTimings; i++){
1396         oldSum += times[i];
1397       }
1398       
1399       const double oldAvg = oldSum / (oldNumTimings-2);
1400       
1401       
1402       
1403       
1404       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1405       const double expectedTotalTime = oldAvg * newNumTimings;
1406       
1407       
1408       // Measure actual time
1409       double newSum = 0.0;
1410       for(int i=2; i<newNumTimings; ++i){
1411         newSum += timesNew[i];
1412       }
1413       
1414       const double newAvg = newSum / (newNumTimings-2);
1415       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1416       
1417       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1418       
1419       // Determine load balance cost
1420       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1421       
1422       const double benefitAfterLB = benefit - lbcost;
1423     
1424     
1425       // Determine whether LB cost outweights the estimated benefit
1426       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1427     
1428     
1429       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1430       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1431         const std::string &name = cpsIter->first;
1432         const std::pair<int,int> &bounds = cpsIter->second;
1433         const int lb = bounds.first;
1434         const int ub = bounds.second;
1435       
1436         if(benefitAfterLB > 0){
1437           CkPrintf("Constant Model: Beneficial LB\n");
1438           int newval = newControlPoints[name] / 2;
1439           if(newval > lb)
1440             newControlPoints[name] = newval;
1441           else 
1442             newControlPoints[name] = lb;
1443         } else {
1444           CkPrintf("Constant Model: Detrimental LB\n");
1445           int newval = newControlPoints[name] * 2;
1446           if(newval < ub)
1447             newControlPoints[name] = newval;
1448           else
1449             newControlPoints[name] = ub;
1450         }
1451       }
1452     }
1453     
1454     
1455   }  else if( whichTuningScheme == LDBPeriodLinear) {
1456     // Assume this is used in this manner:
1457     //  1) go to next phase
1458     //  2) request control point
1459     //  3) load balancing
1460     //  4) computation
1461
1462
1463     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1464     instrumentedPhase *prevPhase = previousPhaseData();
1465     
1466     const std::vector<double> &times = twoAgoPhase->times;
1467     const int oldNumTimings = times.size();
1468
1469     const std::vector<double> &timesNew = prevPhase->times;
1470     const int newNumTimings = timesNew.size();
1471     
1472
1473     if(oldNumTimings > 4 && newNumTimings > 4){
1474
1475       // Build model of execution time based on two phases ago
1476       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1477       const int b1 = 2 + (oldNumTimings-2)/2;
1478       double s1 = 0;
1479       double s2 = 0;
1480     
1481       const double ldbStepsTime = times[0] + times[1];
1482     
1483       for(int i=2; i<b1; i++){
1484         s1 += times[i];
1485       }
1486       for(int i=b1; i<oldNumTimings; i++){
1487         s2 += times[i];
1488       }
1489       
1490       
1491       // Compute the estimated time for the last phase's data
1492     
1493       const double a1 = s1 / (double)(b1-2);
1494       const double a2 = s2 / (double)(oldNumTimings-b1);
1495       const double aold = (a1+a2)/2.0;
1496
1497       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1498         
1499     
1500       // Measure actual time
1501       double sum = 0.0;
1502       for(int i=2; i<newNumTimings; ++i){
1503         sum += timesNew[i];
1504       }
1505
1506       const double avg = sum / ((double)(newNumTimings-2));
1507       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1508
1509       const double benefit = expectedTotalTime - actualTotalTime;
1510
1511       // Determine load balance cost
1512       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1513
1514       const double benefitAfterLB = benefit - lbcost;
1515
1516     
1517       // Determine whether LB cost outweights the estimated benefit
1518       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1519     
1520     
1521     
1522       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1523       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1524         const std::string &name = cpsIter->first;
1525         const std::pair<int,int> &bounds = cpsIter->second;
1526         const int lb = bounds.first;
1527         const int ub = bounds.second;
1528       
1529         if(benefitAfterLB > 0){
1530           CkPrintf("Linear Model: Beneficial LB\n");
1531           int newval = newControlPoints[name] / 2;
1532           if(newval > lb)
1533             newControlPoints[name] = newval;
1534           else 
1535             newControlPoints[name] = lb;
1536         } else {
1537           CkPrintf("Linear Model: Detrimental LB\n");
1538           int newval = newControlPoints[name] * 2;
1539           if(newval < ub)
1540             newControlPoints[name] = newval;
1541           else 
1542             newControlPoints[name] = ub;
1543         }
1544       }
1545     }
1546
1547   }
1548
1549   else if( whichTuningScheme == LDBPeriodQuadratic) {
1550     // Assume this is used in this manner:
1551     //  1) go to next phase
1552     //  2) request control point
1553     //  3) load balancing
1554     //  4) computation
1555
1556
1557     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1558     instrumentedPhase *prevPhase = previousPhaseData();
1559         
1560     const std::vector<double> &times = twoAgoPhase->times;
1561     const int oldNumTimings = times.size();
1562
1563     const std::vector<double> &timesNew = prevPhase->times;
1564     const int newNumTimings = timesNew.size();
1565
1566     
1567     if(oldNumTimings > 4 && newNumTimings > 4){
1568
1569
1570       // Build model of execution time based on two phases ago
1571       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1572       const int b1 = 2 + (oldNumTimings-2)/3;
1573       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1574       double s1 = 0;
1575       double s2 = 0;
1576       double s3 = 0;
1577
1578       const double ldbStepsTime = times[0] + times[1];
1579     
1580       for(int i=2; i<b1; i++){
1581         s1 += times[i];
1582       }
1583       for(int i=b1; i<b2; i++){
1584         s2 += times[i];
1585       }
1586       for(int i=b2; i<oldNumTimings; i++){
1587         s3 += times[i];
1588       }
1589
1590     
1591       // Compute the estimated time for the last phase's data
1592     
1593       const double a1 = s1 / (double)(b1-2);
1594       const double a2 = s2 / (double)(b2-b1);
1595       const double a3 = s3 / (double)(oldNumTimings-b2);
1596     
1597       const double a = (a1-2.0*a2+a3)/2.0;
1598       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1599       const double c = a3;
1600     
1601       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1602       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1603       const double x2 = 0.5;
1604       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1605    
1606     
1607       // Measure actual time
1608       double sum = 0.0;
1609       for(int i=2; i<newNumTimings; ++i){
1610         sum += timesNew[i];
1611       }
1612
1613       const double avg = sum / ((double)(newNumTimings-2));
1614       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1615
1616       const double benefit = expectedTotalTime - actualTotalTime;
1617
1618       // Determine load balance cost
1619       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1620
1621       const double benefitAfterLB = benefit - lbcost;
1622
1623     
1624       // Determine whether LB cost outweights the estimated benefit
1625       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1626     
1627     
1628     
1629       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1630       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1631         const std::string &name = cpsIter->first;
1632         const std::pair<int,int> &bounds = cpsIter->second;
1633         const int lb = bounds.first;
1634         const int ub = bounds.second;
1635       
1636         if(benefitAfterLB > 0){
1637           CkPrintf("QuadraticModel: Beneficial LB\n");
1638           int newval = newControlPoints[name] / 2;
1639           if(newval > lb)
1640             newControlPoints[name] = newval;
1641           else 
1642             newControlPoints[name] = lb;
1643         } else {
1644           CkPrintf("QuadraticModel: Detrimental LB\n");
1645           int newval = newControlPoints[name] * 2;
1646           if(newval < ub)
1647             newControlPoints[name] = newval;
1648           else 
1649             newControlPoints[name] = ub;
1650         }
1651       
1652       }
1653     }
1654     
1655
1656   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1657     // Assume this is used in this manner:
1658     //  1) go to next phase
1659     //  2) request control point
1660     //  3) load balancing
1661     //  4) computation
1662
1663
1664
1665     instrumentedPhase *prevPhase = previousPhaseData();
1666     
1667     const std::vector<double> &times = prevPhase->times;
1668     const int numTimings = times.size();
1669     
1670     if( numTimings > 4){
1671
1672       const int b1 = 2 + (numTimings-2)/2;
1673       double s1 = 0;
1674       double s2 = 0;
1675     
1676     
1677       for(int i=2; i<b1; i++){
1678         s1 += times[i];
1679       }
1680       for(int i=b1; i<numTimings; i++){
1681         s2 += times[i];
1682       }
1683       
1684     
1685       const double a1 = s1 / (double)(b1-2);
1686       const double a2 = s2 / (double)(numTimings-b1);
1687       const double avg = (a1+a1) / 2.0;
1688
1689       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1690
1691       const double ldbStepsTime = times[0] + times[1];
1692       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1693       
1694 #if defined(_WIN32) && ! defined(__CYGWIN__)
1695 #define lround(x)        ((long)(x+0.5))
1696 #endif
1697       int newval = lround(sqrt(2.0*lbcost/m));
1698       
1699
1700       CkPrintf("Optimal Model: lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1701     
1702     
1703       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1704       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1705         // TODO: lookup only control points that are relevant instead of all of them
1706         const std::string &name = cpsIter->first;
1707         const std::pair<int,int> &bounds = cpsIter->second;
1708         const int lb = bounds.first;
1709         const int ub = bounds.second;
1710         
1711         if(newval < lb){
1712           newControlPoints[name] = lb;
1713         } else if(newval > ub){
1714           newControlPoints[name] = ub;
1715         } else {
1716           newControlPoints[name] = newval;
1717         } 
1718         
1719       }
1720       
1721       
1722     }
1723     
1724  
1725
1726   } else if ( whichTuningScheme == UseSteering ) {
1727           // -----------------------------------------------------------
1728           //  STEERING BASED ON KNOWLEDGE
1729
1730           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1731           // plans are only generated after 3 phases
1732
1733           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1734           instrumentedPhase *prevPhase = previousPhaseData();
1735
1736           if(phase_id%4 == 0){
1737                   CkPrintf("Steering based on 2 phases ago:\n");
1738                   twoAgoPhase->print();
1739                   CkPrintf("\n");
1740                   fflush(stdout);
1741
1742                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1743
1744
1745                   // ========================================= Concurrency =============================================
1746                   // See if idle time is high:
1747                   {
1748                           double idleTime = twoAgoPhase->idleTime.avg;
1749                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1750                           fflush(stdout);
1751                           if(idleTime > 0.10){
1752                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1753                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1754
1755                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1756
1757                                   bool found = false;
1758                                   std::string cpName;
1759                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1760                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1761                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1762                                           cpName = iter->first;
1763                                           info = &iter->second;
1764
1765                                           // Initialize a new plan based on two phases ago
1766                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1767
1768                                           CkPrintf("Steering found knob to turn\n");
1769                                           fflush(stdout);
1770
1771                                           if(info->first == ControlPoint::EFF_INC){
1772                                                   const int maxValue = controlPointSpace[cpName].second;
1773                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1774                                                   if(twoAgoValue+1 <= maxValue){
1775                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1776                                                   }
1777                                           } else {
1778                                                   const int minValue = controlPointSpace[cpName].second;
1779                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1780                                                   if(twoAgoValue-1 >= minValue){
1781                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1782                                                   }
1783                                           }
1784
1785                                           possibleNextStepPlans.push_back(aNewPlan);
1786
1787                                   }
1788                           }
1789                   }
1790
1791                   // ========================================= Grain Size =============================================
1792                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1793                   {
1794                           double overheadTime = twoAgoPhase->overheadTime.avg;
1795                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1796                           fflush(stdout);
1797                           if(overheadTime > 0.10){
1798                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1799                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1800
1801                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1802
1803                                   bool found = false;
1804                                   std::string cpName;
1805                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1806                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1807                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1808                                           cpName = iter->first;
1809                                           info = &iter->second;
1810
1811                                           // Initialize a new plan based on two phases ago
1812                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1813
1814
1815
1816                                           CkPrintf("Steering found knob to turn\n");
1817                                           fflush(stdout);
1818
1819                                           if(info->first == ControlPoint::EFF_INC){
1820                                                   const int maxValue = controlPointSpace[cpName].second;
1821                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1822                                                   if(twoAgoValue+1 <= maxValue){
1823                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1824                                                   }
1825                                           } else {
1826                                                   const int minValue = controlPointSpace[cpName].second;
1827                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1828                                                   if(twoAgoValue-1 >= minValue){
1829                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1830                                                   }
1831                                           }
1832
1833                                           possibleNextStepPlans.push_back(aNewPlan);
1834
1835                                   }
1836
1837                           }
1838                   }
1839                   // ========================================= GPU Offload =============================================
1840                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1841                   {
1842                           double idleTime = twoAgoPhase->idleTime.avg;
1843                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1844                           fflush(stdout);
1845                           if(idleTime > 0.10){
1846                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1847                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1848
1849                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1850
1851                                   bool found = false;
1852                                   std::string cpName;
1853                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1854                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1855                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1856                                           cpName = iter->first;
1857                                           info = &iter->second;
1858
1859                                           // Initialize a new plan based on two phases ago
1860                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1861
1862
1863                                           CkPrintf("Steering found knob to turn\n");
1864                                           fflush(stdout);
1865
1866                                           if(info->first == ControlPoint::EFF_DEC){
1867                                                   const int maxValue = controlPointSpace[cpName].second;
1868                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1869                                                   if(twoAgoValue+1 <= maxValue){
1870                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1871                                                   }
1872                                           } else {
1873                                                   const int minValue = controlPointSpace[cpName].second;
1874                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1875                                                   if(twoAgoValue-1 >= minValue){
1876                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1877                                                   }
1878                                           }
1879
1880                                           possibleNextStepPlans.push_back(aNewPlan);
1881
1882                                   }
1883
1884                           }
1885                   }
1886
1887                   // ========================================= Done =============================================
1888
1889
1890                   if(possibleNextStepPlans.size() > 0){
1891                           newControlPoints = possibleNextStepPlans[0];
1892                   }
1893
1894
1895                   CkPrintf("Steering done for this phase\n");
1896                   fflush(stdout);
1897
1898           }  else {
1899                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1900                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1901                   fflush(stdout);
1902           }
1903
1904
1905
1906   } else if( whichTuningScheme == SimulatedAnnealing ) {
1907
1908     // -----------------------------------------------------------
1909     //  SIMULATED ANNEALING
1910     //  Simulated Annealing style hill climbing method
1911     //
1912     //  Find the best search space configuration, and try something
1913     //  nearby it, with a radius decreasing as phases increase
1914
1915     CkPrintf("Finding best phase\n");
1916     instrumentedPhase *bestPhase = allData.findBest();  
1917     CkPrintf("best found:\n"); 
1918     bestPhase->print(); 
1919     CkPrintf("\n"); 
1920     
1921
1922     const int convergeByPhase = 100;
1923     // Determine from 0.0 to 1.0 how far along we are
1924     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1925
1926     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1927     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1928       const std::string &name = cpsIter->first;
1929       const std::pair<int,int> &bounds = cpsIter->second;
1930       const int minValue = bounds.first;
1931       const int maxValue = bounds.second;
1932       
1933       const int before = bestPhase->controlPoints[name];   
1934   
1935       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1936
1937       int high = min(before+range, maxValue);
1938       int low = max(before-range, minValue);
1939       
1940       newControlPoints[name] = low;
1941       if(high-low > 0){
1942         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1943       } 
1944       
1945     }
1946
1947   } else if ( whichTuningScheme == DivideAndConquer ) {
1948
1949           // -----------------------------------------------------------
1950           //  STEERING FOR Divide & Conquer Programs
1951           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1952           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1953           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1954
1955           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1956           // plans are only generated after 3 phases
1957
1958           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1959           instrumentedPhase *prevPhase = previousPhaseData();
1960
1961           if(phase_id%4 == 0){
1962                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1963                   twoAgoPhase->print();
1964                   CkPrintf("\n");
1965                   fflush(stdout);
1966
1967                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1968
1969
1970                   // ========================================= Concurrency =============================================
1971                   // See if idle time is high:
1972                   {
1973                           double idleTime = twoAgoPhase->idleTime.avg;
1974                           double overheadTime = twoAgoPhase->overheadTime.avg;
1975
1976
1977                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1978                           fflush(stdout);
1979                           if(idleTime+overheadTime > 0.10){
1980                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1981                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1982
1983                                   int direction = -1;
1984                                   if (idleTime>overheadTime){
1985                                           // We need to decrease the grain size, or increase the available concurrency
1986                                           direction = 1;
1987                                   }
1988
1989                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1990
1991                                   bool found = false;
1992                                   std::string cpName;
1993                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1994                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1995                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1996                                           cpName = iter->first;
1997                                           info = &iter->second;
1998                                           
1999                                           // Initialize a new plan based on two phases ago
2000                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2001                                           bool newPlanExists = false;
2002
2003                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2004                                           fflush(stdout);
2005
2006
2007
2008                                           if(info->first == ControlPoint::EFF_INC){
2009                                                   const int minValue = controlPointSpace[cpName].first;
2010                                                   const int maxValue = controlPointSpace[cpName].second;
2011                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2012                                                   const int newVal = twoAgoValue+1*direction;
2013                                                   if(newVal <= maxValue && newVal >= minValue){
2014                                                     aNewPlan[cpName] = newVal;
2015                                                     newPlanExists = true;
2016                                                   } else {
2017                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2018                                                   }
2019                                           } else {
2020                                                   const int minValue = controlPointSpace[cpName].first;
2021                                                   const int maxValue = controlPointSpace[cpName].second;
2022                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2023                                                   const int newVal = twoAgoValue-1*direction;
2024                                                   if(newVal <= maxValue && newVal >= minValue){
2025                                                           aNewPlan[cpName] = newVal;
2026                                                           newPlanExists = true;
2027                                                   } else {
2028                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2029                                                   }
2030                                           }
2031
2032                                           if(newPlanExists){
2033                                             possibleNextStepPlans.push_back(aNewPlan);
2034                                           }
2035                                   }
2036                           }
2037                   }
2038
2039                   if(possibleNextStepPlans.size() > 0){
2040                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2041                     newControlPoints = possibleNextStepPlans[0];
2042                   } else {
2043                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2044                   }
2045           }
2046
2047   } else if( whichTuningScheme == Simplex ) {
2048
2049           // -----------------------------------------------------------
2050           //  Nelder Mead Simplex Algorithm
2051           //
2052           //  A scheme that takes a simplex (n+1 points) and moves it
2053           //  toward the minimum, eventually converging there.
2054
2055           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2056
2057   } else if( whichTuningScheme == ExhaustiveSearch ) {
2058     
2059     // -----------------------------------------------------------
2060     // EXHAUSTIVE SEARCH
2061    
2062     int numDimensions = controlPointSpace.size();
2063     CkAssert(numDimensions > 0);
2064   
2065     vector<int> lowerBounds(numDimensions);
2066     vector<int> upperBounds(numDimensions); 
2067   
2068     int d=0;
2069     std::map<std::string, pair<int,int> >::iterator iter;
2070     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2071       //    CkPrintf("Examining dimension %d\n", d);
2072       lowerBounds[d] = iter->second.first;
2073       upperBounds[d] = iter->second.second;
2074       d++;
2075     }
2076    
2077     // get names for each dimension (control point)
2078     vector<std::string> names(numDimensions);
2079     d=0;
2080     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2081       names[d] = niter->first;
2082       d++;
2083     }
2084   
2085   
2086     // Create the first possible configuration
2087     vector<int> config = lowerBounds;
2088     config.push_back(0);
2089   
2090     // Increment until finding an unused configuration
2091     allData.cleanupNames(); // put -1 values in for any control points missing
2092     std::vector<instrumentedPhase*> &phases = allData.phases;     
2093
2094     while(true){
2095     
2096       std::vector<instrumentedPhase*>::iterator piter; 
2097       bool testedConfiguration = false; 
2098       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2099       
2100         // Test if the configuration matches this phase
2101         bool match = true;
2102         for(int j=0;j<numDimensions;j++){
2103           match &= (*piter)->controlPoints[names[j]] == config[j];
2104         }
2105       
2106         if(match){
2107           testedConfiguration = true; 
2108           break;
2109         } 
2110       } 
2111     
2112       if(testedConfiguration == false){ 
2113         break;
2114       } 
2115     
2116       // go to next configuration
2117       config[0] ++;
2118       // Propagate "carrys"
2119       for(int i=0;i<numDimensions;i++){
2120         if(config[i] > upperBounds[i]){
2121           config[i+1]++;
2122           config[i] = lowerBounds[i];
2123         }
2124       }
2125       // check if we have exhausted all possible values
2126       if(config[numDimensions] > 0){
2127         break;
2128       }
2129        
2130     }
2131   
2132     if(config[numDimensions] > 0){
2133       for(int i=0;i<numDimensions;i++){ 
2134         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2135       }
2136     }
2137
2138     // results are now in config[i]
2139     
2140     for(int i=0; i<numDimensions; i++){
2141       newControlPoints[names[i]] = config[i];
2142       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2143     }
2144
2145
2146   } else {
2147     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2148   }
2149
2150
2151   const double endGenerateTime = CmiWallTimer();
2152   
2153   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2154   
2155 }
2156
2157
2158
2159
2160 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2161
2162
2163 /// Get control point value from range of integers [lb,ub]
2164 int controlPoint(const char *name, int lb, int ub){
2165   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2166   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2167   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2168   int result;
2169
2170   // if we already have control point values for phase, return them
2171   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2172     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2173     return thisPhaseData->controlPoints[std::string(name)];
2174   }
2175   
2176
2177   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2178     // For the first few phases, just use the lower bound, or the default if one was provided 
2179     // This ensures that the ranges for all the control points are known before we do anything fancy
2180     result = lb;
2181
2182     if(defaultControlPointValues.count(std::string(name)) > 0){
2183       int v = defaultControlPointValues[std::string(name)];
2184       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2185       result = v;
2186     }
2187
2188   } else if(controlPointSpace.count(std::string(name)) == 0){
2189     // if this is the first time we've seen the CP, then return the lower bound
2190     result = lb;
2191     
2192   }  else {
2193     // Generate a plan one time for each phase
2194     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2195     
2196     // Use the planned value:
2197     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2198     
2199   }
2200
2201   if(!isInRange(result,ub,lb)){
2202     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2203     fflush(stdout);
2204     fflush(stderr);
2205   }
2206   CkAssert(isInRange(result,ub,lb));
2207   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2208
2209   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2210
2211   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2212   //  thisPhaseData->print();
2213   
2214   return result;
2215 }
2216
2217
2218 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2219   CkAssert(sizeof(lb) == 4);
2220   CkAssert(CkMyPe() == 0);
2221   return controlPoint("FortranCP", *lb, *ub);
2222 }
2223
2224
2225
2226
2227 /// Inform the control point framework that a named control point affects the priorities of some array  
2228 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2229 //   CkGroupID aid = arraybase.ckGetArrayID();
2230 //   int groupIdx = aid.idx;
2231 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2232 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2233 // }
2234
2235
2236 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2237 // void controlPointPriorityEntry(const char *name, int idx){
2238 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2239 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2240 // }
2241
2242
2243
2244
2245
2246 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2247
2248     This function decomposes the algorithm into a state machine that allows it to
2249     evaluate one or more configurations through subsequent clls to this function.
2250     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2251
2252     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2253     so a list of them will be created and they will be evaluated, one per call.
2254
2255     Currently there is no stopping criteria, but the simplex ought to contract down
2256     to a few close configurations, and hence not much change will happen after this 
2257     point.
2258
2259  */
2260 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2261
2262         if(useBestKnown){
2263                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2264                 return;
2265         }
2266
2267
2268         if(firstSimplexPhase< 0){
2269                 firstSimplexPhase = allData.phases.size()-1;
2270                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2271         }
2272
2273         int n = controlPointSpace.size();
2274
2275         CkAssert(n>=2);
2276
2277
2278         if(simplexState == beginning){
2279                 // First we evaluate n+1 random points, then we go to a different state
2280                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2281                         CkPrintf("Simplex Tuning: chose random configuration\n");
2282
2283                         // Choose random values from the middle third of the range in each dimension
2284                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2285                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2286                                 const std::string &name = cpsIter->first;
2287                                 const std::pair<int,int> &bounds = cpsIter->second;
2288                                 const int lb = bounds.first;
2289                                 const int ub = bounds.second;
2290                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2291                         }
2292                 } else {
2293                         // Set initial simplex:
2294                         for(int i=0; i<n+1; i++){
2295                                 simplexIndices.insert(firstSimplexPhase+i);
2296                         }
2297                         CkAssert(simplexIndices.size() == n+1);
2298
2299                         // Transition to reflecting state
2300                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2301
2302                 }
2303
2304         } else if (simplexState == reflecting){
2305                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2306                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2307
2308                 // Find the highest time from other points in the simplex
2309                 double highestTimeForOthersInSimplex = 0.0;
2310                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2311                         double t = allData.phases[*iter]->medianTime();
2312                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2313                                 highestTimeForOthersInSimplex = t;
2314                         }
2315                 }
2316
2317                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2318
2319                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2320                         // if y* < yl,  transition to "expanding"
2321                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2322
2323                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2324                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2325                         CkAssert(simplexIndices.size() == n+1);
2326                         simplexIndices.erase(worstPhase);
2327                         CkAssert(simplexIndices.size() == n);
2328                         simplexIndices.insert(pPhase);
2329                         CkAssert(simplexIndices.size() == n+1);
2330                         // Transition to reflecting state
2331                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2332
2333                 } else {
2334                         // if y* > yh
2335                         if(recentPhaseTime <= worstTime){
2336                                 // replace Ph with P*
2337                                 CkAssert(simplexIndices.size() == n+1);
2338                                 simplexIndices.erase(worstPhase);
2339                                 CkAssert(simplexIndices.size() == n);
2340                                 simplexIndices.insert(pPhase);
2341                                 CkAssert(simplexIndices.size() == n+1);
2342                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2343                                 worstPhase = pPhase;
2344                                 // Just as a sanity check, make sure we don't use the non-updated values.
2345                                 worst.clear();
2346                         }
2347
2348                         // Now, form P** and do contracting phase
2349                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2350
2351                 }
2352
2353         } else if (simplexState == doneExpanding){
2354                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2355                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2356                 // A new configuration has been evaluated
2357
2358                 // Check to see if y** < y1
2359                 if(recentPhaseTime < bestTime){
2360                         // replace Ph by P**
2361                         CkAssert(simplexIndices.size() == n+1);
2362                         simplexIndices.erase(worstPhase);
2363                         CkAssert(simplexIndices.size() == n);
2364                         simplexIndices.insert(p2Phase);
2365                         CkAssert(simplexIndices.size() == n+1);
2366                 } else {
2367                         //      replace Ph by P*
2368                         CkAssert(simplexIndices.size() == n+1);
2369                         simplexIndices.erase(worstPhase);
2370                         CkAssert(simplexIndices.size() == n);
2371                         simplexIndices.insert(pPhase);
2372                         CkAssert(simplexIndices.size() == n+1);
2373                 }
2374
2375                 // Transition to reflecting state
2376                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2377
2378         }  else if (simplexState == contracting){
2379                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2380                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2381                 // A new configuration has been evaluated
2382
2383                 // Check to see if y** > yh
2384                 if(recentPhaseTime <= worstTime){
2385                         // replace Ph by P**
2386                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2387                         CkAssert(simplexIndices.size() == n+1);
2388                         simplexIndices.erase(worstPhase);
2389                         CkAssert(simplexIndices.size() == n);
2390                         simplexIndices.insert(p2Phase);
2391                         CkAssert(simplexIndices.size() == n+1);
2392                         // Transition to reflecting state
2393                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2394
2395                 } else {
2396                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2397                         simplexState = stillContracting;
2398
2399                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2400                         stillMustContractList = simplexIndices;
2401
2402                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2403                 }
2404
2405         } else if (simplexState == stillContracting){
2406                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2407
2408                 if(stillMustContractList.size()>0){
2409                         int c = *stillMustContractList.begin();
2410                         stillMustContractList.erase(c);
2411                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2412
2413                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2414
2415                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2416                         int v = 0;
2417                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2418                                 const std::string &name = cpsIter->first;
2419                                 const std::pair<int,int> &bounds = cpsIter->second;
2420                                 const int lb = bounds.first;
2421                                 const int ub = bounds.second;
2422
2423                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2424
2425                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2426                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2427                                 v++;
2428                         }
2429                 } else {
2430                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2431                         CkAssert(stillMustContractList.size() == 0);
2432                         simplexIndices.clear();
2433                         CkAssert(simplexIndices.size()==0);
2434                         for(int i=0; i<n+1; i++){
2435                                 simplexIndices.insert(allData.phases.size()-2-i);
2436                         }
2437                         CkAssert(simplexIndices.size()==n+1);
2438
2439                         // Transition to reflecting state
2440                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2441
2442                 }
2443
2444
2445         } else if (simplexState == expanding){
2446                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2447                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2448                 // A new configuration has been evaluated
2449
2450                 // determine if y** < yl
2451                 if(recentPhaseTime < bestTime){
2452                         // replace Ph by P**
2453                         CkAssert(simplexIndices.size() == n+1);
2454                         simplexIndices.erase(worstPhase);
2455                         CkAssert(simplexIndices.size() == n);
2456                         simplexIndices.insert(p2Phase);
2457                         CkAssert(simplexIndices.size() == n+1);
2458                         // Transition to reflecting state
2459                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2460
2461                 } else {
2462                         // else, replace ph with p*
2463                         CkAssert(simplexIndices.size() == n+1);
2464                         simplexIndices.erase(worstPhase);
2465                         CkAssert(simplexIndices.size() == n);
2466                         simplexIndices.insert(pPhase);
2467                         CkAssert(simplexIndices.size() == n+1);
2468                         // Transition to reflecting state
2469                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2470                 }
2471
2472
2473         } else {
2474                 CkAbort("Unknown simplexState");
2475         }
2476
2477 }
2478
2479
2480
2481 /** Replace the worst point with its reflection across the centroid. */
2482 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2483
2484         int n = controlPointSpace.size();
2485
2486         printSimplex(allData);
2487
2488         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2489
2490
2491         // Quit if the diameter of our simplex is small
2492         double maxr = 0.0;
2493         for(int i=0; i<n+1; i++){
2494                 //              Compute r^2 of this simplex point from the centroid
2495                 double r2 = 0.0;
2496                 std::vector<double> p = pointCoords(allData, i);
2497                 for(int d=0; d<p.size(); d++){
2498                         double r1 = (p[d] * centroid[d]);
2499                         r2 += r1*r1;
2500                 }
2501                 if(r2 > maxr)
2502                         maxr = r2;
2503         }
2504
2505 #if 0
2506         // At some point we quit this tuning
2507         if(maxr < 10){
2508                 useBestKnown = true;
2509                 instrumentedPhase *best = allData.findBest();
2510                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2511
2512                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2513                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2514                         const std::string &name = cpsIter->first;
2515                         newControlPoints[name] =  best->controlPoints[name];
2516                 }
2517         }
2518 #endif
2519
2520         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2521
2522         pPhase = allData.phases.size()-1;
2523         P.resize(n);
2524         for(int i=0; i<n; i++){
2525                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2526         }
2527
2528         for(int i=0; i<P.size(); i++){
2529                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2530         }
2531
2532
2533         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2534         int v = 0;
2535         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2536                 const std::string &name = cpsIter->first;
2537                 const std::pair<int,int> &bounds = cpsIter->second;
2538                 const int lb = bounds.first;
2539                 const int ub = bounds.second;
2540                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2541                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2542                 v++;
2543         }
2544
2545
2546         // Transition to "reflecting" state
2547         simplexState = reflecting;
2548         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2549
2550 }
2551
2552
2553
2554
2555 /** Replace the newly tested reflection with a further expanded version of itself. */
2556 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2557         int n = controlPointSpace.size();
2558         printSimplex(allData);
2559
2560         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2561         // I believe the equation for P** in the text on page 308 is correct.
2562
2563         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2564
2565
2566         p2Phase = allData.phases.size()-1;
2567         P2.resize(n);
2568         for(int i=0; i<n; i++){
2569                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2570         }
2571
2572         for(int i=0; i<P2.size(); i++){
2573                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2574         }
2575
2576
2577         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2578         int v = 0;
2579         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2580                 const std::string &name = cpsIter->first;
2581                 const std::pair<int,int> &bounds = cpsIter->second;
2582                 const int lb = bounds.first;
2583                 const int ub = bounds.second;
2584                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2585                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2586                 v++;
2587         }
2588
2589
2590         // Transition to "doneExpanding" state
2591         simplexState = doneExpanding;
2592         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2593
2594 }
2595
2596
2597
2598
2599 /** Replace the newly tested reflection with a further expanded version of itself. */
2600 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2601         int n = controlPointSpace.size();
2602         printSimplex(allData);
2603
2604         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2605
2606
2607         p2Phase = allData.phases.size()-1;
2608         P2.resize(n);
2609         for(int i=0; i<n; i++){
2610                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2611         }
2612
2613         for(int i=0; i<P2.size(); i++){
2614                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2615         }
2616
2617
2618         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2619         int v = 0;
2620         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2621                 const std::string &name = cpsIter->first;
2622                 const std::pair<int,int> &bounds = cpsIter->second;
2623                 const int lb = bounds.first;
2624                 const int ub = bounds.second;
2625                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2626                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2627                 v++;
2628         }
2629         
2630         
2631         // Transition to "contracting" state
2632         simplexState = contracting;
2633         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2634
2635 }
2636
2637
2638
2639
2640 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2641         int n = controlPointSpace.size();
2642
2643         // Find worst performing point in the simplex
2644         worstPhase = -1;
2645         worstTime = -1.0;
2646         bestPhase = 10000000;
2647         bestTime = 10000000;
2648         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2649                 double t = allData.phases[*iter]->medianTime();
2650                 if(t > worstTime){
2651                         worstTime = t;
2652                         worstPhase = *iter;
2653                 }
2654                 if(t < bestTime){
2655                         bestTime = t;
2656                         bestPhase = *iter;
2657                 }
2658         }
2659         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2660
2661         best = pointCoords(allData, bestPhase);
2662         CkAssert(best.size() == n);
2663
2664         worst = pointCoords(allData, worstPhase);
2665         CkAssert(worst.size() == n);
2666
2667         // Calculate centroid of the remaining points in the simplex
2668         centroid.resize(n);
2669         for(int i=0; i<n; i++){
2670                 centroid[i] = 0.0;
2671         }
2672
2673         int numPts = 0;
2674
2675         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2676                 if(*iter != worstPhase){
2677                         numPts ++;
2678                         // Accumulate into the result vector
2679                         int c = 0;
2680                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2681                                 centroid[c] += citer->second;
2682                                 c++;
2683                         }
2684
2685                 }
2686         }
2687
2688         // Now divide the sums by the number of points.
2689         for(int v = 0; v<centroid.size(); v++) {
2690                 centroid[v] /= (double)numPts;
2691         }
2692
2693         CkAssert(centroid.size() == n);
2694
2695         for(int i=0; i<centroid.size(); i++){
2696                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2697         }
2698
2699
2700 }
2701
2702
2703
2704 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2705         std::vector<double> result;
2706         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2707                 result.push_back((double)citer->second);
2708         }
2709         return result;
2710 }
2711
2712
2713
2714
2715 void ControlPointWriteOutputToDisk(){
2716         CkAssert(CkMyPe() == 0);
2717         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2718 }
2719
2720
2721
2722 /*! @} */
2723
2724 #ifdef CP_DISABLE_TRACING
2725 #include "ControlPointsNoTrace.def.h"
2726 #else
2727 #include "ControlPoints.def.h"
2728 #endif