Fixing a corner case where not enough timings are available to perform control point...
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15
16 #include <climits>
17 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
18  
19
20 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
21 /**  @{ */
22
23 using namespace std;
24
25 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
26
27
28 //#undef DEBUGPRINT
29 //#define DEBUGPRINT 4
30
31 static void periodicProcessControlPoints(void* ptr, double currWallTime);
32
33
34 // A pointer to this PE's controlpoint manager Proxy
35 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
36 /* readonly */ int random_seed;
37 /* readonly */ long controlPointSamplePeriod;
38 /* readonly */ int whichTuningScheme;
39 /* readonly */ bool writeDataFileAtShutdown;
40 /* readonly */ bool shouldFilterOutputData;
41 /* readonly */ bool loadDataFileAtStartup;
42 /* readonly */ bool shouldGatherMemoryUsage;
43 /* readonly */ bool shouldGatherUtilization;
44 /* readonly */ bool shouldGatherAll;
45 /* readonly */ char CPDataFilename[512];
46
47
48
49 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
50 /// These probably come from the command line arguments, so are available only on PE 0
51 std::map<std::string, int> defaultControlPointValues;
52
53
54
55 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic}  tuningScheme;
56
57
58
59 void printTuningScheme(){
60   switch(whichTuningScheme){
61   case RandomSelection:
62     CkPrintf("Tuning Scheme: RandomSelection\n");
63     break;
64   case AlwaysDefaults:
65     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
66     break;
67   case SimulatedAnnealing:
68     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
69     break;
70   case ExhaustiveSearch:
71     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
72     break;
73   case CriticalPathAutoPrioritization:
74     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
75     break;
76   case UseBestKnownTiming:
77     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
78     break;
79   case UseSteering:
80     CkPrintf("Tuning Scheme: UseSteering\n");
81     break;
82   case MemoryAware:
83     CkPrintf("Tuning Scheme: MemoryAware\n");
84     break;
85   case Simplex:
86     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
87     break;
88   case DivideAndConquer:
89     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
90     break;
91   case LDBPeriod:
92     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
93     break;
94   case LDBPeriodLinear:
95     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
96     break;
97   case LDBPeriodQuadratic:
98     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
99     break;
100   default:
101     CkPrintf("Unknown tuning scheme\n");
102     break;
103   }
104   fflush(stdout);
105 }
106
107
108
109 /// A reduction type that combines idle time measurements (min/sum/max etc.)
110 CkReduction::reducerType idleTimeReductionType;
111 /// A reducer that combines idle time measurements (min/sum/max etc.)
112 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
113   double ret[3];
114   if(nMsg > 0){
115     CkAssert(msgs[0]->getSize()==3*sizeof(double));
116     double *m=(double *)msgs[0]->getData();
117     ret[0]=m[0];
118     ret[1]=m[1];
119     ret[2]=m[2];
120   }
121   for (int i=1;i<nMsg;i++) {
122     CkAssert(msgs[i]->getSize()==3*sizeof(double));
123     double *m=(double *)msgs[i]->getData();
124     ret[0]=min(ret[0],m[0]);
125     ret[1]+=m[1];
126     ret[2]=max(ret[2],m[2]);
127   }  
128   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
129 }
130
131
132
133 /// A reduction type that combines idle, overhead, and memory measurements
134 CkReduction::reducerType allMeasuresReductionType;
135 /// A reducer that combines idle, overhead, and memory measurements
136 #define ALL_REDUCTION_SIZE 12
137 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
138   double ret[ALL_REDUCTION_SIZE];
139   if(nMsg > 0){
140     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
141     double *m=(double *)msgs[0]->getData();
142     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
143   }
144   for (int i=1;i<nMsg;i++) {
145     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[i]->getData();
147     // idle time (min/sum/max)
148     ret[0]=min(ret[0],m[0]);
149     ret[1]+=m[1];
150     ret[2]=max(ret[2],m[2]);
151     // overhead time (min/sum/max)
152     ret[3]=min(ret[3],m[3]);
153     ret[4]+=m[4];
154     ret[5]=max(ret[5],m[5]);
155     // mem usage (max)
156     ret[6]=max(ret[6],m[6]);
157     // bytes per invocation for two types of entry methods
158     ret[7]+=m[7];
159     ret[8]+=m[8];
160     ret[9]+=m[9];
161     ret[10]+=m[10];
162     // Grain size (avg)
163     ret[11]+=m[11];
164   }  
165   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
166 }
167
168
169 /// Registers the control point framework's reduction handlers at startup on each PE
170 /*initproc*/ void registerCPReductions(void) {
171   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
172   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
173 }
174
175
176
177
178
179
180 /// Return an integer between 0 and num-1 inclusive
181 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
182 unsigned int randInt(unsigned int num, const char* name, int seed=0){
183   CkAssert(num > 0);
184   CkAssert(num < 1000);
185
186   unsigned long hash = 0;
187   unsigned int c;
188   unsigned char * str = (unsigned char*)name;
189
190   while (c = *str++){
191     unsigned int c2 = (c+64)%128;
192     unsigned int c3 = (c2*5953)%127;
193     hash = c3 + (hash << 6) + (hash << 16) - hash;
194   }
195
196   unsigned long shuffled1 = (hash*2083)%7907;
197   unsigned long shuffled2 = (seed*4297)%2017;
198
199   unsigned long shuffled3 = (random_seed*4799)%7919;
200
201   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
202
203   unsigned int result = ((namehash * 6029) % 1117) % num;
204
205   CkAssert(result >=0 && result < num);
206   return result;
207 }
208
209
210
211 controlPointManager::controlPointManager() {
212   generatedPlanForStep = -1;
213
214     exitWhenReady = false;
215     alreadyRequestedMemoryUsage = false;   
216     alreadyRequestedIdleTime = false;
217     alreadyRequestedAll = false;
218     
219     instrumentedPhase * newPhase = new instrumentedPhase();
220     allData.phases.push_back(newPhase);   
221     
222     frameworkShouldAdvancePhase = false;
223     haveControlPointChangeCallback = false;
224 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
225     
226     ControlPoint::initControlPointEffects();
227
228     phase_id = 0;
229
230     if(loadDataFileAtStartup){    
231       loadDataFile();
232     }
233
234     
235     if(CkMyPe() == 0){
236       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
237     }
238
239     traceRegisterUserEvent("No currently executing message", 5000);
240     traceRegisterUserEvent("Zero time along critical path", 5010);
241     traceRegisterUserEvent("Positive total time along critical path", 5020);
242     traceRegisterUserEvent("env->setPathHistory()", 6000);
243     traceRegisterUserEvent("Critical Path", 5900);
244     traceRegisterUserEvent("Table Entry", 5901);
245
246
247 #if USER_EVENT_FOR_REGISTERTERMINALPATH
248     traceRegisterUserEvent("registerTerminalPath", 100); 
249 #endif
250
251   }
252   
253
254  controlPointManager::~controlPointManager(){
255 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
256   }
257
258
259   /// Loads the previous run data file
260   void controlPointManager::loadDataFile(){
261     ifstream infile(CPDataFilename);
262     vector<std::string> names;
263     std::string line;
264   
265     while(getline(infile,line)){
266       if(line[0] != '#')
267         break;
268     }
269   
270     int numTimings = 0;
271     std::istringstream n(line);
272     n >> numTimings;
273   
274     while(getline(infile,line)){ 
275       if(line[0] != '#') 
276         break; 
277     }
278
279     int numControlPointNames = 0;
280     std::istringstream n2(line);
281     n2 >> numControlPointNames;
282   
283     for(int i=0; i<numControlPointNames; i++){
284       getline(infile,line);
285       names.push_back(line);
286     }
287
288     for(int i=0;i<numTimings;i++){
289       getline(infile,line);
290       while(line[0] == '#')
291         getline(infile,line); 
292
293       instrumentedPhase * ips = new instrumentedPhase();
294
295       std::istringstream iss(line);
296
297       // Read memory usage for phase
298       iss >> ips->memoryUsageMB;
299       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
300
301       // Read idle time
302       iss >> ips->idleTime.min;
303       iss >> ips->idleTime.avg;
304       iss >> ips->idleTime.max;
305
306       // Read overhead time
307       iss >> ips->overheadTime.min;
308       iss >> ips->overheadTime.avg;
309       iss >> ips->overheadTime.max;
310
311       // read bytePerInvoke
312       iss >> ips->bytesPerInvoke;
313
314       // read grain size
315       iss >> ips->grainSize;
316
317       // Read control point values
318       for(int cp=0;cp<numControlPointNames;cp++){
319         int cpvalue;
320         iss >> cpvalue;
321         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
322       }
323
324       // ignore median time
325       double mt;
326       iss >> mt;
327
328       // Read all times
329
330       double time;
331
332       while(iss >> time){
333         ips->times.push_back(time);
334 #if DEBUGPRINT > 5
335         CkPrintf("read time %lf from file\n", time);
336 #endif
337       }
338
339       allData.phases.push_back(ips);
340
341     }
342
343     infile.close();
344   }
345
346
347
348   /// Add the current data to allData and output it to a file
349   void controlPointManager::writeDataFile(){
350     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
351     ofstream outfile(CPDataFilename);
352     allData.cleanupNames();
353
354 //    string s = allData.toString();
355 //    CkPrintf("At end: \n %s\n", s.c_str());
356
357     if(shouldFilterOutputData){
358       allData.verify();
359       allData.filterOutIncompletePhases();
360     }
361
362 //    string s2 = allData.toString();
363 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
364     if(allData.toString().length() > 10){
365       outfile << allData.toString();
366     } else {
367       outfile << " No data available to save to disk " << endl;
368     }
369     outfile.close();
370   }
371
372   /// User can register a callback that is called when application should advance to next phase
373   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
374     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
375     controlPointChangeCallback = cb;
376     haveControlPointChangeCallback = true;
377   }
378
379
380 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
381 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
382   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
383 }
384
385   /// Called periodically by the runtime to handle the control points
386   /// Currently called on each PE
387   void controlPointManager::processControlPoints(){
388
389     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
390
391
392     //==========================================================================================
393     // Print the data for each phase
394
395     const int s = allData.phases.size();
396
397 #if DEBUGPRINT
398     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
399     for(int p=0;p<s;++p){
400       const instrumentedPhase &phase = allData.phases[p];
401       const idleTimeContainer &idle = phase.idleTime;
402       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
403       vector<double> const &times = phase.times;
404
405       CkPrintf("Phase %d:\n", p);
406       idle.print();
407      //   CkPrintf("critical paths: (* affected by control point)\n");
408         //   for(int i=0;i<criticalPaths.size(); i++){
409         // If affected by a control point
410         //      criticalPaths[i].print();
411
412       //        criticalPaths[i].print();
413       //        CkPrintf("\n");
414         
415
416       //   }
417       CkPrintf("Timings:\n");
418       for(int i=0;i<times.size(); i++){
419         CkPrintf("%lf ", times[i]);
420       }
421       CkPrintf("\n");
422
423     }
424     
425     CkPrintf("\n\n");
426
427
428 #endif
429
430
431
432     //==========================================================================================
433     // If this is a phase during which we try to adapt control point values based on critical path
434 #if 0
435     if( s%5 == 4) {
436
437       // Find the most recent phase with valid critical path data and idle time measurements      
438       int whichPhase=-1;
439       for(int p=0; p<s; ++p){
440         const instrumentedPhase &phase = allData.phases[p];
441         const idleTimeContainer &idle = phase.idleTime;
442         if(idle.isValid() && phase.criticalPaths.size()>0 ){
443           whichPhase = p;
444         }
445       }
446       
447       
448       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
449       const instrumentedPhase &phase = allData.phases[whichPhase];
450       const idleTimeContainer &idle = phase.idleTime;
451       
452       if(idle.min > 0.1){
453         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
454         
455         // Determine the median critical path for this phase
456         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
457         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
458         CkAssert(phase.criticalPaths.size() > 0);
459         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
460         
461         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
462         
463         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
464           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
465
466
467           CkPrintf("Finding control points related to the critical path\n");
468           int cpcount = 0;
469           std::set<std::string> controlPointsAffectingCriticalPath;
470
471           
472           for(int e=0;e<path.getNumUsed();e++){
473             if(path.getUsedCount(e)>0){
474               int ep = path.getUsedEp(e);
475
476               std::map<std::string, std::set<int> >::iterator iter;
477               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
478                 if(iter->second.count(ep)>0){
479                   controlPointsAffectingCriticalPath.insert(iter->first);
480                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
481                   cpcount++;
482                 }
483               }
484               
485             }
486           }
487           
488
489           if(cpcount==0){
490             CkPrintf("No control points are known to affect the critical path\n");
491           } else {
492             double beginTime = CmiWallTimer();
493
494             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
495             
496             newControlPoints = phase.controlPoints;
497             
498             if(frameworkShouldAdvancePhase){
499               gotoNextPhase();  
500             }
501             
502             if(haveControlPointChangeCallback){ 
503 #if DEBUGPRINT
504               CkPrintf("Calling control point change callback\n");
505 #endif
506               // Create a high priority message and send it to the callback
507               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
508               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
509               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
510               controlPointChangeCallback.send(msg);
511               
512             }
513             
514             
515             // adjust the control points that can affect the critical path
516
517             char textDescription[4096*2];
518             textDescription[0] = '\0';
519
520             std::map<std::string,int>::iterator newCP;
521             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
522               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
523                 // decrease the value (increase priority) if within range
524                 int lowerbound = controlPointSpace[newCP->first].first;
525                 if(newCP->second > lowerbound){
526                   newControlPointsAvailable = true;
527                   newCP->second --;
528                 }
529               }
530             }
531            
532             // Create a string for a projections user event
533             if(1){
534               std::map<std::string,int>::iterator newCP;
535               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
536                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
537               }
538             }
539
540             char *userEventString = new char[4096*2];
541             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
542             
543             static int userEventCounter = 20000;
544             CkPrintf("USER EVENT: %s\n", userEventString);
545             
546             traceRegisterUserEvent(userEventString, userEventCounter); 
547             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
548             userEventCounter++;
549             
550           }
551           
552         } else {
553           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
554         }
555         
556         
557       }
558       
559     } else {
560       
561       
562     }
563    
564 #endif
565
566
567
568     if(frameworkShouldAdvancePhase){
569       gotoNextPhase();  
570     }
571     
572     if(haveControlPointChangeCallback){ 
573       // Create a high priority message and send it to the callback
574       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
575       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
576       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
577       controlPointChangeCallback.send(msg);
578     }
579     
580   }
581   
582   /// Determine if any control point is known to affect an entry method
583   bool controlPointManager::controlPointAffectsThisEP(int ep){
584     std::map<std::string, std::set<int> >::iterator iter;
585     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
586       if(iter->second.count(ep)>0){
587         return true;
588       }
589     }
590     return false;    
591   }
592   
593   /// Determine if any control point is known to affect a chare array  
594   bool controlPointManager::controlPointAffectsThisArray(int array){
595     std::map<std::string, std::set<int> >::iterator iter;
596     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
597       if(iter->second.count(array)>0){
598         return true;
599       }
600     }
601     return false;   
602   }
603   
604
605   /// The data from the current phase
606   instrumentedPhase * controlPointManager::currentPhaseData(){
607     int s = allData.phases.size();
608     CkAssert(s>=1);
609     return allData.phases[s-1];
610   }
611  
612
613   /// The data from the previous phase
614   instrumentedPhase * controlPointManager::previousPhaseData(){
615     int s = allData.phases.size();
616     if(s >= 2 && phase_id > 0) {
617       return allData.phases[s-2];
618     } else {
619       return NULL;
620     }
621   }
622  
623   /// The data from two phases back
624   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
625     int s = allData.phases.size();
626     if(s >= 3 && phase_id > 0) {
627       return allData.phases[s-3];
628     } else {
629       return NULL;
630     }
631   }
632   
633
634   /// Called by either the application or the Control Point Framework to advance to the next phase  
635   void controlPointManager::gotoNextPhase(){
636     
637 #ifndef CP_DISABLE_TRACING
638     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
639     fflush(stdout);
640
641     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
642       alreadyRequestedAll = true;
643       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
644       CkPrintf("Requesting all measurements\n");
645       thisProxy.requestAll(*cb);
646       delete cb;
647     
648     } else {
649       
650       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
651         alreadyRequestedMemoryUsage = true;
652         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
653         thisProxy.requestMemoryUsage(*cb);
654         delete cb;
655       }
656       
657       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
658         alreadyRequestedIdleTime = true;
659         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
660         thisProxy.requestIdleTime(*cb);
661         delete cb;
662       }
663     }
664     
665
666 #endif
667
668
669
670 #if CMK_LBDB_ON && 0
671     LBDatabase * myLBdatabase = LBDatabaseObj();
672     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
673     const CkVec<LBObj*> objs = myLBDB->getObjs();
674     const int objCount = myLBDB->getObjCount();
675     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
676     
677     floatType maxObjWallTime = -1.0;
678     
679     for(int i=0;i<objs.length();i++){
680       LBObj* o = objs[i];
681       const LDObjData d = o->ObjData();
682       floatType cpuTime = d.cpuTime;
683       floatType wallTime = d.wallTime;
684       // can also get object handles from the LDObjData struct
685       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
686       if(wallTime > maxObjWallTime){
687
688       }
689       
690     }
691
692     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
693     
694 #endif    
695
696
697     
698     // increment phase id
699     phase_id++;
700     
701
702     // Create new entry for the phase we are starting now
703     instrumentedPhase * newPhase = new instrumentedPhase();
704     allData.phases.push_back(newPhase);
705     
706     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
707
708   }
709
710   /// An application uses this to register an instrumented timing for this phase
711   void controlPointManager::setTiming(double time){
712     currentPhaseData()->times.push_back(time);
713
714 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
715        
716     // First we should register this currently executing message as a path, because it is likely an important one to consider.
717     //    registerTerminalEntryMethod();
718     
719     // save the critical path for this phase
720     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
721     //    maxTerminalPathHistory.reset();
722     
723     
724     // Reset the counts for the currently executing message
725     //resetThisEntryPath();
726         
727 #endif
728     
729   }
730   
731   /// Entry method called on all PEs to request CPU utilization statistics
732   void controlPointManager::requestIdleTime(CkCallback cb){
733 #ifndef CP_DISABLE_TRACING
734     double i = localControlPointTracingInstance()->idleRatio();
735     double idle[3];
736     idle[0] = i;
737     idle[1] = i;
738     idle[2] = i;
739     
740     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
741     
742     localControlPointTracingInstance()->resetTimings();
743
744     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
745 #else
746     CkAbort("Should not get here\n");
747 #endif
748   }
749   
750   /// All processors reduce their memory usages in requestIdleTime() to this method
751   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
752 #ifndef CP_DISABLE_TRACING
753     int size=msg->getSize() / sizeof(double);
754     CkAssert(size==3);
755     double *r=(double *) msg->getData();
756         
757     instrumentedPhase* prevPhase = previousPhaseData();
758     if(prevPhase != NULL){
759       prevPhase->idleTime.min = r[0];
760       prevPhase->idleTime.avg = r[1]/CkNumPes();
761       prevPhase->idleTime.max = r[2];
762       prevPhase->idleTime.print();
763       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
764     } else {
765       CkPrintf("There is no previous phase to store the idle time measurements\n");
766     }
767     
768     alreadyRequestedIdleTime = false;
769     checkForShutdown();
770     delete msg;
771 #else
772     CkAbort("Should not get here\n");
773 #endif
774   }
775
776
777
778
779
780
781   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
782   void controlPointManager::requestAll(CkCallback cb){
783 #ifndef CP_DISABLE_TRACING
784     TraceControlPoints *t = localControlPointTracingInstance();
785
786     double data[ALL_REDUCTION_SIZE];
787
788     double *idle = data;
789     double *over = data+3;
790     double *mem = data+6;
791     double *msgBytes = data+7;  
792     double *grainsize = data+11;  
793
794     const double i = t->idleRatio();
795     idle[0] = i;
796     idle[1] = i;
797     idle[2] = i;
798
799     const double o = t->overheadRatio();
800     over[0] = o;
801     over[1] = o;
802     over[2] = o;
803
804     const double m = t->memoryUsageMB();
805     mem[0] = m;
806
807     msgBytes[0] = t->b2;
808     msgBytes[1] = t->b3;
809     msgBytes[2] = t->b2mlen;
810     msgBytes[3] = t->b3mlen;
811
812     grainsize[0] = t->grainSize();
813     
814     localControlPointTracingInstance()->resetAll();
815
816     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
817 #else
818     CkAbort("Should not get here\n");
819 #endif
820   }
821   
822   /// All processors reduce their memory usages in requestIdleTime() to this method
823   void controlPointManager::gatherAll(CkReductionMsg *msg){
824 #ifndef CP_DISABLE_TRACING
825     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
826     int size=msg->getSize() / sizeof(double);
827     double *data=(double *) msg->getData();
828         
829     double *idle = data;
830     double *over = data+3;
831     double *mem = data+6;
832     double *msgBytes = data+7;
833     double *granularity = data+11;
834
835
836     //    std::string b = allData.toString();
837
838     instrumentedPhase* prevPhase = previousPhaseData();
839     if(prevPhase != NULL){
840       prevPhase->idleTime.min = idle[0];
841       prevPhase->idleTime.avg = idle[1]/CkNumPes();
842       prevPhase->idleTime.max = idle[2];
843       
844       prevPhase->overheadTime.min = over[0];
845       prevPhase->overheadTime.avg = over[1]/CkNumPes();
846       prevPhase->overheadTime.max = over[2];
847       
848       prevPhase->memoryUsageMB = mem[0];
849       
850       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
851
852       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
853       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
854
855       /** The average of the grain sizes on all PEs in us */
856       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
857
858       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
859       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
860
861       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
862       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
863
864       if(bytesPerInvoke2 > bytesPerInvoke3)
865         prevPhase->bytesPerInvoke = bytesPerInvoke2;
866       else
867         prevPhase->bytesPerInvoke = bytesPerInvoke3;
868
869       //      prevPhase->print();
870       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
871
872       //      std::string a = allData.toString();
873
874       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
875       
876     } else {
877       CkPrintf("There is no previous phase to store measurements\n");
878     }
879     
880     alreadyRequestedAll = false;
881     checkForShutdown();
882     delete msg;
883 #else
884     CkAbort("Should not get here\n");
885 #endif
886   }
887
888
889
890
891   void controlPointManager::checkForShutdown(){
892     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
893       doExitNow();
894     }
895   }
896
897
898   void controlPointManager::exitIfReady(){
899      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
900        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
901        doExitNow();
902      } else {
903        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
904        exitWhenReady = true;
905      }
906   }
907
908
909
910   void controlPointManager::doExitNow(){
911           writeOutputToDisk();
912           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
913           CkExit();
914   }
915
916   void controlPointManager::writeOutputToDisk(){
917           if(writeDataFileAtShutdown){
918                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
919           }
920   }
921
922
923   /// Entry method called on all PEs to request memory usage
924   void controlPointManager::requestMemoryUsage(CkCallback cb){
925     int m = CmiMaxMemoryUsage() / 1024 / 1024;
926     CmiResetMaxMemory();
927     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
928     contribute(sizeof(int),&m,CkReduction::max_int, cb);
929   }
930
931   /// All processors reduce their memory usages to this method
932   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
933     int size=msg->getSize() / sizeof(int);
934     CkAssert(size==1);
935     int *m=(int *) msg->getData();
936
937     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
938     
939     instrumentedPhase* prevPhase = previousPhaseData();
940     if(prevPhase != NULL){
941       prevPhase->memoryUsageMB = m[0];
942     } else {
943       CkPrintf("No place to store memory usage");
944     }
945
946     alreadyRequestedMemoryUsage = false;
947     checkForShutdown();
948     delete msg;
949   }
950
951
952 //   /// Inform the control point framework that a named control point affects the priorities of some array  
953 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
954 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
955     
956 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
957 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
958 //     } else {
959 //       std::set<int> s;
960 //       s.insert(groupIdx);
961 //       affectsPrioritiesArray[std::string(name)] = s;
962 //     }
963     
964 // #if DEBUGPRINT   
965 //     std::map<std::string, std::set<int> >::iterator f;
966 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
967 //       std::string name = f->first;
968 //       std::set<int> &vals = f->second;
969 //       cout << "Control point " << name << " affects arrays: ";
970 //       std::set<int>::iterator i;
971 //       for(i=vals.begin(); i!=vals.end();++i){
972 //      cout << *i << " ";
973 //       }
974 //       cout << endl;
975 //     }
976 // #endif
977     
978 //   }
979   
980 //   /// Inform the control point framework that a named control point affects the priority of some entry method
981 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
982 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
983
984 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
985 //       affectsPrioritiesEP[std::string(name)].insert(idx);
986 //     } else {
987 //       std::set<int> s;
988 //       s.insert(idx);
989 //       affectsPrioritiesEP[std::string(name)] = s;
990 //     }
991     
992 // #if DEBUGPRINT
993 //     std::map<std::string, std::set<int> >::iterator f;
994 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
995 //       std::string name = f->first;
996 //       std::set<int> &vals = f->second;
997 //       cout << "Control point " << name << " affects EP: ";
998 //       std::set<int>::iterator i;
999 //       for(i=vals.begin(); i!=vals.end();++i){
1000 //      cout << *i << " ";
1001 //       }
1002 //       cout << endl;
1003 //     }
1004 // #endif
1005
1006
1007 //   }
1008   
1009
1010
1011 /// An interface callable by the application.
1012 void gotoNextPhase(){
1013   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1014 }
1015
1016 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1017 {
1018   gotoNextPhase();
1019 }
1020
1021
1022 /// A mainchare that is used just to create our controlPointManager group at startup
1023 class controlPointMain : public CBase_controlPointMain {
1024 public:
1025   controlPointMain(CkArgMsg* args){
1026 #if OLDRANDSEED
1027     struct timeval tp;
1028     gettimeofday(& tp, NULL);
1029     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1030 #else
1031     double time = CmiWallTimer();
1032     int sec = (int)time;
1033     int usec = (int)((time - (double)sec)*1000000.0);
1034     random_seed =  sec ^ usec;
1035 #endif
1036     
1037     
1038     double period;
1039     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1040     if(haveSamplePeriod){
1041       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1042       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1043     } else {
1044       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1045     }
1046     
1047     
1048     
1049     whichTuningScheme = RandomSelection;
1050
1051
1052     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1053       whichTuningScheme = RandomSelection;
1054     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1055       whichTuningScheme = ExhaustiveSearch;
1056     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1057       whichTuningScheme = AlwaysDefaults;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1059       whichTuningScheme = SimulatedAnnealing;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1061       whichTuningScheme = CriticalPathAutoPrioritization;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1063       whichTuningScheme = UseBestKnownTiming;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1065       whichTuningScheme = UseSteering;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1067       whichTuningScheme = MemoryAware;
1068     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1069       whichTuningScheme = Simplex;
1070     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1071       whichTuningScheme = DivideAndConquer;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period") ){
1073       whichTuningScheme = LDBPeriod;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1075       whichTuningScheme = LDBPeriodLinear;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1077       whichTuningScheme = LDBPeriodQuadratic;
1078     }
1079
1080     char *defValStr = NULL;
1081     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1082       CkPrintf("You specified default value string: %s\n", defValStr);
1083       
1084       // Parse the string, looking for commas
1085      
1086
1087       char *tok = strtok(defValStr, ",");
1088       while (tok) {
1089         // split on the equal sign
1090         int len = strlen(tok);
1091         char *eqsign = strchr(tok, '=');
1092         if(eqsign != NULL && eqsign != tok){
1093           *eqsign = '\0';
1094           char *cpname = tok;
1095           std::string cpName(tok);
1096           char *cpDefVal = eqsign+1;      
1097           int v=-1;
1098           if(sscanf(cpDefVal, "%d", &v) == 1){
1099             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1100             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1101             defaultControlPointValues[cpName] = v;
1102           }
1103         }
1104         tok = strtok(NULL, ",");
1105       }
1106
1107     }
1108
1109     shouldGatherAll = false;
1110     shouldGatherMemoryUsage = false;
1111     shouldGatherUtilization = false;
1112     
1113     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1114       shouldGatherAll = true;
1115     } else {
1116       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1117         shouldGatherMemoryUsage = true;
1118       }
1119       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1120         shouldGatherUtilization = true;
1121       }
1122     }
1123     
1124     writeDataFileAtShutdown = false;   
1125     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1126       writeDataFileAtShutdown = true;
1127     }
1128
1129     shouldFilterOutputData = true;
1130     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1131       shouldFilterOutputData = false;
1132     }
1133
1134
1135    loadDataFileAtStartup = false;   
1136     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1137       loadDataFileAtStartup = true;
1138     }
1139
1140     char *cpdatafile;
1141     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1142       sprintf(CPDataFilename, "%s", cpdatafile);
1143     } else {
1144       sprintf(CPDataFilename, "controlPointData.txt");
1145     }
1146
1147
1148     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1149   }
1150   ~controlPointMain(){}
1151 };
1152
1153 /// An interface callable by the application.
1154 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1155   CkAssert(CkMyPe() == 0);
1156   CkPrintf("Application has registered a control point change callback\n");
1157   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1158 }
1159
1160 /// An interface callable by the application.
1161 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1162   if(CkMyPe() == 0){
1163     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1164     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1165   }
1166 }
1167
1168 /// An interface callable by the application.
1169 void registerControlPointTiming(double time){
1170   CkAssert(CkMyPe() == 0);
1171 #if DEBUGPRINT>0
1172   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1173 #endif
1174   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1175 }
1176
1177 /// An interface callable by the application.
1178 void controlPointTimingStamp() {
1179   CkAssert(CkMyPe() == 0);
1180 #if DEBUGPRINT>0
1181   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1182 #endif
1183   
1184   static double prev_time = 0.0;
1185   double t = CmiWallTimer();
1186   double duration = t - prev_time;
1187   prev_time = t;
1188     
1189   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1190 }
1191
1192 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1193 {
1194   controlPointTimingStamp();
1195 }
1196
1197
1198
1199
1200
1201 /// Shutdown the control point framework, writing data to disk if necessary
1202 extern "C" void controlPointShutdown(){
1203   if(CkMyPe() == 0){
1204
1205     // wait for gathering of idle time & memory usage to complete
1206     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1207
1208   }
1209 }
1210
1211 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1212 void controlPointInitNode(){
1213 //  CkPrintf("controlPointInitNode()\n");
1214   registerExitFn(controlPointShutdown);
1215 }
1216
1217 /// Called periodically to allow control point framework to do things periodically
1218 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1219 #ifdef DEBUGPRINT
1220   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1221 #endif
1222   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1223   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1224 }
1225
1226
1227
1228
1229
1230 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1231 /// user observed characteristic to adapt specific control point values.
1232 /// This function must return valid values for newControlPoints.
1233 void controlPointManager::generatePlan() {
1234   const double startGenerateTime = CmiWallTimer();
1235   const int phase_id = this->phase_id;
1236   const int effective_phase = allData.phases.size();
1237
1238   // Only generate a plan once per phase
1239   if(generatedPlanForStep == phase_id) 
1240     return;
1241   generatedPlanForStep = phase_id;
1242  
1243   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1244   printTuningScheme();
1245
1246   // By default lets put the previous phase data into newControlPoints
1247   instrumentedPhase *prevPhase = previousPhaseData();
1248   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1249           newControlPoints[cpsIter->first] = cpsIter->second;
1250   }
1251
1252
1253   if( whichTuningScheme == RandomSelection ){
1254     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1255     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1256       const std::string &name = cpsIter->first;
1257       const std::pair<int,int> &bounds = cpsIter->second;
1258       const int lb = bounds.first;
1259       const int ub = bounds.second;
1260       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1261     }
1262   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1263     // -----------------------------------------------------------
1264     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1265     //   
1266     // This scheme will return the median value for the range 
1267     // early on, and then will transition over to the new control points
1268     // determined by the critical path adapting code
1269
1270     // This won't work until the periodic function is fixed up a bit
1271
1272     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1273     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1274       const std::string &name = cpsIter->first;
1275       const std::pair<int,int> &bounds = cpsIter->second;
1276       const int lb = bounds.first;
1277       const int ub = bounds.second;
1278       newControlPoints[name] =  (lb+ub)/2;
1279     }
1280
1281   } else if ( whichTuningScheme == MemoryAware ) {
1282
1283     // -----------------------------------------------------------
1284     //  STEERING BASED ON MEMORY USAGE
1285
1286     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1287     instrumentedPhase *prevPhase = previousPhaseData();
1288  
1289     if(phase_id%2 == 0){
1290       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1291       twoAgoPhase->print();
1292       CkPrintf("\n");
1293       fflush(stdout);
1294       
1295       // See if memory usage is low:
1296       double memUsage = twoAgoPhase->memoryUsageMB;
1297       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1298       fflush(stdout);
1299       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1300         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1301         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1302         
1303         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1304         newControlPoints = twoAgoPhase->controlPoints;
1305
1306
1307         CkPrintf("Steering (memory based) initialized plan\n");
1308         fflush(stdout);
1309
1310         // look for a possible control point knob to turn
1311         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1312         
1313         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1314         bool found = false;
1315         std::string cpName;
1316         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1317         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1318         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1319           cpName = iter->first;
1320           info = &iter->second;
1321           found = true;
1322           break;
1323         }
1324
1325         // Adapt the control point value
1326         if(found){
1327           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1328           fflush(stdout);
1329           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1330           const int maxValue = controlPointSpace[cpName].second;
1331           
1332           if(twoAgoValue+1 <= maxValue){
1333             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1334           }
1335         }
1336         
1337       }
1338     }
1339
1340     CkPrintf("Steering (memory based) done for this phase\n");
1341     fflush(stdout);
1342
1343   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1344
1345     // -----------------------------------------------------------
1346     //  USE BEST KNOWN TIME  
1347     static bool firstTime = true;
1348     if(firstTime){
1349       firstTime = false;
1350       instrumentedPhase *best = allData.findBest(); 
1351       CkPrintf("Best known phase is:\n");
1352       best->print();
1353       CkPrintf("\n");
1354       
1355       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1356       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1357         const std::string &name = cpsIter->first;
1358         newControlPoints[name] =  best->controlPoints[name];
1359       }
1360     }
1361   } else if( whichTuningScheme == LDBPeriod) {
1362     // Assume this is used in this manner:
1363     //  1) go to next phase
1364     //  2) request control point
1365     //  3) load balancing
1366     //  4) computation
1367     
1368     
1369     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1370     instrumentedPhase *prevPhase = previousPhaseData();
1371     
1372     
1373     const std::vector<double> &times = twoAgoPhase->times;
1374     const int oldNumTimings = times.size();
1375
1376
1377     const std::vector<double> &timesNew = prevPhase->times;
1378     const int newNumTimings = timesNew.size();
1379
1380
1381     if(oldNumTimings > 4 && newNumTimings > 4){
1382       
1383       // Build model of execution time based on two phases ago
1384       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1385       
1386       double oldSum = 0;
1387       
1388       for(int i=2; i<oldNumTimings; i++){
1389         oldSum += times[i];
1390       }
1391       
1392       const double oldAvg = oldSum / (oldNumTimings-2);
1393       
1394       
1395       
1396       
1397       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1398       const double expectedTotalTime = oldAvg * newNumTimings;
1399       
1400       
1401       // Measure actual time
1402       double newSum = 0.0;
1403       for(int i=2; i<newNumTimings; ++i){
1404         newSum += timesNew[i];
1405       }
1406       
1407       const double newAvg = newSum / (newNumTimings-2);
1408       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1409       
1410       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1411       
1412       // Determine load balance cost
1413       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1414       
1415       const double benefitAfterLB = benefit - lbcost;
1416     
1417     
1418       // Determine whether LB cost outweights the estimated benefit
1419       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1420     
1421     
1422       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1423       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1424         const std::string &name = cpsIter->first;
1425         const std::pair<int,int> &bounds = cpsIter->second;
1426         const int lb = bounds.first;
1427         const int ub = bounds.second;
1428       
1429         if(benefitAfterLB > 0){
1430           CkPrintf("Constant Model: Beneficial LB\n");
1431           int newval = newControlPoints[name] / 2;
1432           if(newval > lb)
1433             newControlPoints[name] = newval;
1434           else 
1435             newControlPoints[name] = lb;
1436         } else {
1437           CkPrintf("Constant Model: Detrimental LB\n");
1438           int newval = newControlPoints[name] * 2;
1439           if(newval < ub)
1440             newControlPoints[name] = newval;
1441           else
1442             newControlPoints[name] = ub;
1443         }
1444       }
1445     }
1446     
1447     
1448   }  else if( whichTuningScheme == LDBPeriodLinear) {
1449     // Assume this is used in this manner:
1450     //  1) go to next phase
1451     //  2) request control point
1452     //  3) load balancing
1453     //  4) computation
1454
1455
1456     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1457     instrumentedPhase *prevPhase = previousPhaseData();
1458     
1459     const std::vector<double> &times = twoAgoPhase->times;
1460     const int oldNumTimings = times.size();
1461
1462     const std::vector<double> &timesNew = prevPhase->times;
1463     const int newNumTimings = timesNew.size();
1464     
1465
1466     if(oldNumTimings > 4 && newNumTimings > 4){
1467
1468       // Build model of execution time based on two phases ago
1469       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1470       const int b1 = 2 + (oldNumTimings-2)/2;
1471       double s1 = 0;
1472       double s2 = 0;
1473     
1474       const double ldbStepsTime = times[0] + times[1];
1475     
1476       for(int i=2; i<b1; i++){
1477         s1 += times[i];
1478       }
1479       for(int i=b1; i<oldNumTimings; i++){
1480         s2 += times[i];
1481       }
1482     
1483     
1484       // Compute the estimated time for the last phase's data
1485     
1486       const double a1 = s1 / ((newNumTimings-2)/2);
1487       const double a2 = s2 / ((newNumTimings-2)/2);
1488     
1489       const double expectedTotalTime = ((5.0*a2-3.0*a1)/2.0 ) * newNumTimings;
1490         
1491     
1492       // Measure actual time
1493       double sum = 0.0;
1494       for(int i=2; i<newNumTimings; ++i){
1495         sum += timesNew[i];
1496       }
1497
1498       const double avg = sum / ((double)(newNumTimings-2));
1499       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1500
1501       const double benefit = expectedTotalTime - actualTotalTime;
1502
1503       // Determine load balance cost
1504       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1505
1506       const double benefitAfterLB = benefit - lbcost;
1507
1508     
1509       // Determine whether LB cost outweights the estimated benefit
1510       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1511     
1512     
1513     
1514       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1515       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1516         const std::string &name = cpsIter->first;
1517         const std::pair<int,int> &bounds = cpsIter->second;
1518         const int lb = bounds.first;
1519         const int ub = bounds.second;
1520       
1521         if(benefitAfterLB > 0){
1522           CkPrintf("Linear Model: Beneficial LB\n");
1523           int newval = newControlPoints[name] / 2;
1524           if(newval > lb)
1525             newControlPoints[name] = newval;
1526           else 
1527             newControlPoints[name] = lb;
1528         } else {
1529           CkPrintf("Linear Model: Detrimental LB\n");
1530           int newval = newControlPoints[name] * 2;
1531           if(newval < ub)
1532             newControlPoints[name] = newval;
1533           else 
1534             newControlPoints[name] = ub;
1535         }
1536       }
1537     }
1538
1539   }
1540
1541   else if( whichTuningScheme == LDBPeriodQuadratic) {
1542     // Assume this is used in this manner:
1543     //  1) go to next phase
1544     //  2) request control point
1545     //  3) load balancing
1546     //  4) computation
1547
1548
1549     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1550     instrumentedPhase *prevPhase = previousPhaseData();
1551         
1552     const std::vector<double> &times = twoAgoPhase->times;
1553     const int oldNumTimings = times.size();
1554
1555     const std::vector<double> &timesNew = prevPhase->times;
1556     const int newNumTimings = timesNew.size();
1557
1558     
1559     if(oldNumTimings > 4 && newNumTimings > 4){
1560
1561
1562       // Build model of execution time based on two phases ago
1563       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1564       const int b1 = 2 + (oldNumTimings-2)/3;
1565       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1566       double s1 = 0;
1567       double s2 = 0;
1568       double s3 = 0;
1569
1570       const double ldbStepsTime = times[0] + times[1];
1571     
1572       for(int i=2; i<b1; i++){
1573         s1 += times[i];
1574       }
1575       for(int i=b1; i<b2; i++){
1576         s2 += times[i];
1577       }
1578       for(int i=b2; i<oldNumTimings; i++){
1579         s3 += times[i];
1580       }
1581
1582     
1583       // Compute the estimated time for the last phase's data
1584     
1585       const double a1 = s1 / ((newNumTimings-2)/3);
1586       const double a2 = s2 / ((newNumTimings-2)/3);
1587       const double a3 = s3 / ((newNumTimings-2)/3);
1588     
1589       const double a = (a1-2.0*a2+a3)/2.0;
1590       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1591       const double c = a3;
1592     
1593       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1594       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1595       const double x2 = 0.5;
1596       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1597    
1598     
1599       // Measure actual time
1600       double sum = 0.0;
1601       for(int i=2; i<newNumTimings; ++i){
1602         sum += timesNew[i];
1603       }
1604
1605       const double avg = sum / ((double)(newNumTimings-2));
1606       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1607
1608       const double benefit = expectedTotalTime - actualTotalTime;
1609
1610       // Determine load balance cost
1611       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1612
1613       const double benefitAfterLB = benefit - lbcost;
1614
1615     
1616       // Determine whether LB cost outweights the estimated benefit
1617       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1618     
1619     
1620     
1621       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1622       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1623         const std::string &name = cpsIter->first;
1624         const std::pair<int,int> &bounds = cpsIter->second;
1625         const int lb = bounds.first;
1626         const int ub = bounds.second;
1627       
1628         if(benefitAfterLB > 0){
1629           CkPrintf("QuadraticModel: Beneficial LB\n");
1630           int newval = newControlPoints[name] / 2;
1631           if(newval > lb)
1632             newControlPoints[name] = newval;
1633           else 
1634             newControlPoints[name] = lb;
1635         } else {
1636           CkPrintf("QuadraticModel: Detrimental LB\n");
1637           int newval = newControlPoints[name] * 2;
1638           if(newval < ub)
1639             newControlPoints[name] = newval;
1640           else 
1641             newControlPoints[name] = ub;
1642         }
1643       
1644       }
1645     }
1646     
1647   } else if ( whichTuningScheme == UseSteering ) {
1648           // -----------------------------------------------------------
1649           //  STEERING BASED ON KNOWLEDGE
1650
1651           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1652           // plans are only generated after 3 phases
1653
1654           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1655           instrumentedPhase *prevPhase = previousPhaseData();
1656
1657           if(phase_id%4 == 0){
1658                   CkPrintf("Steering based on 2 phases ago:\n");
1659                   twoAgoPhase->print();
1660                   CkPrintf("\n");
1661                   fflush(stdout);
1662
1663                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1664
1665
1666                   // ========================================= Concurrency =============================================
1667                   // See if idle time is high:
1668                   {
1669                           double idleTime = twoAgoPhase->idleTime.avg;
1670                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1671                           fflush(stdout);
1672                           if(idleTime > 0.10){
1673                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1674                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1675
1676                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1677
1678                                   bool found = false;
1679                                   std::string cpName;
1680                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1681                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1682                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1683                                           cpName = iter->first;
1684                                           info = &iter->second;
1685
1686                                           // Initialize a new plan based on two phases ago
1687                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1688
1689                                           CkPrintf("Steering found knob to turn\n");
1690                                           fflush(stdout);
1691
1692                                           if(info->first == ControlPoint::EFF_INC){
1693                                                   const int maxValue = controlPointSpace[cpName].second;
1694                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1695                                                   if(twoAgoValue+1 <= maxValue){
1696                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1697                                                   }
1698                                           } else {
1699                                                   const int minValue = controlPointSpace[cpName].second;
1700                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1701                                                   if(twoAgoValue-1 >= minValue){
1702                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1703                                                   }
1704                                           }
1705
1706                                           possibleNextStepPlans.push_back(aNewPlan);
1707
1708                                   }
1709                           }
1710                   }
1711
1712                   // ========================================= Grain Size =============================================
1713                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1714                   {
1715                           double overheadTime = twoAgoPhase->overheadTime.avg;
1716                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1717                           fflush(stdout);
1718                           if(overheadTime > 0.10){
1719                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1720                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1721
1722                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1723
1724                                   bool found = false;
1725                                   std::string cpName;
1726                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1727                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1728                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1729                                           cpName = iter->first;
1730                                           info = &iter->second;
1731
1732                                           // Initialize a new plan based on two phases ago
1733                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1734
1735
1736
1737                                           CkPrintf("Steering found knob to turn\n");
1738                                           fflush(stdout);
1739
1740                                           if(info->first == ControlPoint::EFF_INC){
1741                                                   const int maxValue = controlPointSpace[cpName].second;
1742                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1743                                                   if(twoAgoValue+1 <= maxValue){
1744                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1745                                                   }
1746                                           } else {
1747                                                   const int minValue = controlPointSpace[cpName].second;
1748                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1749                                                   if(twoAgoValue-1 >= minValue){
1750                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1751                                                   }
1752                                           }
1753
1754                                           possibleNextStepPlans.push_back(aNewPlan);
1755
1756                                   }
1757
1758                           }
1759                   }
1760                   // ========================================= GPU Offload =============================================
1761                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1762                   {
1763                           double idleTime = twoAgoPhase->idleTime.avg;
1764                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1765                           fflush(stdout);
1766                           if(idleTime > 0.10){
1767                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1768                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1769
1770                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1771
1772                                   bool found = false;
1773                                   std::string cpName;
1774                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1775                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1776                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1777                                           cpName = iter->first;
1778                                           info = &iter->second;
1779
1780                                           // Initialize a new plan based on two phases ago
1781                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1782
1783
1784                                           CkPrintf("Steering found knob to turn\n");
1785                                           fflush(stdout);
1786
1787                                           if(info->first == ControlPoint::EFF_DEC){
1788                                                   const int maxValue = controlPointSpace[cpName].second;
1789                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1790                                                   if(twoAgoValue+1 <= maxValue){
1791                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1792                                                   }
1793                                           } else {
1794                                                   const int minValue = controlPointSpace[cpName].second;
1795                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1796                                                   if(twoAgoValue-1 >= minValue){
1797                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1798                                                   }
1799                                           }
1800
1801                                           possibleNextStepPlans.push_back(aNewPlan);
1802
1803                                   }
1804
1805                           }
1806                   }
1807
1808                   // ========================================= Done =============================================
1809
1810
1811                   if(possibleNextStepPlans.size() > 0){
1812                           newControlPoints = possibleNextStepPlans[0];
1813                   }
1814
1815
1816                   CkPrintf("Steering done for this phase\n");
1817                   fflush(stdout);
1818
1819           }  else {
1820                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1821                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1822                   fflush(stdout);
1823           }
1824
1825
1826
1827   } else if( whichTuningScheme == SimulatedAnnealing ) {
1828
1829     // -----------------------------------------------------------
1830     //  SIMULATED ANNEALING
1831     //  Simulated Annealing style hill climbing method
1832     //
1833     //  Find the best search space configuration, and try something
1834     //  nearby it, with a radius decreasing as phases increase
1835
1836     CkPrintf("Finding best phase\n");
1837     instrumentedPhase *bestPhase = allData.findBest();  
1838     CkPrintf("best found:\n"); 
1839     bestPhase->print(); 
1840     CkPrintf("\n"); 
1841     
1842
1843     const int convergeByPhase = 100;
1844     // Determine from 0.0 to 1.0 how far along we are
1845     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1846
1847     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1848     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1849       const std::string &name = cpsIter->first;
1850       const std::pair<int,int> &bounds = cpsIter->second;
1851       const int minValue = bounds.first;
1852       const int maxValue = bounds.second;
1853       
1854       const int before = bestPhase->controlPoints[name];   
1855   
1856       const int range = (maxValue-minValue+1)*(1.0-progress);
1857
1858       int high = min(before+range, maxValue);
1859       int low = max(before-range, minValue);
1860       
1861       newControlPoints[name] = low;
1862       if(high-low > 0){
1863         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1864       } 
1865       
1866     }
1867
1868   } else if ( whichTuningScheme == DivideAndConquer ) {
1869
1870           // -----------------------------------------------------------
1871           //  STEERING FOR Divide & Conquer Programs
1872           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1873           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1874           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1875
1876           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1877           // plans are only generated after 3 phases
1878
1879           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1880           instrumentedPhase *prevPhase = previousPhaseData();
1881
1882           if(phase_id%4 == 0){
1883                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1884                   twoAgoPhase->print();
1885                   CkPrintf("\n");
1886                   fflush(stdout);
1887
1888                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1889
1890
1891                   // ========================================= Concurrency =============================================
1892                   // See if idle time is high:
1893                   {
1894                           double idleTime = twoAgoPhase->idleTime.avg;
1895                           double overheadTime = twoAgoPhase->overheadTime.avg;
1896
1897
1898                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1899                           fflush(stdout);
1900                           if(idleTime+overheadTime > 0.10){
1901                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1902                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1903
1904                                   int direction = -1;
1905                                   if (idleTime>overheadTime){
1906                                           // We need to decrease the grain size, or increase the available concurrency
1907                                           direction = 1;
1908                                   }
1909
1910                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1911
1912                                   bool found = false;
1913                                   std::string cpName;
1914                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1915                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1916                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1917                                           cpName = iter->first;
1918                                           info = &iter->second;
1919                                           
1920                                           // Initialize a new plan based on two phases ago
1921                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1922                                           bool newPlanExists = false;
1923
1924                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1925                                           fflush(stdout);
1926
1927
1928
1929                                           if(info->first == ControlPoint::EFF_INC){
1930                                                   const int minValue = controlPointSpace[cpName].first;
1931                                                   const int maxValue = controlPointSpace[cpName].second;
1932                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1933                                                   const int newVal = twoAgoValue+1*direction;
1934                                                   if(newVal <= maxValue && newVal >= minValue){
1935                                                     aNewPlan[cpName] = newVal;
1936                                                     newPlanExists = true;
1937                                                   } else {
1938                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1939                                                   }
1940                                           } else {
1941                                                   const int minValue = controlPointSpace[cpName].first;
1942                                                   const int maxValue = controlPointSpace[cpName].second;
1943                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1944                                                   const int newVal = twoAgoValue-1*direction;
1945                                                   if(newVal <= maxValue && newVal >= minValue){
1946                                                           aNewPlan[cpName] = newVal;
1947                                                           newPlanExists = true;
1948                                                   } else {
1949                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1950                                                   }
1951                                           }
1952
1953                                           if(newPlanExists){
1954                                             possibleNextStepPlans.push_back(aNewPlan);
1955                                           }
1956                                   }
1957                           }
1958                   }
1959
1960                   if(possibleNextStepPlans.size() > 0){
1961                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1962                     newControlPoints = possibleNextStepPlans[0];
1963                   } else {
1964                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1965                   }
1966           }
1967
1968   } else if( whichTuningScheme == Simplex ) {
1969
1970           // -----------------------------------------------------------
1971           //  Nelder Mead Simplex Algorithm
1972           //
1973           //  A scheme that takes a simplex (n+1 points) and moves it
1974           //  toward the minimum, eventually converging there.
1975
1976           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1977
1978   } else if( whichTuningScheme == ExhaustiveSearch ) {
1979     
1980     // -----------------------------------------------------------
1981     // EXHAUSTIVE SEARCH
1982    
1983     int numDimensions = controlPointSpace.size();
1984     CkAssert(numDimensions > 0);
1985   
1986     vector<int> lowerBounds(numDimensions);
1987     vector<int> upperBounds(numDimensions); 
1988   
1989     int d=0;
1990     std::map<std::string, pair<int,int> >::iterator iter;
1991     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1992       //    CkPrintf("Examining dimension %d\n", d);
1993       lowerBounds[d] = iter->second.first;
1994       upperBounds[d] = iter->second.second;
1995       d++;
1996     }
1997    
1998     // get names for each dimension (control point)
1999     vector<std::string> names(numDimensions);
2000     d=0;
2001     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2002       names[d] = niter->first;
2003       d++;
2004     }
2005   
2006   
2007     // Create the first possible configuration
2008     vector<int> config = lowerBounds;
2009     config.push_back(0);
2010   
2011     // Increment until finding an unused configuration
2012     allData.cleanupNames(); // put -1 values in for any control points missing
2013     std::vector<instrumentedPhase*> &phases = allData.phases;     
2014
2015     while(true){
2016     
2017       std::vector<instrumentedPhase*>::iterator piter; 
2018       bool testedConfiguration = false; 
2019       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2020       
2021         // Test if the configuration matches this phase
2022         bool match = true;
2023         for(int j=0;j<numDimensions;j++){
2024           match &= (*piter)->controlPoints[names[j]] == config[j];
2025         }
2026       
2027         if(match){
2028           testedConfiguration = true; 
2029           break;
2030         } 
2031       } 
2032     
2033       if(testedConfiguration == false){ 
2034         break;
2035       } 
2036     
2037       // go to next configuration
2038       config[0] ++;
2039       // Propagate "carrys"
2040       for(int i=0;i<numDimensions;i++){
2041         if(config[i] > upperBounds[i]){
2042           config[i+1]++;
2043           config[i] = lowerBounds[i];
2044         }
2045       }
2046       // check if we have exhausted all possible values
2047       if(config[numDimensions] > 0){
2048         break;
2049       }
2050        
2051     }
2052   
2053     if(config[numDimensions] > 0){
2054       for(int i=0;i<numDimensions;i++){ 
2055         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2056       }
2057     }
2058
2059     // results are now in config[i]
2060     
2061     for(int i=0; i<numDimensions; i++){
2062       newControlPoints[names[i]] = config[i];
2063       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2064     }
2065
2066
2067   } else {
2068     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2069   }
2070
2071
2072   const double endGenerateTime = CmiWallTimer();
2073   
2074   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2075   
2076 }
2077
2078
2079
2080
2081 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2082
2083
2084 /// Get control point value from range of integers [lb,ub]
2085 int controlPoint(const char *name, int lb, int ub){
2086   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2087   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2088   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2089   int result;
2090
2091   // if we already have control point values for phase, return them
2092   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2093     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2094     return thisPhaseData->controlPoints[std::string(name)];
2095   }
2096   
2097
2098   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2099     // For the first few phases, just use the lower bound, or the default if one was provided 
2100     // This ensures that the ranges for all the control points are known before we do anything fancy
2101     result = lb;
2102
2103     if(defaultControlPointValues.count(std::string(name)) > 0){
2104       int v = defaultControlPointValues[std::string(name)];
2105       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2106       result = v;
2107     }
2108
2109   } else if(controlPointSpace.count(std::string(name)) == 0){
2110     // if this is the first time we've seen the CP, then return the lower bound
2111     result = lb;
2112     
2113   }  else {
2114     // Generate a plan one time for each phase
2115     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2116     
2117     // Use the planned value:
2118     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2119     
2120   }
2121
2122   if(!isInRange(result,ub,lb)){
2123     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2124     fflush(stdout);
2125     fflush(stderr);
2126   }
2127   CkAssert(isInRange(result,ub,lb));
2128   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2129
2130   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2131
2132   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2133   //  thisPhaseData->print();
2134   
2135   return result;
2136 }
2137
2138
2139 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2140   CkAssert(sizeof(lb) == 4);
2141   CkAssert(CkMyPe() == 0);
2142   return controlPoint("FortranCP", *lb, *ub);
2143 }
2144
2145
2146
2147
2148 /// Inform the control point framework that a named control point affects the priorities of some array  
2149 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2150 //   CkGroupID aid = arraybase.ckGetArrayID();
2151 //   int groupIdx = aid.idx;
2152 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2153 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2154 // }
2155
2156
2157 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2158 // void controlPointPriorityEntry(const char *name, int idx){
2159 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2160 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2161 // }
2162
2163
2164
2165
2166
2167 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2168
2169     This function decomposes the algorithm into a state machine that allows it to
2170     evaluate one or more configurations through subsequent clls to this function.
2171     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2172
2173     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2174     so a list of them will be created and they will be evaluated, one per call.
2175
2176     Currently there is no stopping criteria, but the simplex ought to contract down
2177     to a few close configurations, and hence not much change will happen after this 
2178     point.
2179
2180  */
2181 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2182
2183         if(useBestKnown){
2184                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2185                 return;
2186         }
2187
2188
2189         if(firstSimplexPhase< 0){
2190                 firstSimplexPhase = allData.phases.size()-1;
2191                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2192         }
2193
2194         int n = controlPointSpace.size();
2195
2196         CkAssert(n>=2);
2197
2198
2199         if(simplexState == beginning){
2200                 // First we evaluate n+1 random points, then we go to a different state
2201                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2202                         CkPrintf("Simplex Tuning: chose random configuration\n");
2203
2204                         // Choose random values from the middle third of the range in each dimension
2205                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2206                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2207                                 const std::string &name = cpsIter->first;
2208                                 const std::pair<int,int> &bounds = cpsIter->second;
2209                                 const int lb = bounds.first;
2210                                 const int ub = bounds.second;
2211                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2212                         }
2213                 } else {
2214                         // Set initial simplex:
2215                         for(int i=0; i<n+1; i++){
2216                                 simplexIndices.insert(firstSimplexPhase+i);
2217                         }
2218                         CkAssert(simplexIndices.size() == n+1);
2219
2220                         // Transition to reflecting state
2221                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2222
2223                 }
2224
2225         } else if (simplexState == reflecting){
2226                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2227                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2228
2229                 // Find the highest time from other points in the simplex
2230                 double highestTimeForOthersInSimplex = 0.0;
2231                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2232                         double t = allData.phases[*iter]->medianTime();
2233                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2234                                 highestTimeForOthersInSimplex = t;
2235                         }
2236                 }
2237
2238                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2239
2240                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2241                         // if y* < yl,  transition to "expanding"
2242                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2243
2244                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2245                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2246                         CkAssert(simplexIndices.size() == n+1);
2247                         simplexIndices.erase(worstPhase);
2248                         CkAssert(simplexIndices.size() == n);
2249                         simplexIndices.insert(pPhase);
2250                         CkAssert(simplexIndices.size() == n+1);
2251                         // Transition to reflecting state
2252                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2253
2254                 } else {
2255                         // if y* > yh
2256                         if(recentPhaseTime <= worstTime){
2257                                 // replace Ph with P*
2258                                 CkAssert(simplexIndices.size() == n+1);
2259                                 simplexIndices.erase(worstPhase);
2260                                 CkAssert(simplexIndices.size() == n);
2261                                 simplexIndices.insert(pPhase);
2262                                 CkAssert(simplexIndices.size() == n+1);
2263                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2264                                 worstPhase = pPhase;
2265                                 // Just as a sanity check, make sure we don't use the non-updated values.
2266                                 worst.clear();
2267                         }
2268
2269                         // Now, form P** and do contracting phase
2270                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2271
2272                 }
2273
2274         } else if (simplexState == doneExpanding){
2275                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2276                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2277                 // A new configuration has been evaluated
2278
2279                 // Check to see if y** < y1
2280                 if(recentPhaseTime < bestTime){
2281                         // replace Ph by P**
2282                         CkAssert(simplexIndices.size() == n+1);
2283                         simplexIndices.erase(worstPhase);
2284                         CkAssert(simplexIndices.size() == n);
2285                         simplexIndices.insert(p2Phase);
2286                         CkAssert(simplexIndices.size() == n+1);
2287                 } else {
2288                         //      replace Ph by P*
2289                         CkAssert(simplexIndices.size() == n+1);
2290                         simplexIndices.erase(worstPhase);
2291                         CkAssert(simplexIndices.size() == n);
2292                         simplexIndices.insert(pPhase);
2293                         CkAssert(simplexIndices.size() == n+1);
2294                 }
2295
2296                 // Transition to reflecting state
2297                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2298
2299         }  else if (simplexState == contracting){
2300                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2301                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2302                 // A new configuration has been evaluated
2303
2304                 // Check to see if y** > yh
2305                 if(recentPhaseTime <= worstTime){
2306                         // replace Ph by P**
2307                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2308                         CkAssert(simplexIndices.size() == n+1);
2309                         simplexIndices.erase(worstPhase);
2310                         CkAssert(simplexIndices.size() == n);
2311                         simplexIndices.insert(p2Phase);
2312                         CkAssert(simplexIndices.size() == n+1);
2313                         // Transition to reflecting state
2314                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2315
2316                 } else {
2317                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2318                         simplexState = stillContracting;
2319
2320                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2321                         stillMustContractList = simplexIndices;
2322
2323                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2324                 }
2325
2326         } else if (simplexState == stillContracting){
2327                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2328
2329                 if(stillMustContractList.size()>0){
2330                         int c = *stillMustContractList.begin();
2331                         stillMustContractList.erase(c);
2332                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2333
2334                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2335
2336                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2337                         int v = 0;
2338                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2339                                 const std::string &name = cpsIter->first;
2340                                 const std::pair<int,int> &bounds = cpsIter->second;
2341                                 const int lb = bounds.first;
2342                                 const int ub = bounds.second;
2343
2344                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2345
2346                                 newControlPoints[name] = keepInRange(val,lb,ub);
2347                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2348                                 v++;
2349                         }
2350                 } else {
2351                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2352                         CkAssert(stillMustContractList.size() == 0);
2353                         simplexIndices.clear();
2354                         CkAssert(simplexIndices.size()==0);
2355                         for(int i=0; i<n+1; i++){
2356                                 simplexIndices.insert(allData.phases.size()-2-i);
2357                         }
2358                         CkAssert(simplexIndices.size()==n+1);
2359
2360                         // Transition to reflecting state
2361                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2362
2363                 }
2364
2365
2366         } else if (simplexState == expanding){
2367                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2368                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2369                 // A new configuration has been evaluated
2370
2371                 // determine if y** < yl
2372                 if(recentPhaseTime < bestTime){
2373                         // replace Ph by P**
2374                         CkAssert(simplexIndices.size() == n+1);
2375                         simplexIndices.erase(worstPhase);
2376                         CkAssert(simplexIndices.size() == n);
2377                         simplexIndices.insert(p2Phase);
2378                         CkAssert(simplexIndices.size() == n+1);
2379                         // Transition to reflecting state
2380                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2381
2382                 } else {
2383                         // else, replace ph with p*
2384                         CkAssert(simplexIndices.size() == n+1);
2385                         simplexIndices.erase(worstPhase);
2386                         CkAssert(simplexIndices.size() == n);
2387                         simplexIndices.insert(pPhase);
2388                         CkAssert(simplexIndices.size() == n+1);
2389                         // Transition to reflecting state
2390                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2391                 }
2392
2393
2394         } else {
2395                 CkAbort("Unknown simplexState");
2396         }
2397
2398 }
2399
2400
2401
2402 /** Replace the worst point with its reflection across the centroid. */
2403 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2404
2405         int n = controlPointSpace.size();
2406
2407         printSimplex(allData);
2408
2409         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2410
2411
2412         // Quit if the diameter of our simplex is small
2413         double maxr = 0.0;
2414         for(int i=0; i<n+1; i++){
2415                 //              Compute r^2 of this simplex point from the centroid
2416                 double r2 = 0.0;
2417                 std::vector<double> p = pointCoords(allData, i);
2418                 for(int d=0; d<p.size(); d++){
2419                         double r1 = (p[d] * centroid[d]);
2420                         r2 += r1*r1;
2421                 }
2422                 if(r2 > maxr)
2423                         maxr = r2;
2424         }
2425
2426 #if 0
2427         // At some point we quit this tuning
2428         if(maxr < 10){
2429                 useBestKnown = true;
2430                 instrumentedPhase *best = allData.findBest();
2431                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2432
2433                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2434                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2435                         const std::string &name = cpsIter->first;
2436                         newControlPoints[name] =  best->controlPoints[name];
2437                 }
2438         }
2439 #endif
2440
2441         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2442
2443         pPhase = allData.phases.size()-1;
2444         P.resize(n);
2445         for(int i=0; i<n; i++){
2446                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2447         }
2448
2449         for(int i=0; i<P.size(); i++){
2450                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2451         }
2452
2453
2454         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2455         int v = 0;
2456         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2457                 const std::string &name = cpsIter->first;
2458                 const std::pair<int,int> &bounds = cpsIter->second;
2459                 const int lb = bounds.first;
2460                 const int ub = bounds.second;
2461                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2462                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2463                 v++;
2464         }
2465
2466
2467         // Transition to "reflecting" state
2468         simplexState = reflecting;
2469         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2470
2471 }
2472
2473
2474
2475
2476 /** Replace the newly tested reflection with a further expanded version of itself. */
2477 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2478         int n = controlPointSpace.size();
2479         printSimplex(allData);
2480
2481         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2482         // I believe the equation for P** in the text on page 308 is correct.
2483
2484         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2485
2486
2487         p2Phase = allData.phases.size()-1;
2488         P2.resize(n);
2489         for(int i=0; i<n; i++){
2490                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2491         }
2492
2493         for(int i=0; i<P2.size(); i++){
2494                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2495         }
2496
2497
2498         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2499         int v = 0;
2500         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2501                 const std::string &name = cpsIter->first;
2502                 const std::pair<int,int> &bounds = cpsIter->second;
2503                 const int lb = bounds.first;
2504                 const int ub = bounds.second;
2505                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2506                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2507                 v++;
2508         }
2509
2510
2511         // Transition to "doneExpanding" state
2512         simplexState = doneExpanding;
2513         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2514
2515 }
2516
2517
2518
2519
2520 /** Replace the newly tested reflection with a further expanded version of itself. */
2521 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2522         int n = controlPointSpace.size();
2523         printSimplex(allData);
2524
2525         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2526
2527
2528         p2Phase = allData.phases.size()-1;
2529         P2.resize(n);
2530         for(int i=0; i<n; i++){
2531                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2532         }
2533
2534         for(int i=0; i<P2.size(); i++){
2535                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2536         }
2537
2538
2539         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2540         int v = 0;
2541         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2542                 const std::string &name = cpsIter->first;
2543                 const std::pair<int,int> &bounds = cpsIter->second;
2544                 const int lb = bounds.first;
2545                 const int ub = bounds.second;
2546                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2547                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2548                 v++;
2549         }
2550         
2551         
2552         // Transition to "contracting" state
2553         simplexState = contracting;
2554         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2555
2556 }
2557
2558
2559
2560
2561 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2562         int n = controlPointSpace.size();
2563
2564         // Find worst performing point in the simplex
2565         worstPhase = -1;
2566         worstTime = -1.0;
2567         bestPhase = 10000000;
2568         bestTime = 10000000;
2569         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2570                 double t = allData.phases[*iter]->medianTime();
2571                 if(t > worstTime){
2572                         worstTime = t;
2573                         worstPhase = *iter;
2574                 }
2575                 if(t < bestTime){
2576                         bestTime = t;
2577                         bestPhase = *iter;
2578                 }
2579         }
2580         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2581
2582         best = pointCoords(allData, bestPhase);
2583         CkAssert(best.size() == n);
2584
2585         worst = pointCoords(allData, worstPhase);
2586         CkAssert(worst.size() == n);
2587
2588         // Calculate centroid of the remaining points in the simplex
2589         centroid.resize(n);
2590         for(int i=0; i<n; i++){
2591                 centroid[i] = 0.0;
2592         }
2593
2594         int numPts = 0;
2595
2596         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2597                 if(*iter != worstPhase){
2598                         numPts ++;
2599                         // Accumulate into the result vector
2600                         int c = 0;
2601                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2602                                 centroid[c] += citer->second;
2603                                 c++;
2604                         }
2605
2606                 }
2607         }
2608
2609         // Now divide the sums by the number of points.
2610         for(int v = 0; v<centroid.size(); v++) {
2611                 centroid[v] /= (double)numPts;
2612         }
2613
2614         CkAssert(centroid.size() == n);
2615
2616         for(int i=0; i<centroid.size(); i++){
2617                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2618         }
2619
2620
2621 }
2622
2623
2624
2625 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2626         std::vector<double> result;
2627         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2628                 result.push_back((double)citer->second);
2629         }
2630         return result;
2631 }
2632
2633
2634
2635
2636 void ControlPointWriteOutputToDisk(){
2637         CkAssert(CkMyPe() == 0);
2638         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2639 }
2640
2641
2642
2643 /*! @} */
2644
2645 #ifdef CP_DISABLE_TRACING
2646 #include "ControlPointsNoTrace.def.h"
2647 #else
2648 #include "ControlPoints.def.h"
2649 #endif