Fixing linear model for load balancing period control point.
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15
16 #include <climits>
17 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
18  
19
20 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
21 /**  @{ */
22
23 using namespace std;
24
25 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
26
27
28 //#undef DEBUGPRINT
29 //#define DEBUGPRINT 4
30
31 static void periodicProcessControlPoints(void* ptr, double currWallTime);
32
33
34 // A pointer to this PE's controlpoint manager Proxy
35 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
36 /* readonly */ int random_seed;
37 /* readonly */ long controlPointSamplePeriod;
38 /* readonly */ int whichTuningScheme;
39 /* readonly */ bool writeDataFileAtShutdown;
40 /* readonly */ bool shouldFilterOutputData;
41 /* readonly */ bool loadDataFileAtStartup;
42 /* readonly */ bool shouldGatherMemoryUsage;
43 /* readonly */ bool shouldGatherUtilization;
44 /* readonly */ bool shouldGatherAll;
45 /* readonly */ char CPDataFilename[512];
46
47
48
49 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
50 /// These probably come from the command line arguments, so are available only on PE 0
51 std::map<std::string, int> defaultControlPointValues;
52
53
54
55 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic}  tuningScheme;
56
57
58
59 void printTuningScheme(){
60   switch(whichTuningScheme){
61   case RandomSelection:
62     CkPrintf("Tuning Scheme: RandomSelection\n");
63     break;
64   case AlwaysDefaults:
65     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
66     break;
67   case SimulatedAnnealing:
68     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
69     break;
70   case ExhaustiveSearch:
71     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
72     break;
73   case CriticalPathAutoPrioritization:
74     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
75     break;
76   case UseBestKnownTiming:
77     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
78     break;
79   case UseSteering:
80     CkPrintf("Tuning Scheme: UseSteering\n");
81     break;
82   case MemoryAware:
83     CkPrintf("Tuning Scheme: MemoryAware\n");
84     break;
85   case Simplex:
86     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
87     break;
88   case DivideAndConquer:
89     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
90     break;
91   case LDBPeriod:
92     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
93     break;
94   case LDBPeriodLinear:
95     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
96     break;
97   case LDBPeriodQuadratic:
98     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
99     break;
100   default:
101     CkPrintf("Unknown tuning scheme\n");
102     break;
103   }
104   fflush(stdout);
105 }
106
107
108
109 /// A reduction type that combines idle time measurements (min/sum/max etc.)
110 CkReduction::reducerType idleTimeReductionType;
111 /// A reducer that combines idle time measurements (min/sum/max etc.)
112 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
113   double ret[3];
114   if(nMsg > 0){
115     CkAssert(msgs[0]->getSize()==3*sizeof(double));
116     double *m=(double *)msgs[0]->getData();
117     ret[0]=m[0];
118     ret[1]=m[1];
119     ret[2]=m[2];
120   }
121   for (int i=1;i<nMsg;i++) {
122     CkAssert(msgs[i]->getSize()==3*sizeof(double));
123     double *m=(double *)msgs[i]->getData();
124     ret[0]=min(ret[0],m[0]);
125     ret[1]+=m[1];
126     ret[2]=max(ret[2],m[2]);
127   }  
128   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
129 }
130
131
132
133 /// A reduction type that combines idle, overhead, and memory measurements
134 CkReduction::reducerType allMeasuresReductionType;
135 /// A reducer that combines idle, overhead, and memory measurements
136 #define ALL_REDUCTION_SIZE 12
137 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
138   double ret[ALL_REDUCTION_SIZE];
139   if(nMsg > 0){
140     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
141     double *m=(double *)msgs[0]->getData();
142     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
143   }
144   for (int i=1;i<nMsg;i++) {
145     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[i]->getData();
147     // idle time (min/sum/max)
148     ret[0]=min(ret[0],m[0]);
149     ret[1]+=m[1];
150     ret[2]=max(ret[2],m[2]);
151     // overhead time (min/sum/max)
152     ret[3]=min(ret[3],m[3]);
153     ret[4]+=m[4];
154     ret[5]=max(ret[5],m[5]);
155     // mem usage (max)
156     ret[6]=max(ret[6],m[6]);
157     // bytes per invocation for two types of entry methods
158     ret[7]+=m[7];
159     ret[8]+=m[8];
160     ret[9]+=m[9];
161     ret[10]+=m[10];
162     // Grain size (avg)
163     ret[11]+=m[11];
164   }  
165   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
166 }
167
168
169 /// Registers the control point framework's reduction handlers at startup on each PE
170 /*initproc*/ void registerCPReductions(void) {
171   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
172   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
173 }
174
175
176
177
178
179
180 /// Return an integer between 0 and num-1 inclusive
181 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
182 unsigned int randInt(unsigned int num, const char* name, int seed=0){
183   CkAssert(num > 0);
184   CkAssert(num < 1000);
185
186   unsigned long hash = 0;
187   unsigned int c;
188   unsigned char * str = (unsigned char*)name;
189
190   while (c = *str++){
191     unsigned int c2 = (c+64)%128;
192     unsigned int c3 = (c2*5953)%127;
193     hash = c3 + (hash << 6) + (hash << 16) - hash;
194   }
195
196   unsigned long shuffled1 = (hash*2083)%7907;
197   unsigned long shuffled2 = (seed*4297)%2017;
198
199   unsigned long shuffled3 = (random_seed*4799)%7919;
200
201   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
202
203   unsigned int result = ((namehash * 6029) % 1117) % num;
204
205   CkAssert(result >=0 && result < num);
206   return result;
207 }
208
209
210
211 controlPointManager::controlPointManager() {
212   generatedPlanForStep = -1;
213
214     exitWhenReady = false;
215     alreadyRequestedMemoryUsage = false;   
216     alreadyRequestedIdleTime = false;
217     alreadyRequestedAll = false;
218     
219     instrumentedPhase * newPhase = new instrumentedPhase();
220     allData.phases.push_back(newPhase);   
221     
222     frameworkShouldAdvancePhase = false;
223     haveControlPointChangeCallback = false;
224 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
225     
226     ControlPoint::initControlPointEffects();
227
228     phase_id = 0;
229
230     if(loadDataFileAtStartup){    
231       loadDataFile();
232     }
233
234     
235     if(CkMyPe() == 0){
236       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
237     }
238
239     traceRegisterUserEvent("No currently executing message", 5000);
240     traceRegisterUserEvent("Zero time along critical path", 5010);
241     traceRegisterUserEvent("Positive total time along critical path", 5020);
242     traceRegisterUserEvent("env->setPathHistory()", 6000);
243     traceRegisterUserEvent("Critical Path", 5900);
244     traceRegisterUserEvent("Table Entry", 5901);
245
246
247 #if USER_EVENT_FOR_REGISTERTERMINALPATH
248     traceRegisterUserEvent("registerTerminalPath", 100); 
249 #endif
250
251   }
252   
253
254  controlPointManager::~controlPointManager(){
255 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
256   }
257
258
259   /// Loads the previous run data file
260   void controlPointManager::loadDataFile(){
261     ifstream infile(CPDataFilename);
262     vector<std::string> names;
263     std::string line;
264   
265     while(getline(infile,line)){
266       if(line[0] != '#')
267         break;
268     }
269   
270     int numTimings = 0;
271     std::istringstream n(line);
272     n >> numTimings;
273   
274     while(getline(infile,line)){ 
275       if(line[0] != '#') 
276         break; 
277     }
278
279     int numControlPointNames = 0;
280     std::istringstream n2(line);
281     n2 >> numControlPointNames;
282   
283     for(int i=0; i<numControlPointNames; i++){
284       getline(infile,line);
285       names.push_back(line);
286     }
287
288     for(int i=0;i<numTimings;i++){
289       getline(infile,line);
290       while(line[0] == '#')
291         getline(infile,line); 
292
293       instrumentedPhase * ips = new instrumentedPhase();
294
295       std::istringstream iss(line);
296
297       // Read memory usage for phase
298       iss >> ips->memoryUsageMB;
299       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
300
301       // Read idle time
302       iss >> ips->idleTime.min;
303       iss >> ips->idleTime.avg;
304       iss >> ips->idleTime.max;
305
306       // Read overhead time
307       iss >> ips->overheadTime.min;
308       iss >> ips->overheadTime.avg;
309       iss >> ips->overheadTime.max;
310
311       // read bytePerInvoke
312       iss >> ips->bytesPerInvoke;
313
314       // read grain size
315       iss >> ips->grainSize;
316
317       // Read control point values
318       for(int cp=0;cp<numControlPointNames;cp++){
319         int cpvalue;
320         iss >> cpvalue;
321         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
322       }
323
324       // ignore median time
325       double mt;
326       iss >> mt;
327
328       // Read all times
329
330       double time;
331
332       while(iss >> time){
333         ips->times.push_back(time);
334 #if DEBUGPRINT > 5
335         CkPrintf("read time %lf from file\n", time);
336 #endif
337       }
338
339       allData.phases.push_back(ips);
340
341     }
342
343     infile.close();
344   }
345
346
347
348   /// Add the current data to allData and output it to a file
349   void controlPointManager::writeDataFile(){
350     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
351     ofstream outfile(CPDataFilename);
352     allData.cleanupNames();
353
354 //    string s = allData.toString();
355 //    CkPrintf("At end: \n %s\n", s.c_str());
356
357     if(shouldFilterOutputData){
358       allData.verify();
359       allData.filterOutIncompletePhases();
360     }
361
362 //    string s2 = allData.toString();
363 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
364     if(allData.toString().length() > 10){
365       outfile << allData.toString();
366     } else {
367       outfile << " No data available to save to disk " << endl;
368     }
369     outfile.close();
370   }
371
372   /// User can register a callback that is called when application should advance to next phase
373   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
374     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
375     controlPointChangeCallback = cb;
376     haveControlPointChangeCallback = true;
377   }
378
379
380 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
381 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
382   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
383 }
384
385   /// Called periodically by the runtime to handle the control points
386   /// Currently called on each PE
387   void controlPointManager::processControlPoints(){
388
389     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
390
391
392     //==========================================================================================
393     // Print the data for each phase
394
395     const int s = allData.phases.size();
396
397 #if DEBUGPRINT
398     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
399     for(int p=0;p<s;++p){
400       const instrumentedPhase &phase = allData.phases[p];
401       const idleTimeContainer &idle = phase.idleTime;
402       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
403       vector<double> const &times = phase.times;
404
405       CkPrintf("Phase %d:\n", p);
406       idle.print();
407      //   CkPrintf("critical paths: (* affected by control point)\n");
408         //   for(int i=0;i<criticalPaths.size(); i++){
409         // If affected by a control point
410         //      criticalPaths[i].print();
411
412       //        criticalPaths[i].print();
413       //        CkPrintf("\n");
414         
415
416       //   }
417       CkPrintf("Timings:\n");
418       for(int i=0;i<times.size(); i++){
419         CkPrintf("%lf ", times[i]);
420       }
421       CkPrintf("\n");
422
423     }
424     
425     CkPrintf("\n\n");
426
427
428 #endif
429
430
431
432     //==========================================================================================
433     // If this is a phase during which we try to adapt control point values based on critical path
434 #if 0
435     if( s%5 == 4) {
436
437       // Find the most recent phase with valid critical path data and idle time measurements      
438       int whichPhase=-1;
439       for(int p=0; p<s; ++p){
440         const instrumentedPhase &phase = allData.phases[p];
441         const idleTimeContainer &idle = phase.idleTime;
442         if(idle.isValid() && phase.criticalPaths.size()>0 ){
443           whichPhase = p;
444         }
445       }
446       
447       
448       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
449       const instrumentedPhase &phase = allData.phases[whichPhase];
450       const idleTimeContainer &idle = phase.idleTime;
451       
452       if(idle.min > 0.1){
453         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
454         
455         // Determine the median critical path for this phase
456         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
457         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
458         CkAssert(phase.criticalPaths.size() > 0);
459         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
460         
461         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
462         
463         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
464           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
465
466
467           CkPrintf("Finding control points related to the critical path\n");
468           int cpcount = 0;
469           std::set<std::string> controlPointsAffectingCriticalPath;
470
471           
472           for(int e=0;e<path.getNumUsed();e++){
473             if(path.getUsedCount(e)>0){
474               int ep = path.getUsedEp(e);
475
476               std::map<std::string, std::set<int> >::iterator iter;
477               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
478                 if(iter->second.count(ep)>0){
479                   controlPointsAffectingCriticalPath.insert(iter->first);
480                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
481                   cpcount++;
482                 }
483               }
484               
485             }
486           }
487           
488
489           if(cpcount==0){
490             CkPrintf("No control points are known to affect the critical path\n");
491           } else {
492             double beginTime = CmiWallTimer();
493
494             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
495             
496             newControlPoints = phase.controlPoints;
497             
498             if(frameworkShouldAdvancePhase){
499               gotoNextPhase();  
500             }
501             
502             if(haveControlPointChangeCallback){ 
503 #if DEBUGPRINT
504               CkPrintf("Calling control point change callback\n");
505 #endif
506               // Create a high priority message and send it to the callback
507               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
508               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
509               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
510               controlPointChangeCallback.send(msg);
511               
512             }
513             
514             
515             // adjust the control points that can affect the critical path
516
517             char textDescription[4096*2];
518             textDescription[0] = '\0';
519
520             std::map<std::string,int>::iterator newCP;
521             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
522               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
523                 // decrease the value (increase priority) if within range
524                 int lowerbound = controlPointSpace[newCP->first].first;
525                 if(newCP->second > lowerbound){
526                   newControlPointsAvailable = true;
527                   newCP->second --;
528                 }
529               }
530             }
531            
532             // Create a string for a projections user event
533             if(1){
534               std::map<std::string,int>::iterator newCP;
535               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
536                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
537               }
538             }
539
540             char *userEventString = new char[4096*2];
541             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
542             
543             static int userEventCounter = 20000;
544             CkPrintf("USER EVENT: %s\n", userEventString);
545             
546             traceRegisterUserEvent(userEventString, userEventCounter); 
547             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
548             userEventCounter++;
549             
550           }
551           
552         } else {
553           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
554         }
555         
556         
557       }
558       
559     } else {
560       
561       
562     }
563    
564 #endif
565
566
567
568     if(frameworkShouldAdvancePhase){
569       gotoNextPhase();  
570     }
571     
572     if(haveControlPointChangeCallback){ 
573       // Create a high priority message and send it to the callback
574       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
575       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
576       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
577       controlPointChangeCallback.send(msg);
578     }
579     
580   }
581   
582   /// Determine if any control point is known to affect an entry method
583   bool controlPointManager::controlPointAffectsThisEP(int ep){
584     std::map<std::string, std::set<int> >::iterator iter;
585     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
586       if(iter->second.count(ep)>0){
587         return true;
588       }
589     }
590     return false;    
591   }
592   
593   /// Determine if any control point is known to affect a chare array  
594   bool controlPointManager::controlPointAffectsThisArray(int array){
595     std::map<std::string, std::set<int> >::iterator iter;
596     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
597       if(iter->second.count(array)>0){
598         return true;
599       }
600     }
601     return false;   
602   }
603   
604
605   /// The data from the current phase
606   instrumentedPhase * controlPointManager::currentPhaseData(){
607     int s = allData.phases.size();
608     CkAssert(s>=1);
609     return allData.phases[s-1];
610   }
611  
612
613   /// The data from the previous phase
614   instrumentedPhase * controlPointManager::previousPhaseData(){
615     int s = allData.phases.size();
616     if(s >= 2 && phase_id > 0) {
617       return allData.phases[s-2];
618     } else {
619       return NULL;
620     }
621   }
622  
623   /// The data from two phases back
624   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
625     int s = allData.phases.size();
626     if(s >= 3 && phase_id > 0) {
627       return allData.phases[s-3];
628     } else {
629       return NULL;
630     }
631   }
632   
633
634   /// Called by either the application or the Control Point Framework to advance to the next phase  
635   void controlPointManager::gotoNextPhase(){
636     
637 #ifndef CP_DISABLE_TRACING
638     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
639     fflush(stdout);
640
641     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
642       alreadyRequestedAll = true;
643       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
644       CkPrintf("Requesting all measurements\n");
645       thisProxy.requestAll(*cb);
646       delete cb;
647     
648     } else {
649       
650       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
651         alreadyRequestedMemoryUsage = true;
652         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
653         thisProxy.requestMemoryUsage(*cb);
654         delete cb;
655       }
656       
657       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
658         alreadyRequestedIdleTime = true;
659         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
660         thisProxy.requestIdleTime(*cb);
661         delete cb;
662       }
663     }
664     
665
666 #endif
667
668
669
670 #if CMK_LBDB_ON && 0
671     LBDatabase * myLBdatabase = LBDatabaseObj();
672     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
673     const CkVec<LBObj*> objs = myLBDB->getObjs();
674     const int objCount = myLBDB->getObjCount();
675     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
676     
677     floatType maxObjWallTime = -1.0;
678     
679     for(int i=0;i<objs.length();i++){
680       LBObj* o = objs[i];
681       const LDObjData d = o->ObjData();
682       floatType cpuTime = d.cpuTime;
683       floatType wallTime = d.wallTime;
684       // can also get object handles from the LDObjData struct
685       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
686       if(wallTime > maxObjWallTime){
687
688       }
689       
690     }
691
692     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
693     
694 #endif    
695
696
697     
698     // increment phase id
699     phase_id++;
700     
701
702     // Create new entry for the phase we are starting now
703     instrumentedPhase * newPhase = new instrumentedPhase();
704     allData.phases.push_back(newPhase);
705     
706     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
707
708   }
709
710   /// An application uses this to register an instrumented timing for this phase
711   void controlPointManager::setTiming(double time){
712     currentPhaseData()->times.push_back(time);
713
714 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
715        
716     // First we should register this currently executing message as a path, because it is likely an important one to consider.
717     //    registerTerminalEntryMethod();
718     
719     // save the critical path for this phase
720     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
721     //    maxTerminalPathHistory.reset();
722     
723     
724     // Reset the counts for the currently executing message
725     //resetThisEntryPath();
726         
727 #endif
728     
729   }
730   
731   /// Entry method called on all PEs to request CPU utilization statistics
732   void controlPointManager::requestIdleTime(CkCallback cb){
733 #ifndef CP_DISABLE_TRACING
734     double i = localControlPointTracingInstance()->idleRatio();
735     double idle[3];
736     idle[0] = i;
737     idle[1] = i;
738     idle[2] = i;
739     
740     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
741     
742     localControlPointTracingInstance()->resetTimings();
743
744     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
745 #else
746     CkAbort("Should not get here\n");
747 #endif
748   }
749   
750   /// All processors reduce their memory usages in requestIdleTime() to this method
751   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
752 #ifndef CP_DISABLE_TRACING
753     int size=msg->getSize() / sizeof(double);
754     CkAssert(size==3);
755     double *r=(double *) msg->getData();
756         
757     instrumentedPhase* prevPhase = previousPhaseData();
758     if(prevPhase != NULL){
759       prevPhase->idleTime.min = r[0];
760       prevPhase->idleTime.avg = r[1]/CkNumPes();
761       prevPhase->idleTime.max = r[2];
762       prevPhase->idleTime.print();
763       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
764     } else {
765       CkPrintf("There is no previous phase to store the idle time measurements\n");
766     }
767     
768     alreadyRequestedIdleTime = false;
769     checkForShutdown();
770     delete msg;
771 #else
772     CkAbort("Should not get here\n");
773 #endif
774   }
775
776
777
778
779
780
781   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
782   void controlPointManager::requestAll(CkCallback cb){
783 #ifndef CP_DISABLE_TRACING
784     TraceControlPoints *t = localControlPointTracingInstance();
785
786     double data[ALL_REDUCTION_SIZE];
787
788     double *idle = data;
789     double *over = data+3;
790     double *mem = data+6;
791     double *msgBytes = data+7;  
792     double *grainsize = data+11;  
793
794     const double i = t->idleRatio();
795     idle[0] = i;
796     idle[1] = i;
797     idle[2] = i;
798
799     const double o = t->overheadRatio();
800     over[0] = o;
801     over[1] = o;
802     over[2] = o;
803
804     const double m = t->memoryUsageMB();
805     mem[0] = m;
806
807     msgBytes[0] = t->b2;
808     msgBytes[1] = t->b3;
809     msgBytes[2] = t->b2mlen;
810     msgBytes[3] = t->b3mlen;
811
812     grainsize[0] = t->grainSize();
813     
814     localControlPointTracingInstance()->resetAll();
815
816     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
817 #else
818     CkAbort("Should not get here\n");
819 #endif
820   }
821   
822   /// All processors reduce their memory usages in requestIdleTime() to this method
823   void controlPointManager::gatherAll(CkReductionMsg *msg){
824 #ifndef CP_DISABLE_TRACING
825     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
826     int size=msg->getSize() / sizeof(double);
827     double *data=(double *) msg->getData();
828         
829     double *idle = data;
830     double *over = data+3;
831     double *mem = data+6;
832     double *msgBytes = data+7;
833     double *granularity = data+11;
834
835
836     //    std::string b = allData.toString();
837
838     instrumentedPhase* prevPhase = previousPhaseData();
839     if(prevPhase != NULL){
840       prevPhase->idleTime.min = idle[0];
841       prevPhase->idleTime.avg = idle[1]/CkNumPes();
842       prevPhase->idleTime.max = idle[2];
843       
844       prevPhase->overheadTime.min = over[0];
845       prevPhase->overheadTime.avg = over[1]/CkNumPes();
846       prevPhase->overheadTime.max = over[2];
847       
848       prevPhase->memoryUsageMB = mem[0];
849       
850       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
851
852       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
853       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
854
855       /** The average of the grain sizes on all PEs in us */
856       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
857
858       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
859       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
860
861       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
862       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
863
864       if(bytesPerInvoke2 > bytesPerInvoke3)
865         prevPhase->bytesPerInvoke = bytesPerInvoke2;
866       else
867         prevPhase->bytesPerInvoke = bytesPerInvoke3;
868
869       //      prevPhase->print();
870       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
871
872       //      std::string a = allData.toString();
873
874       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
875       
876     } else {
877       CkPrintf("There is no previous phase to store measurements\n");
878     }
879     
880     alreadyRequestedAll = false;
881     checkForShutdown();
882     delete msg;
883 #else
884     CkAbort("Should not get here\n");
885 #endif
886   }
887
888
889
890
891   void controlPointManager::checkForShutdown(){
892     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
893       doExitNow();
894     }
895   }
896
897
898   void controlPointManager::exitIfReady(){
899      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
900        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
901        doExitNow();
902      } else {
903        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
904        exitWhenReady = true;
905      }
906   }
907
908
909
910   void controlPointManager::doExitNow(){
911           writeOutputToDisk();
912           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
913           CkExit();
914   }
915
916   void controlPointManager::writeOutputToDisk(){
917           if(writeDataFileAtShutdown){
918                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
919           }
920   }
921
922
923   /// Entry method called on all PEs to request memory usage
924   void controlPointManager::requestMemoryUsage(CkCallback cb){
925     int m = CmiMaxMemoryUsage() / 1024 / 1024;
926     CmiResetMaxMemory();
927     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
928     contribute(sizeof(int),&m,CkReduction::max_int, cb);
929   }
930
931   /// All processors reduce their memory usages to this method
932   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
933     int size=msg->getSize() / sizeof(int);
934     CkAssert(size==1);
935     int *m=(int *) msg->getData();
936
937     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
938     
939     instrumentedPhase* prevPhase = previousPhaseData();
940     if(prevPhase != NULL){
941       prevPhase->memoryUsageMB = m[0];
942     } else {
943       CkPrintf("No place to store memory usage");
944     }
945
946     alreadyRequestedMemoryUsage = false;
947     checkForShutdown();
948     delete msg;
949   }
950
951
952 //   /// Inform the control point framework that a named control point affects the priorities of some array  
953 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
954 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
955     
956 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
957 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
958 //     } else {
959 //       std::set<int> s;
960 //       s.insert(groupIdx);
961 //       affectsPrioritiesArray[std::string(name)] = s;
962 //     }
963     
964 // #if DEBUGPRINT   
965 //     std::map<std::string, std::set<int> >::iterator f;
966 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
967 //       std::string name = f->first;
968 //       std::set<int> &vals = f->second;
969 //       cout << "Control point " << name << " affects arrays: ";
970 //       std::set<int>::iterator i;
971 //       for(i=vals.begin(); i!=vals.end();++i){
972 //      cout << *i << " ";
973 //       }
974 //       cout << endl;
975 //     }
976 // #endif
977     
978 //   }
979   
980 //   /// Inform the control point framework that a named control point affects the priority of some entry method
981 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
982 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
983
984 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
985 //       affectsPrioritiesEP[std::string(name)].insert(idx);
986 //     } else {
987 //       std::set<int> s;
988 //       s.insert(idx);
989 //       affectsPrioritiesEP[std::string(name)] = s;
990 //     }
991     
992 // #if DEBUGPRINT
993 //     std::map<std::string, std::set<int> >::iterator f;
994 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
995 //       std::string name = f->first;
996 //       std::set<int> &vals = f->second;
997 //       cout << "Control point " << name << " affects EP: ";
998 //       std::set<int>::iterator i;
999 //       for(i=vals.begin(); i!=vals.end();++i){
1000 //      cout << *i << " ";
1001 //       }
1002 //       cout << endl;
1003 //     }
1004 // #endif
1005
1006
1007 //   }
1008   
1009
1010
1011 /// An interface callable by the application.
1012 void gotoNextPhase(){
1013   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1014 }
1015
1016 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1017 {
1018   gotoNextPhase();
1019 }
1020
1021
1022 /// A mainchare that is used just to create our controlPointManager group at startup
1023 class controlPointMain : public CBase_controlPointMain {
1024 public:
1025   controlPointMain(CkArgMsg* args){
1026 #if OLDRANDSEED
1027     struct timeval tp;
1028     gettimeofday(& tp, NULL);
1029     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1030 #else
1031     double time = CmiWallTimer();
1032     int sec = (int)time;
1033     int usec = (int)((time - (double)sec)*1000000.0);
1034     random_seed =  sec ^ usec;
1035 #endif
1036     
1037     
1038     double period;
1039     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1040     if(haveSamplePeriod){
1041       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1042       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1043     } else {
1044       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1045     }
1046     
1047     
1048     
1049     whichTuningScheme = RandomSelection;
1050
1051
1052     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1053       whichTuningScheme = RandomSelection;
1054     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1055       whichTuningScheme = ExhaustiveSearch;
1056     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1057       whichTuningScheme = AlwaysDefaults;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1059       whichTuningScheme = SimulatedAnnealing;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1061       whichTuningScheme = CriticalPathAutoPrioritization;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1063       whichTuningScheme = UseBestKnownTiming;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1065       whichTuningScheme = UseSteering;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1067       whichTuningScheme = MemoryAware;
1068     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1069       whichTuningScheme = Simplex;
1070     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1071       whichTuningScheme = DivideAndConquer;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period") ){
1073       whichTuningScheme = LDBPeriod;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1075       whichTuningScheme = LDBPeriodLinear;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1077       whichTuningScheme = LDBPeriodQuadratic;
1078     }
1079
1080     char *defValStr = NULL;
1081     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1082       CkPrintf("You specified default value string: %s\n", defValStr);
1083       
1084       // Parse the string, looking for commas
1085      
1086
1087       char *tok = strtok(defValStr, ",");
1088       while (tok) {
1089         // split on the equal sign
1090         int len = strlen(tok);
1091         char *eqsign = strchr(tok, '=');
1092         if(eqsign != NULL && eqsign != tok){
1093           *eqsign = '\0';
1094           char *cpname = tok;
1095           std::string cpName(tok);
1096           char *cpDefVal = eqsign+1;      
1097           int v=-1;
1098           if(sscanf(cpDefVal, "%d", &v) == 1){
1099             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1100             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1101             defaultControlPointValues[cpName] = v;
1102           }
1103         }
1104         tok = strtok(NULL, ",");
1105       }
1106
1107     }
1108
1109     shouldGatherAll = false;
1110     shouldGatherMemoryUsage = false;
1111     shouldGatherUtilization = false;
1112     
1113     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1114       shouldGatherAll = true;
1115     } else {
1116       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1117         shouldGatherMemoryUsage = true;
1118       }
1119       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1120         shouldGatherUtilization = true;
1121       }
1122     }
1123     
1124     writeDataFileAtShutdown = false;   
1125     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1126       writeDataFileAtShutdown = true;
1127     }
1128
1129     shouldFilterOutputData = true;
1130     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1131       shouldFilterOutputData = false;
1132     }
1133
1134
1135    loadDataFileAtStartup = false;   
1136     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1137       loadDataFileAtStartup = true;
1138     }
1139
1140     char *cpdatafile;
1141     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1142       sprintf(CPDataFilename, "%s", cpdatafile);
1143     } else {
1144       sprintf(CPDataFilename, "controlPointData.txt");
1145     }
1146
1147
1148     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1149   }
1150   ~controlPointMain(){}
1151 };
1152
1153 /// An interface callable by the application.
1154 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1155   CkAssert(CkMyPe() == 0);
1156   CkPrintf("Application has registered a control point change callback\n");
1157   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1158 }
1159
1160 /// An interface callable by the application.
1161 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1162   if(CkMyPe() == 0){
1163     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1164     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1165   }
1166 }
1167
1168 /// An interface callable by the application.
1169 void registerControlPointTiming(double time){
1170   CkAssert(CkMyPe() == 0);
1171 #if DEBUGPRINT>0
1172   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1173 #endif
1174   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1175 }
1176
1177 /// An interface callable by the application.
1178 void controlPointTimingStamp() {
1179   CkAssert(CkMyPe() == 0);
1180 #if DEBUGPRINT>0
1181   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1182 #endif
1183   
1184   static double prev_time = 0.0;
1185   double t = CmiWallTimer();
1186   double duration = t - prev_time;
1187   prev_time = t;
1188     
1189   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1190 }
1191
1192 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1193 {
1194   controlPointTimingStamp();
1195 }
1196
1197
1198
1199
1200
1201 /// Shutdown the control point framework, writing data to disk if necessary
1202 extern "C" void controlPointShutdown(){
1203   if(CkMyPe() == 0){
1204
1205     // wait for gathering of idle time & memory usage to complete
1206     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1207
1208   }
1209 }
1210
1211 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1212 void controlPointInitNode(){
1213 //  CkPrintf("controlPointInitNode()\n");
1214   registerExitFn(controlPointShutdown);
1215 }
1216
1217 /// Called periodically to allow control point framework to do things periodically
1218 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1219 #ifdef DEBUGPRINT
1220   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1221 #endif
1222   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1223   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1224 }
1225
1226
1227
1228
1229
1230 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1231 /// user observed characteristic to adapt specific control point values.
1232 /// This function must return valid values for newControlPoints.
1233 void controlPointManager::generatePlan() {
1234   const double startGenerateTime = CmiWallTimer();
1235   const int phase_id = this->phase_id;
1236   const int effective_phase = allData.phases.size();
1237
1238   // Only generate a plan once per phase
1239   if(generatedPlanForStep == phase_id) 
1240     return;
1241   generatedPlanForStep = phase_id;
1242  
1243   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1244   printTuningScheme();
1245
1246   // By default lets put the previous phase data into newControlPoints
1247   instrumentedPhase *prevPhase = previousPhaseData();
1248   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1249           newControlPoints[cpsIter->first] = cpsIter->second;
1250   }
1251
1252
1253   if( whichTuningScheme == RandomSelection ){
1254     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1255     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1256       const std::string &name = cpsIter->first;
1257       const std::pair<int,int> &bounds = cpsIter->second;
1258       const int lb = bounds.first;
1259       const int ub = bounds.second;
1260       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1261     }
1262   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1263     // -----------------------------------------------------------
1264     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1265     //   
1266     // This scheme will return the median value for the range 
1267     // early on, and then will transition over to the new control points
1268     // determined by the critical path adapting code
1269
1270     // This won't work until the periodic function is fixed up a bit
1271
1272     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1273     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1274       const std::string &name = cpsIter->first;
1275       const std::pair<int,int> &bounds = cpsIter->second;
1276       const int lb = bounds.first;
1277       const int ub = bounds.second;
1278       newControlPoints[name] =  (lb+ub)/2;
1279     }
1280
1281   } else if ( whichTuningScheme == MemoryAware ) {
1282
1283     // -----------------------------------------------------------
1284     //  STEERING BASED ON MEMORY USAGE
1285
1286     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1287     instrumentedPhase *prevPhase = previousPhaseData();
1288  
1289     if(phase_id%2 == 0){
1290       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1291       twoAgoPhase->print();
1292       CkPrintf("\n");
1293       fflush(stdout);
1294       
1295       // See if memory usage is low:
1296       double memUsage = twoAgoPhase->memoryUsageMB;
1297       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1298       fflush(stdout);
1299       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1300         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1301         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1302         
1303         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1304         newControlPoints = twoAgoPhase->controlPoints;
1305
1306
1307         CkPrintf("Steering (memory based) initialized plan\n");
1308         fflush(stdout);
1309
1310         // look for a possible control point knob to turn
1311         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1312         
1313         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1314         bool found = false;
1315         std::string cpName;
1316         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1317         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1318         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1319           cpName = iter->first;
1320           info = &iter->second;
1321           found = true;
1322           break;
1323         }
1324
1325         // Adapt the control point value
1326         if(found){
1327           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1328           fflush(stdout);
1329           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1330           const int maxValue = controlPointSpace[cpName].second;
1331           
1332           if(twoAgoValue+1 <= maxValue){
1333             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1334           }
1335         }
1336         
1337       }
1338     }
1339
1340     CkPrintf("Steering (memory based) done for this phase\n");
1341     fflush(stdout);
1342
1343   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1344
1345     // -----------------------------------------------------------
1346     //  USE BEST KNOWN TIME  
1347     static bool firstTime = true;
1348     if(firstTime){
1349       firstTime = false;
1350       instrumentedPhase *best = allData.findBest(); 
1351       CkPrintf("Best known phase is:\n");
1352       best->print();
1353       CkPrintf("\n");
1354       
1355       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1356       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1357         const std::string &name = cpsIter->first;
1358         newControlPoints[name] =  best->controlPoints[name];
1359       }
1360     }
1361   } else if( whichTuningScheme == LDBPeriod) {
1362     // Assume this is used in this manner:
1363     //  1) go to next phase
1364     //  2) request control point
1365     //  3) load balancing
1366     //  4) computation
1367     
1368     
1369     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1370     instrumentedPhase *prevPhase = previousPhaseData();
1371     
1372     
1373     const std::vector<double> &times = twoAgoPhase->times;
1374     const int oldNumTimings = times.size();
1375
1376
1377     const std::vector<double> &timesNew = prevPhase->times;
1378     const int newNumTimings = timesNew.size();
1379
1380
1381     if(oldNumTimings > 4 && newNumTimings > 4){
1382       
1383       // Build model of execution time based on two phases ago
1384       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1385       
1386       double oldSum = 0;
1387       
1388       for(int i=2; i<oldNumTimings; i++){
1389         oldSum += times[i];
1390       }
1391       
1392       const double oldAvg = oldSum / (oldNumTimings-2);
1393       
1394       
1395       
1396       
1397       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1398       const double expectedTotalTime = oldAvg * newNumTimings;
1399       
1400       
1401       // Measure actual time
1402       double newSum = 0.0;
1403       for(int i=2; i<newNumTimings; ++i){
1404         newSum += timesNew[i];
1405       }
1406       
1407       const double newAvg = newSum / (newNumTimings-2);
1408       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1409       
1410       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1411       
1412       // Determine load balance cost
1413       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1414       
1415       const double benefitAfterLB = benefit - lbcost;
1416     
1417     
1418       // Determine whether LB cost outweights the estimated benefit
1419       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1420     
1421     
1422       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1423       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1424         const std::string &name = cpsIter->first;
1425         const std::pair<int,int> &bounds = cpsIter->second;
1426         const int lb = bounds.first;
1427         const int ub = bounds.second;
1428       
1429         if(benefitAfterLB > 0){
1430           CkPrintf("Constant Model: Beneficial LB\n");
1431           int newval = newControlPoints[name] / 2;
1432           if(newval > lb)
1433             newControlPoints[name] = newval;
1434           else 
1435             newControlPoints[name] = lb;
1436         } else {
1437           CkPrintf("Constant Model: Detrimental LB\n");
1438           int newval = newControlPoints[name] * 2;
1439           if(newval < ub)
1440             newControlPoints[name] = newval;
1441           else
1442             newControlPoints[name] = ub;
1443         }
1444       }
1445     }
1446     
1447     
1448   }  else if( whichTuningScheme == LDBPeriodLinear) {
1449     // Assume this is used in this manner:
1450     //  1) go to next phase
1451     //  2) request control point
1452     //  3) load balancing
1453     //  4) computation
1454
1455
1456     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1457     instrumentedPhase *prevPhase = previousPhaseData();
1458     
1459     const std::vector<double> &times = twoAgoPhase->times;
1460     const int oldNumTimings = times.size();
1461
1462     const std::vector<double> &timesNew = prevPhase->times;
1463     const int newNumTimings = timesNew.size();
1464     
1465
1466     if(oldNumTimings > 4 && newNumTimings > 4){
1467
1468       // Build model of execution time based on two phases ago
1469       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1470       const int b1 = 2 + (oldNumTimings-2)/2;
1471       double s1 = 0;
1472       double s2 = 0;
1473     
1474       const double ldbStepsTime = times[0] + times[1];
1475     
1476       for(int i=2; i<b1; i++){
1477         s1 += times[i];
1478       }
1479       for(int i=b1; i<oldNumTimings; i++){
1480         s2 += times[i];
1481       }
1482       
1483       
1484       // Compute the estimated time for the last phase's data
1485     
1486       const double a1 = s1 / (double)(b1-2);
1487       const double a2 = s2 / (double)(oldNumTimings-b1);
1488       const double aold = (a1+a2)/2.0;
1489
1490       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1491         
1492     
1493       // Measure actual time
1494       double sum = 0.0;
1495       for(int i=2; i<newNumTimings; ++i){
1496         sum += timesNew[i];
1497       }
1498
1499       const double avg = sum / ((double)(newNumTimings-2));
1500       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1501
1502       const double benefit = expectedTotalTime - actualTotalTime;
1503
1504       // Determine load balance cost
1505       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1506
1507       const double benefitAfterLB = benefit - lbcost;
1508
1509     
1510       // Determine whether LB cost outweights the estimated benefit
1511       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1512     
1513     
1514     
1515       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1516       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1517         const std::string &name = cpsIter->first;
1518         const std::pair<int,int> &bounds = cpsIter->second;
1519         const int lb = bounds.first;
1520         const int ub = bounds.second;
1521       
1522         if(benefitAfterLB > 0){
1523           CkPrintf("Linear Model: Beneficial LB\n");
1524           int newval = newControlPoints[name] / 2;
1525           if(newval > lb)
1526             newControlPoints[name] = newval;
1527           else 
1528             newControlPoints[name] = lb;
1529         } else {
1530           CkPrintf("Linear Model: Detrimental LB\n");
1531           int newval = newControlPoints[name] * 2;
1532           if(newval < ub)
1533             newControlPoints[name] = newval;
1534           else 
1535             newControlPoints[name] = ub;
1536         }
1537       }
1538     }
1539
1540   }
1541
1542   else if( whichTuningScheme == LDBPeriodQuadratic) {
1543     // Assume this is used in this manner:
1544     //  1) go to next phase
1545     //  2) request control point
1546     //  3) load balancing
1547     //  4) computation
1548
1549
1550     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1551     instrumentedPhase *prevPhase = previousPhaseData();
1552         
1553     const std::vector<double> &times = twoAgoPhase->times;
1554     const int oldNumTimings = times.size();
1555
1556     const std::vector<double> &timesNew = prevPhase->times;
1557     const int newNumTimings = timesNew.size();
1558
1559     
1560     if(oldNumTimings > 4 && newNumTimings > 4){
1561
1562
1563       // Build model of execution time based on two phases ago
1564       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1565       const int b1 = 2 + (oldNumTimings-2)/3;
1566       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1567       double s1 = 0;
1568       double s2 = 0;
1569       double s3 = 0;
1570
1571       const double ldbStepsTime = times[0] + times[1];
1572     
1573       for(int i=2; i<b1; i++){
1574         s1 += times[i];
1575       }
1576       for(int i=b1; i<b2; i++){
1577         s2 += times[i];
1578       }
1579       for(int i=b2; i<oldNumTimings; i++){
1580         s3 += times[i];
1581       }
1582
1583     
1584       // Compute the estimated time for the last phase's data
1585     
1586       const double a1 = s1 / (double)(b1-2);
1587       const double a2 = s2 / (double)(b2-b1);
1588       const double a3 = s3 / (double)(oldNumTimings-b2);
1589     
1590       const double a = (a1-2.0*a2+a3)/2.0;
1591       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1592       const double c = a3;
1593     
1594       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1595       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1596       const double x2 = 0.5;
1597       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1598    
1599     
1600       // Measure actual time
1601       double sum = 0.0;
1602       for(int i=2; i<newNumTimings; ++i){
1603         sum += timesNew[i];
1604       }
1605
1606       const double avg = sum / ((double)(newNumTimings-2));
1607       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1608
1609       const double benefit = expectedTotalTime - actualTotalTime;
1610
1611       // Determine load balance cost
1612       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1613
1614       const double benefitAfterLB = benefit - lbcost;
1615
1616     
1617       // Determine whether LB cost outweights the estimated benefit
1618       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1619     
1620     
1621     
1622       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1623       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1624         const std::string &name = cpsIter->first;
1625         const std::pair<int,int> &bounds = cpsIter->second;
1626         const int lb = bounds.first;
1627         const int ub = bounds.second;
1628       
1629         if(benefitAfterLB > 0){
1630           CkPrintf("QuadraticModel: Beneficial LB\n");
1631           int newval = newControlPoints[name] / 2;
1632           if(newval > lb)
1633             newControlPoints[name] = newval;
1634           else 
1635             newControlPoints[name] = lb;
1636         } else {
1637           CkPrintf("QuadraticModel: Detrimental LB\n");
1638           int newval = newControlPoints[name] * 2;
1639           if(newval < ub)
1640             newControlPoints[name] = newval;
1641           else 
1642             newControlPoints[name] = ub;
1643         }
1644       
1645       }
1646     }
1647     
1648   } else if ( whichTuningScheme == UseSteering ) {
1649           // -----------------------------------------------------------
1650           //  STEERING BASED ON KNOWLEDGE
1651
1652           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1653           // plans are only generated after 3 phases
1654
1655           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1656           instrumentedPhase *prevPhase = previousPhaseData();
1657
1658           if(phase_id%4 == 0){
1659                   CkPrintf("Steering based on 2 phases ago:\n");
1660                   twoAgoPhase->print();
1661                   CkPrintf("\n");
1662                   fflush(stdout);
1663
1664                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1665
1666
1667                   // ========================================= Concurrency =============================================
1668                   // See if idle time is high:
1669                   {
1670                           double idleTime = twoAgoPhase->idleTime.avg;
1671                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1672                           fflush(stdout);
1673                           if(idleTime > 0.10){
1674                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1675                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1676
1677                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1678
1679                                   bool found = false;
1680                                   std::string cpName;
1681                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1682                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1683                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1684                                           cpName = iter->first;
1685                                           info = &iter->second;
1686
1687                                           // Initialize a new plan based on two phases ago
1688                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1689
1690                                           CkPrintf("Steering found knob to turn\n");
1691                                           fflush(stdout);
1692
1693                                           if(info->first == ControlPoint::EFF_INC){
1694                                                   const int maxValue = controlPointSpace[cpName].second;
1695                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1696                                                   if(twoAgoValue+1 <= maxValue){
1697                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1698                                                   }
1699                                           } else {
1700                                                   const int minValue = controlPointSpace[cpName].second;
1701                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1702                                                   if(twoAgoValue-1 >= minValue){
1703                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1704                                                   }
1705                                           }
1706
1707                                           possibleNextStepPlans.push_back(aNewPlan);
1708
1709                                   }
1710                           }
1711                   }
1712
1713                   // ========================================= Grain Size =============================================
1714                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1715                   {
1716                           double overheadTime = twoAgoPhase->overheadTime.avg;
1717                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1718                           fflush(stdout);
1719                           if(overheadTime > 0.10){
1720                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1721                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1722
1723                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1724
1725                                   bool found = false;
1726                                   std::string cpName;
1727                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1728                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1729                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1730                                           cpName = iter->first;
1731                                           info = &iter->second;
1732
1733                                           // Initialize a new plan based on two phases ago
1734                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1735
1736
1737
1738                                           CkPrintf("Steering found knob to turn\n");
1739                                           fflush(stdout);
1740
1741                                           if(info->first == ControlPoint::EFF_INC){
1742                                                   const int maxValue = controlPointSpace[cpName].second;
1743                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1744                                                   if(twoAgoValue+1 <= maxValue){
1745                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1746                                                   }
1747                                           } else {
1748                                                   const int minValue = controlPointSpace[cpName].second;
1749                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1750                                                   if(twoAgoValue-1 >= minValue){
1751                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1752                                                   }
1753                                           }
1754
1755                                           possibleNextStepPlans.push_back(aNewPlan);
1756
1757                                   }
1758
1759                           }
1760                   }
1761                   // ========================================= GPU Offload =============================================
1762                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1763                   {
1764                           double idleTime = twoAgoPhase->idleTime.avg;
1765                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1766                           fflush(stdout);
1767                           if(idleTime > 0.10){
1768                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1769                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1770
1771                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1772
1773                                   bool found = false;
1774                                   std::string cpName;
1775                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1776                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1777                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1778                                           cpName = iter->first;
1779                                           info = &iter->second;
1780
1781                                           // Initialize a new plan based on two phases ago
1782                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1783
1784
1785                                           CkPrintf("Steering found knob to turn\n");
1786                                           fflush(stdout);
1787
1788                                           if(info->first == ControlPoint::EFF_DEC){
1789                                                   const int maxValue = controlPointSpace[cpName].second;
1790                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1791                                                   if(twoAgoValue+1 <= maxValue){
1792                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1793                                                   }
1794                                           } else {
1795                                                   const int minValue = controlPointSpace[cpName].second;
1796                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1797                                                   if(twoAgoValue-1 >= minValue){
1798                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1799                                                   }
1800                                           }
1801
1802                                           possibleNextStepPlans.push_back(aNewPlan);
1803
1804                                   }
1805
1806                           }
1807                   }
1808
1809                   // ========================================= Done =============================================
1810
1811
1812                   if(possibleNextStepPlans.size() > 0){
1813                           newControlPoints = possibleNextStepPlans[0];
1814                   }
1815
1816
1817                   CkPrintf("Steering done for this phase\n");
1818                   fflush(stdout);
1819
1820           }  else {
1821                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1822                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1823                   fflush(stdout);
1824           }
1825
1826
1827
1828   } else if( whichTuningScheme == SimulatedAnnealing ) {
1829
1830     // -----------------------------------------------------------
1831     //  SIMULATED ANNEALING
1832     //  Simulated Annealing style hill climbing method
1833     //
1834     //  Find the best search space configuration, and try something
1835     //  nearby it, with a radius decreasing as phases increase
1836
1837     CkPrintf("Finding best phase\n");
1838     instrumentedPhase *bestPhase = allData.findBest();  
1839     CkPrintf("best found:\n"); 
1840     bestPhase->print(); 
1841     CkPrintf("\n"); 
1842     
1843
1844     const int convergeByPhase = 100;
1845     // Determine from 0.0 to 1.0 how far along we are
1846     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1847
1848     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1849     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1850       const std::string &name = cpsIter->first;
1851       const std::pair<int,int> &bounds = cpsIter->second;
1852       const int minValue = bounds.first;
1853       const int maxValue = bounds.second;
1854       
1855       const int before = bestPhase->controlPoints[name];   
1856   
1857       const int range = (maxValue-minValue+1)*(1.0-progress);
1858
1859       int high = min(before+range, maxValue);
1860       int low = max(before-range, minValue);
1861       
1862       newControlPoints[name] = low;
1863       if(high-low > 0){
1864         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1865       } 
1866       
1867     }
1868
1869   } else if ( whichTuningScheme == DivideAndConquer ) {
1870
1871           // -----------------------------------------------------------
1872           //  STEERING FOR Divide & Conquer Programs
1873           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1874           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1875           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1876
1877           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1878           // plans are only generated after 3 phases
1879
1880           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1881           instrumentedPhase *prevPhase = previousPhaseData();
1882
1883           if(phase_id%4 == 0){
1884                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1885                   twoAgoPhase->print();
1886                   CkPrintf("\n");
1887                   fflush(stdout);
1888
1889                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1890
1891
1892                   // ========================================= Concurrency =============================================
1893                   // See if idle time is high:
1894                   {
1895                           double idleTime = twoAgoPhase->idleTime.avg;
1896                           double overheadTime = twoAgoPhase->overheadTime.avg;
1897
1898
1899                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1900                           fflush(stdout);
1901                           if(idleTime+overheadTime > 0.10){
1902                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1903                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1904
1905                                   int direction = -1;
1906                                   if (idleTime>overheadTime){
1907                                           // We need to decrease the grain size, or increase the available concurrency
1908                                           direction = 1;
1909                                   }
1910
1911                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1912
1913                                   bool found = false;
1914                                   std::string cpName;
1915                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1916                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1917                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1918                                           cpName = iter->first;
1919                                           info = &iter->second;
1920                                           
1921                                           // Initialize a new plan based on two phases ago
1922                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1923                                           bool newPlanExists = false;
1924
1925                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1926                                           fflush(stdout);
1927
1928
1929
1930                                           if(info->first == ControlPoint::EFF_INC){
1931                                                   const int minValue = controlPointSpace[cpName].first;
1932                                                   const int maxValue = controlPointSpace[cpName].second;
1933                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1934                                                   const int newVal = twoAgoValue+1*direction;
1935                                                   if(newVal <= maxValue && newVal >= minValue){
1936                                                     aNewPlan[cpName] = newVal;
1937                                                     newPlanExists = true;
1938                                                   } else {
1939                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1940                                                   }
1941                                           } else {
1942                                                   const int minValue = controlPointSpace[cpName].first;
1943                                                   const int maxValue = controlPointSpace[cpName].second;
1944                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1945                                                   const int newVal = twoAgoValue-1*direction;
1946                                                   if(newVal <= maxValue && newVal >= minValue){
1947                                                           aNewPlan[cpName] = newVal;
1948                                                           newPlanExists = true;
1949                                                   } else {
1950                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1951                                                   }
1952                                           }
1953
1954                                           if(newPlanExists){
1955                                             possibleNextStepPlans.push_back(aNewPlan);
1956                                           }
1957                                   }
1958                           }
1959                   }
1960
1961                   if(possibleNextStepPlans.size() > 0){
1962                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1963                     newControlPoints = possibleNextStepPlans[0];
1964                   } else {
1965                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1966                   }
1967           }
1968
1969   } else if( whichTuningScheme == Simplex ) {
1970
1971           // -----------------------------------------------------------
1972           //  Nelder Mead Simplex Algorithm
1973           //
1974           //  A scheme that takes a simplex (n+1 points) and moves it
1975           //  toward the minimum, eventually converging there.
1976
1977           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1978
1979   } else if( whichTuningScheme == ExhaustiveSearch ) {
1980     
1981     // -----------------------------------------------------------
1982     // EXHAUSTIVE SEARCH
1983    
1984     int numDimensions = controlPointSpace.size();
1985     CkAssert(numDimensions > 0);
1986   
1987     vector<int> lowerBounds(numDimensions);
1988     vector<int> upperBounds(numDimensions); 
1989   
1990     int d=0;
1991     std::map<std::string, pair<int,int> >::iterator iter;
1992     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1993       //    CkPrintf("Examining dimension %d\n", d);
1994       lowerBounds[d] = iter->second.first;
1995       upperBounds[d] = iter->second.second;
1996       d++;
1997     }
1998    
1999     // get names for each dimension (control point)
2000     vector<std::string> names(numDimensions);
2001     d=0;
2002     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2003       names[d] = niter->first;
2004       d++;
2005     }
2006   
2007   
2008     // Create the first possible configuration
2009     vector<int> config = lowerBounds;
2010     config.push_back(0);
2011   
2012     // Increment until finding an unused configuration
2013     allData.cleanupNames(); // put -1 values in for any control points missing
2014     std::vector<instrumentedPhase*> &phases = allData.phases;     
2015
2016     while(true){
2017     
2018       std::vector<instrumentedPhase*>::iterator piter; 
2019       bool testedConfiguration = false; 
2020       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2021       
2022         // Test if the configuration matches this phase
2023         bool match = true;
2024         for(int j=0;j<numDimensions;j++){
2025           match &= (*piter)->controlPoints[names[j]] == config[j];
2026         }
2027       
2028         if(match){
2029           testedConfiguration = true; 
2030           break;
2031         } 
2032       } 
2033     
2034       if(testedConfiguration == false){ 
2035         break;
2036       } 
2037     
2038       // go to next configuration
2039       config[0] ++;
2040       // Propagate "carrys"
2041       for(int i=0;i<numDimensions;i++){
2042         if(config[i] > upperBounds[i]){
2043           config[i+1]++;
2044           config[i] = lowerBounds[i];
2045         }
2046       }
2047       // check if we have exhausted all possible values
2048       if(config[numDimensions] > 0){
2049         break;
2050       }
2051        
2052     }
2053   
2054     if(config[numDimensions] > 0){
2055       for(int i=0;i<numDimensions;i++){ 
2056         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2057       }
2058     }
2059
2060     // results are now in config[i]
2061     
2062     for(int i=0; i<numDimensions; i++){
2063       newControlPoints[names[i]] = config[i];
2064       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2065     }
2066
2067
2068   } else {
2069     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2070   }
2071
2072
2073   const double endGenerateTime = CmiWallTimer();
2074   
2075   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2076   
2077 }
2078
2079
2080
2081
2082 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2083
2084
2085 /// Get control point value from range of integers [lb,ub]
2086 int controlPoint(const char *name, int lb, int ub){
2087   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2088   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2089   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2090   int result;
2091
2092   // if we already have control point values for phase, return them
2093   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2094     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2095     return thisPhaseData->controlPoints[std::string(name)];
2096   }
2097   
2098
2099   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2100     // For the first few phases, just use the lower bound, or the default if one was provided 
2101     // This ensures that the ranges for all the control points are known before we do anything fancy
2102     result = lb;
2103
2104     if(defaultControlPointValues.count(std::string(name)) > 0){
2105       int v = defaultControlPointValues[std::string(name)];
2106       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2107       result = v;
2108     }
2109
2110   } else if(controlPointSpace.count(std::string(name)) == 0){
2111     // if this is the first time we've seen the CP, then return the lower bound
2112     result = lb;
2113     
2114   }  else {
2115     // Generate a plan one time for each phase
2116     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2117     
2118     // Use the planned value:
2119     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2120     
2121   }
2122
2123   if(!isInRange(result,ub,lb)){
2124     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2125     fflush(stdout);
2126     fflush(stderr);
2127   }
2128   CkAssert(isInRange(result,ub,lb));
2129   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2130
2131   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2132
2133   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2134   //  thisPhaseData->print();
2135   
2136   return result;
2137 }
2138
2139
2140 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2141   CkAssert(sizeof(lb) == 4);
2142   CkAssert(CkMyPe() == 0);
2143   return controlPoint("FortranCP", *lb, *ub);
2144 }
2145
2146
2147
2148
2149 /// Inform the control point framework that a named control point affects the priorities of some array  
2150 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2151 //   CkGroupID aid = arraybase.ckGetArrayID();
2152 //   int groupIdx = aid.idx;
2153 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2154 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2155 // }
2156
2157
2158 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2159 // void controlPointPriorityEntry(const char *name, int idx){
2160 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2161 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2162 // }
2163
2164
2165
2166
2167
2168 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2169
2170     This function decomposes the algorithm into a state machine that allows it to
2171     evaluate one or more configurations through subsequent clls to this function.
2172     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2173
2174     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2175     so a list of them will be created and they will be evaluated, one per call.
2176
2177     Currently there is no stopping criteria, but the simplex ought to contract down
2178     to a few close configurations, and hence not much change will happen after this 
2179     point.
2180
2181  */
2182 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2183
2184         if(useBestKnown){
2185                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2186                 return;
2187         }
2188
2189
2190         if(firstSimplexPhase< 0){
2191                 firstSimplexPhase = allData.phases.size()-1;
2192                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2193         }
2194
2195         int n = controlPointSpace.size();
2196
2197         CkAssert(n>=2);
2198
2199
2200         if(simplexState == beginning){
2201                 // First we evaluate n+1 random points, then we go to a different state
2202                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2203                         CkPrintf("Simplex Tuning: chose random configuration\n");
2204
2205                         // Choose random values from the middle third of the range in each dimension
2206                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2207                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2208                                 const std::string &name = cpsIter->first;
2209                                 const std::pair<int,int> &bounds = cpsIter->second;
2210                                 const int lb = bounds.first;
2211                                 const int ub = bounds.second;
2212                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2213                         }
2214                 } else {
2215                         // Set initial simplex:
2216                         for(int i=0; i<n+1; i++){
2217                                 simplexIndices.insert(firstSimplexPhase+i);
2218                         }
2219                         CkAssert(simplexIndices.size() == n+1);
2220
2221                         // Transition to reflecting state
2222                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2223
2224                 }
2225
2226         } else if (simplexState == reflecting){
2227                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2228                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2229
2230                 // Find the highest time from other points in the simplex
2231                 double highestTimeForOthersInSimplex = 0.0;
2232                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2233                         double t = allData.phases[*iter]->medianTime();
2234                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2235                                 highestTimeForOthersInSimplex = t;
2236                         }
2237                 }
2238
2239                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2240
2241                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2242                         // if y* < yl,  transition to "expanding"
2243                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2244
2245                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2246                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2247                         CkAssert(simplexIndices.size() == n+1);
2248                         simplexIndices.erase(worstPhase);
2249                         CkAssert(simplexIndices.size() == n);
2250                         simplexIndices.insert(pPhase);
2251                         CkAssert(simplexIndices.size() == n+1);
2252                         // Transition to reflecting state
2253                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2254
2255                 } else {
2256                         // if y* > yh
2257                         if(recentPhaseTime <= worstTime){
2258                                 // replace Ph with P*
2259                                 CkAssert(simplexIndices.size() == n+1);
2260                                 simplexIndices.erase(worstPhase);
2261                                 CkAssert(simplexIndices.size() == n);
2262                                 simplexIndices.insert(pPhase);
2263                                 CkAssert(simplexIndices.size() == n+1);
2264                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2265                                 worstPhase = pPhase;
2266                                 // Just as a sanity check, make sure we don't use the non-updated values.
2267                                 worst.clear();
2268                         }
2269
2270                         // Now, form P** and do contracting phase
2271                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2272
2273                 }
2274
2275         } else if (simplexState == doneExpanding){
2276                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2277                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2278                 // A new configuration has been evaluated
2279
2280                 // Check to see if y** < y1
2281                 if(recentPhaseTime < bestTime){
2282                         // replace Ph by P**
2283                         CkAssert(simplexIndices.size() == n+1);
2284                         simplexIndices.erase(worstPhase);
2285                         CkAssert(simplexIndices.size() == n);
2286                         simplexIndices.insert(p2Phase);
2287                         CkAssert(simplexIndices.size() == n+1);
2288                 } else {
2289                         //      replace Ph by P*
2290                         CkAssert(simplexIndices.size() == n+1);
2291                         simplexIndices.erase(worstPhase);
2292                         CkAssert(simplexIndices.size() == n);
2293                         simplexIndices.insert(pPhase);
2294                         CkAssert(simplexIndices.size() == n+1);
2295                 }
2296
2297                 // Transition to reflecting state
2298                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2299
2300         }  else if (simplexState == contracting){
2301                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2302                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2303                 // A new configuration has been evaluated
2304
2305                 // Check to see if y** > yh
2306                 if(recentPhaseTime <= worstTime){
2307                         // replace Ph by P**
2308                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2309                         CkAssert(simplexIndices.size() == n+1);
2310                         simplexIndices.erase(worstPhase);
2311                         CkAssert(simplexIndices.size() == n);
2312                         simplexIndices.insert(p2Phase);
2313                         CkAssert(simplexIndices.size() == n+1);
2314                         // Transition to reflecting state
2315                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2316
2317                 } else {
2318                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2319                         simplexState = stillContracting;
2320
2321                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2322                         stillMustContractList = simplexIndices;
2323
2324                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2325                 }
2326
2327         } else if (simplexState == stillContracting){
2328                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2329
2330                 if(stillMustContractList.size()>0){
2331                         int c = *stillMustContractList.begin();
2332                         stillMustContractList.erase(c);
2333                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2334
2335                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2336
2337                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2338                         int v = 0;
2339                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2340                                 const std::string &name = cpsIter->first;
2341                                 const std::pair<int,int> &bounds = cpsIter->second;
2342                                 const int lb = bounds.first;
2343                                 const int ub = bounds.second;
2344
2345                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2346
2347                                 newControlPoints[name] = keepInRange(val,lb,ub);
2348                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2349                                 v++;
2350                         }
2351                 } else {
2352                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2353                         CkAssert(stillMustContractList.size() == 0);
2354                         simplexIndices.clear();
2355                         CkAssert(simplexIndices.size()==0);
2356                         for(int i=0; i<n+1; i++){
2357                                 simplexIndices.insert(allData.phases.size()-2-i);
2358                         }
2359                         CkAssert(simplexIndices.size()==n+1);
2360
2361                         // Transition to reflecting state
2362                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2363
2364                 }
2365
2366
2367         } else if (simplexState == expanding){
2368                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2369                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2370                 // A new configuration has been evaluated
2371
2372                 // determine if y** < yl
2373                 if(recentPhaseTime < bestTime){
2374                         // replace Ph by P**
2375                         CkAssert(simplexIndices.size() == n+1);
2376                         simplexIndices.erase(worstPhase);
2377                         CkAssert(simplexIndices.size() == n);
2378                         simplexIndices.insert(p2Phase);
2379                         CkAssert(simplexIndices.size() == n+1);
2380                         // Transition to reflecting state
2381                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2382
2383                 } else {
2384                         // else, replace ph with p*
2385                         CkAssert(simplexIndices.size() == n+1);
2386                         simplexIndices.erase(worstPhase);
2387                         CkAssert(simplexIndices.size() == n);
2388                         simplexIndices.insert(pPhase);
2389                         CkAssert(simplexIndices.size() == n+1);
2390                         // Transition to reflecting state
2391                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2392                 }
2393
2394
2395         } else {
2396                 CkAbort("Unknown simplexState");
2397         }
2398
2399 }
2400
2401
2402
2403 /** Replace the worst point with its reflection across the centroid. */
2404 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2405
2406         int n = controlPointSpace.size();
2407
2408         printSimplex(allData);
2409
2410         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2411
2412
2413         // Quit if the diameter of our simplex is small
2414         double maxr = 0.0;
2415         for(int i=0; i<n+1; i++){
2416                 //              Compute r^2 of this simplex point from the centroid
2417                 double r2 = 0.0;
2418                 std::vector<double> p = pointCoords(allData, i);
2419                 for(int d=0; d<p.size(); d++){
2420                         double r1 = (p[d] * centroid[d]);
2421                         r2 += r1*r1;
2422                 }
2423                 if(r2 > maxr)
2424                         maxr = r2;
2425         }
2426
2427 #if 0
2428         // At some point we quit this tuning
2429         if(maxr < 10){
2430                 useBestKnown = true;
2431                 instrumentedPhase *best = allData.findBest();
2432                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2433
2434                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2435                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2436                         const std::string &name = cpsIter->first;
2437                         newControlPoints[name] =  best->controlPoints[name];
2438                 }
2439         }
2440 #endif
2441
2442         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2443
2444         pPhase = allData.phases.size()-1;
2445         P.resize(n);
2446         for(int i=0; i<n; i++){
2447                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2448         }
2449
2450         for(int i=0; i<P.size(); i++){
2451                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2452         }
2453
2454
2455         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2456         int v = 0;
2457         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2458                 const std::string &name = cpsIter->first;
2459                 const std::pair<int,int> &bounds = cpsIter->second;
2460                 const int lb = bounds.first;
2461                 const int ub = bounds.second;
2462                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2463                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2464                 v++;
2465         }
2466
2467
2468         // Transition to "reflecting" state
2469         simplexState = reflecting;
2470         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2471
2472 }
2473
2474
2475
2476
2477 /** Replace the newly tested reflection with a further expanded version of itself. */
2478 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2479         int n = controlPointSpace.size();
2480         printSimplex(allData);
2481
2482         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2483         // I believe the equation for P** in the text on page 308 is correct.
2484
2485         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2486
2487
2488         p2Phase = allData.phases.size()-1;
2489         P2.resize(n);
2490         for(int i=0; i<n; i++){
2491                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2492         }
2493
2494         for(int i=0; i<P2.size(); i++){
2495                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2496         }
2497
2498
2499         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2500         int v = 0;
2501         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2502                 const std::string &name = cpsIter->first;
2503                 const std::pair<int,int> &bounds = cpsIter->second;
2504                 const int lb = bounds.first;
2505                 const int ub = bounds.second;
2506                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2507                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2508                 v++;
2509         }
2510
2511
2512         // Transition to "doneExpanding" state
2513         simplexState = doneExpanding;
2514         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2515
2516 }
2517
2518
2519
2520
2521 /** Replace the newly tested reflection with a further expanded version of itself. */
2522 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2523         int n = controlPointSpace.size();
2524         printSimplex(allData);
2525
2526         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2527
2528
2529         p2Phase = allData.phases.size()-1;
2530         P2.resize(n);
2531         for(int i=0; i<n; i++){
2532                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2533         }
2534
2535         for(int i=0; i<P2.size(); i++){
2536                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2537         }
2538
2539
2540         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2541         int v = 0;
2542         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2543                 const std::string &name = cpsIter->first;
2544                 const std::pair<int,int> &bounds = cpsIter->second;
2545                 const int lb = bounds.first;
2546                 const int ub = bounds.second;
2547                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2548                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2549                 v++;
2550         }
2551         
2552         
2553         // Transition to "contracting" state
2554         simplexState = contracting;
2555         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2556
2557 }
2558
2559
2560
2561
2562 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2563         int n = controlPointSpace.size();
2564
2565         // Find worst performing point in the simplex
2566         worstPhase = -1;
2567         worstTime = -1.0;
2568         bestPhase = 10000000;
2569         bestTime = 10000000;
2570         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2571                 double t = allData.phases[*iter]->medianTime();
2572                 if(t > worstTime){
2573                         worstTime = t;
2574                         worstPhase = *iter;
2575                 }
2576                 if(t < bestTime){
2577                         bestTime = t;
2578                         bestPhase = *iter;
2579                 }
2580         }
2581         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2582
2583         best = pointCoords(allData, bestPhase);
2584         CkAssert(best.size() == n);
2585
2586         worst = pointCoords(allData, worstPhase);
2587         CkAssert(worst.size() == n);
2588
2589         // Calculate centroid of the remaining points in the simplex
2590         centroid.resize(n);
2591         for(int i=0; i<n; i++){
2592                 centroid[i] = 0.0;
2593         }
2594
2595         int numPts = 0;
2596
2597         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2598                 if(*iter != worstPhase){
2599                         numPts ++;
2600                         // Accumulate into the result vector
2601                         int c = 0;
2602                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2603                                 centroid[c] += citer->second;
2604                                 c++;
2605                         }
2606
2607                 }
2608         }
2609
2610         // Now divide the sums by the number of points.
2611         for(int v = 0; v<centroid.size(); v++) {
2612                 centroid[v] /= (double)numPts;
2613         }
2614
2615         CkAssert(centroid.size() == n);
2616
2617         for(int i=0; i<centroid.size(); i++){
2618                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2619         }
2620
2621
2622 }
2623
2624
2625
2626 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2627         std::vector<double> result;
2628         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2629                 result.push_back((double)citer->second);
2630         }
2631         return result;
2632 }
2633
2634
2635
2636
2637 void ControlPointWriteOutputToDisk(){
2638         CkAssert(CkMyPe() == 0);
2639         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2640 }
2641
2642
2643
2644 /*! @} */
2645
2646 #ifdef CP_DISABLE_TRACING
2647 #include "ControlPointsNoTrace.def.h"
2648 #else
2649 #include "ControlPoints.def.h"
2650 #endif