Adding multiple predictor models for use in tuning load balancing period.
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15
16 #include <climits>
17 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
18  
19
20 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
21 /**  @{ */
22
23 using namespace std;
24
25 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
26
27
28 //#undef DEBUGPRINT
29 //#define DEBUGPRINT 4
30
31 static void periodicProcessControlPoints(void* ptr, double currWallTime);
32
33
34 // A pointer to this PE's controlpoint manager Proxy
35 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
36 /* readonly */ int random_seed;
37 /* readonly */ long controlPointSamplePeriod;
38 /* readonly */ int whichTuningScheme;
39 /* readonly */ bool writeDataFileAtShutdown;
40 /* readonly */ bool shouldFilterOutputData;
41 /* readonly */ bool loadDataFileAtStartup;
42 /* readonly */ bool shouldGatherMemoryUsage;
43 /* readonly */ bool shouldGatherUtilization;
44 /* readonly */ bool shouldGatherAll;
45 /* readonly */ char CPDataFilename[512];
46
47
48
49 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
50 /// These probably come from the command line arguments, so are available only on PE 0
51 std::map<std::string, int> defaultControlPointValues;
52
53
54
55 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic}  tuningScheme;
56
57
58
59 void printTuningScheme(){
60   switch(whichTuningScheme){
61   case RandomSelection:
62     CkPrintf("Tuning Scheme: RandomSelection\n");
63     break;
64   case AlwaysDefaults:
65     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
66     break;
67   case SimulatedAnnealing:
68     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
69     break;
70   case ExhaustiveSearch:
71     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
72     break;
73   case CriticalPathAutoPrioritization:
74     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
75     break;
76   case UseBestKnownTiming:
77     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
78     break;
79   case UseSteering:
80     CkPrintf("Tuning Scheme: UseSteering\n");
81     break;
82   case MemoryAware:
83     CkPrintf("Tuning Scheme: MemoryAware\n");
84     break;
85   case Simplex:
86     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
87     break;
88   case DivideAndConquer:
89     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
90     break;
91   case LDBPeriod:
92     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
93     break;
94   case LDBPeriodLinear:
95     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
96     break;
97   case LDBPeriodQuadratic:
98     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
99     break;
100   default:
101     CkPrintf("Unknown tuning scheme\n");
102     break;
103   }
104   fflush(stdout);
105 }
106
107
108
109 /// A reduction type that combines idle time measurements (min/sum/max etc.)
110 CkReduction::reducerType idleTimeReductionType;
111 /// A reducer that combines idle time measurements (min/sum/max etc.)
112 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
113   double ret[3];
114   if(nMsg > 0){
115     CkAssert(msgs[0]->getSize()==3*sizeof(double));
116     double *m=(double *)msgs[0]->getData();
117     ret[0]=m[0];
118     ret[1]=m[1];
119     ret[2]=m[2];
120   }
121   for (int i=1;i<nMsg;i++) {
122     CkAssert(msgs[i]->getSize()==3*sizeof(double));
123     double *m=(double *)msgs[i]->getData();
124     ret[0]=min(ret[0],m[0]);
125     ret[1]+=m[1];
126     ret[2]=max(ret[2],m[2]);
127   }  
128   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
129 }
130
131
132
133 /// A reduction type that combines idle, overhead, and memory measurements
134 CkReduction::reducerType allMeasuresReductionType;
135 /// A reducer that combines idle, overhead, and memory measurements
136 #define ALL_REDUCTION_SIZE 12
137 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
138   double ret[ALL_REDUCTION_SIZE];
139   if(nMsg > 0){
140     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
141     double *m=(double *)msgs[0]->getData();
142     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
143   }
144   for (int i=1;i<nMsg;i++) {
145     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[i]->getData();
147     // idle time (min/sum/max)
148     ret[0]=min(ret[0],m[0]);
149     ret[1]+=m[1];
150     ret[2]=max(ret[2],m[2]);
151     // overhead time (min/sum/max)
152     ret[3]=min(ret[3],m[3]);
153     ret[4]+=m[4];
154     ret[5]=max(ret[5],m[5]);
155     // mem usage (max)
156     ret[6]=max(ret[6],m[6]);
157     // bytes per invocation for two types of entry methods
158     ret[7]+=m[7];
159     ret[8]+=m[8];
160     ret[9]+=m[9];
161     ret[10]+=m[10];
162     // Grain size (avg)
163     ret[11]+=m[11];
164   }  
165   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
166 }
167
168
169 /// Registers the control point framework's reduction handlers at startup on each PE
170 /*initproc*/ void registerCPReductions(void) {
171   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
172   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
173 }
174
175
176
177
178
179
180 /// Return an integer between 0 and num-1 inclusive
181 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
182 unsigned int randInt(unsigned int num, const char* name, int seed=0){
183   CkAssert(num > 0);
184   CkAssert(num < 1000);
185
186   unsigned long hash = 0;
187   unsigned int c;
188   unsigned char * str = (unsigned char*)name;
189
190   while (c = *str++){
191     unsigned int c2 = (c+64)%128;
192     unsigned int c3 = (c2*5953)%127;
193     hash = c3 + (hash << 6) + (hash << 16) - hash;
194   }
195
196   unsigned long shuffled1 = (hash*2083)%7907;
197   unsigned long shuffled2 = (seed*4297)%2017;
198
199   unsigned long shuffled3 = (random_seed*4799)%7919;
200
201   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
202
203   unsigned int result = ((namehash * 6029) % 1117) % num;
204
205   CkAssert(result >=0 && result < num);
206   return result;
207 }
208
209
210
211 controlPointManager::controlPointManager() {
212   generatedPlanForStep = -1;
213
214     exitWhenReady = false;
215     alreadyRequestedMemoryUsage = false;   
216     alreadyRequestedIdleTime = false;
217     alreadyRequestedAll = false;
218     
219     instrumentedPhase * newPhase = new instrumentedPhase();
220     allData.phases.push_back(newPhase);   
221     
222     frameworkShouldAdvancePhase = false;
223     haveControlPointChangeCallback = false;
224 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
225     
226     ControlPoint::initControlPointEffects();
227
228     phase_id = 0;
229
230     if(loadDataFileAtStartup){    
231       loadDataFile();
232     }
233
234     
235     if(CkMyPe() == 0){
236       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
237     }
238
239     traceRegisterUserEvent("No currently executing message", 5000);
240     traceRegisterUserEvent("Zero time along critical path", 5010);
241     traceRegisterUserEvent("Positive total time along critical path", 5020);
242     traceRegisterUserEvent("env->setPathHistory()", 6000);
243     traceRegisterUserEvent("Critical Path", 5900);
244     traceRegisterUserEvent("Table Entry", 5901);
245
246
247 #if USER_EVENT_FOR_REGISTERTERMINALPATH
248     traceRegisterUserEvent("registerTerminalPath", 100); 
249 #endif
250
251   }
252   
253
254  controlPointManager::~controlPointManager(){
255 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
256   }
257
258
259   /// Loads the previous run data file
260   void controlPointManager::loadDataFile(){
261     ifstream infile(CPDataFilename);
262     vector<std::string> names;
263     std::string line;
264   
265     while(getline(infile,line)){
266       if(line[0] != '#')
267         break;
268     }
269   
270     int numTimings = 0;
271     std::istringstream n(line);
272     n >> numTimings;
273   
274     while(getline(infile,line)){ 
275       if(line[0] != '#') 
276         break; 
277     }
278
279     int numControlPointNames = 0;
280     std::istringstream n2(line);
281     n2 >> numControlPointNames;
282   
283     for(int i=0; i<numControlPointNames; i++){
284       getline(infile,line);
285       names.push_back(line);
286     }
287
288     for(int i=0;i<numTimings;i++){
289       getline(infile,line);
290       while(line[0] == '#')
291         getline(infile,line); 
292
293       instrumentedPhase * ips = new instrumentedPhase();
294
295       std::istringstream iss(line);
296
297       // Read memory usage for phase
298       iss >> ips->memoryUsageMB;
299       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
300
301       // Read idle time
302       iss >> ips->idleTime.min;
303       iss >> ips->idleTime.avg;
304       iss >> ips->idleTime.max;
305
306       // Read overhead time
307       iss >> ips->overheadTime.min;
308       iss >> ips->overheadTime.avg;
309       iss >> ips->overheadTime.max;
310
311       // read bytePerInvoke
312       iss >> ips->bytesPerInvoke;
313
314       // read grain size
315       iss >> ips->grainSize;
316
317       // Read control point values
318       for(int cp=0;cp<numControlPointNames;cp++){
319         int cpvalue;
320         iss >> cpvalue;
321         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
322       }
323
324       // ignore median time
325       double mt;
326       iss >> mt;
327
328       // Read all times
329
330       double time;
331
332       while(iss >> time){
333         ips->times.push_back(time);
334 #if DEBUGPRINT > 5
335         CkPrintf("read time %lf from file\n", time);
336 #endif
337       }
338
339       allData.phases.push_back(ips);
340
341     }
342
343     infile.close();
344   }
345
346
347
348   /// Add the current data to allData and output it to a file
349   void controlPointManager::writeDataFile(){
350     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
351     ofstream outfile(CPDataFilename);
352     allData.cleanupNames();
353
354 //    string s = allData.toString();
355 //    CkPrintf("At end: \n %s\n", s.c_str());
356
357     if(shouldFilterOutputData){
358       allData.verify();
359       allData.filterOutIncompletePhases();
360     }
361
362 //    string s2 = allData.toString();
363 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
364     if(allData.toString().length() > 10){
365       outfile << allData.toString();
366     } else {
367       outfile << " No data available to save to disk " << endl;
368     }
369     outfile.close();
370   }
371
372   /// User can register a callback that is called when application should advance to next phase
373   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
374     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
375     controlPointChangeCallback = cb;
376     haveControlPointChangeCallback = true;
377   }
378
379
380 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
381 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
382   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
383 }
384
385   /// Called periodically by the runtime to handle the control points
386   /// Currently called on each PE
387   void controlPointManager::processControlPoints(){
388
389     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
390
391
392     //==========================================================================================
393     // Print the data for each phase
394
395     const int s = allData.phases.size();
396
397 #if DEBUGPRINT
398     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
399     for(int p=0;p<s;++p){
400       const instrumentedPhase &phase = allData.phases[p];
401       const idleTimeContainer &idle = phase.idleTime;
402       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
403       vector<double> const &times = phase.times;
404
405       CkPrintf("Phase %d:\n", p);
406       idle.print();
407      //   CkPrintf("critical paths: (* affected by control point)\n");
408         //   for(int i=0;i<criticalPaths.size(); i++){
409         // If affected by a control point
410         //      criticalPaths[i].print();
411
412       //        criticalPaths[i].print();
413       //        CkPrintf("\n");
414         
415
416       //   }
417       CkPrintf("Timings:\n");
418       for(int i=0;i<times.size(); i++){
419         CkPrintf("%lf ", times[i]);
420       }
421       CkPrintf("\n");
422
423     }
424     
425     CkPrintf("\n\n");
426
427
428 #endif
429
430
431
432     //==========================================================================================
433     // If this is a phase during which we try to adapt control point values based on critical path
434 #if 0
435     if( s%5 == 4) {
436
437       // Find the most recent phase with valid critical path data and idle time measurements      
438       int whichPhase=-1;
439       for(int p=0; p<s; ++p){
440         const instrumentedPhase &phase = allData.phases[p];
441         const idleTimeContainer &idle = phase.idleTime;
442         if(idle.isValid() && phase.criticalPaths.size()>0 ){
443           whichPhase = p;
444         }
445       }
446       
447       
448       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
449       const instrumentedPhase &phase = allData.phases[whichPhase];
450       const idleTimeContainer &idle = phase.idleTime;
451       
452       if(idle.min > 0.1){
453         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
454         
455         // Determine the median critical path for this phase
456         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
457         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
458         CkAssert(phase.criticalPaths.size() > 0);
459         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
460         
461         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
462         
463         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
464           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
465
466
467           CkPrintf("Finding control points related to the critical path\n");
468           int cpcount = 0;
469           std::set<std::string> controlPointsAffectingCriticalPath;
470
471           
472           for(int e=0;e<path.getNumUsed();e++){
473             if(path.getUsedCount(e)>0){
474               int ep = path.getUsedEp(e);
475
476               std::map<std::string, std::set<int> >::iterator iter;
477               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
478                 if(iter->second.count(ep)>0){
479                   controlPointsAffectingCriticalPath.insert(iter->first);
480                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
481                   cpcount++;
482                 }
483               }
484               
485             }
486           }
487           
488
489           if(cpcount==0){
490             CkPrintf("No control points are known to affect the critical path\n");
491           } else {
492             double beginTime = CmiWallTimer();
493
494             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
495             
496             newControlPoints = phase.controlPoints;
497             
498             if(frameworkShouldAdvancePhase){
499               gotoNextPhase();  
500             }
501             
502             if(haveControlPointChangeCallback){ 
503 #if DEBUGPRINT
504               CkPrintf("Calling control point change callback\n");
505 #endif
506               // Create a high priority message and send it to the callback
507               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
508               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
509               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
510               controlPointChangeCallback.send(msg);
511               
512             }
513             
514             
515             // adjust the control points that can affect the critical path
516
517             char textDescription[4096*2];
518             textDescription[0] = '\0';
519
520             std::map<std::string,int>::iterator newCP;
521             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
522               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
523                 // decrease the value (increase priority) if within range
524                 int lowerbound = controlPointSpace[newCP->first].first;
525                 if(newCP->second > lowerbound){
526                   newControlPointsAvailable = true;
527                   newCP->second --;
528                 }
529               }
530             }
531            
532             // Create a string for a projections user event
533             if(1){
534               std::map<std::string,int>::iterator newCP;
535               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
536                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
537               }
538             }
539
540             char *userEventString = new char[4096*2];
541             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
542             
543             static int userEventCounter = 20000;
544             CkPrintf("USER EVENT: %s\n", userEventString);
545             
546             traceRegisterUserEvent(userEventString, userEventCounter); 
547             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
548             userEventCounter++;
549             
550           }
551           
552         } else {
553           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
554         }
555         
556         
557       }
558       
559     } else {
560       
561       
562     }
563    
564 #endif
565
566
567
568     if(frameworkShouldAdvancePhase){
569       gotoNextPhase();  
570     }
571     
572     if(haveControlPointChangeCallback){ 
573       // Create a high priority message and send it to the callback
574       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
575       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
576       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
577       controlPointChangeCallback.send(msg);
578     }
579     
580   }
581   
582   /// Determine if any control point is known to affect an entry method
583   bool controlPointManager::controlPointAffectsThisEP(int ep){
584     std::map<std::string, std::set<int> >::iterator iter;
585     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
586       if(iter->second.count(ep)>0){
587         return true;
588       }
589     }
590     return false;    
591   }
592   
593   /// Determine if any control point is known to affect a chare array  
594   bool controlPointManager::controlPointAffectsThisArray(int array){
595     std::map<std::string, std::set<int> >::iterator iter;
596     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
597       if(iter->second.count(array)>0){
598         return true;
599       }
600     }
601     return false;   
602   }
603   
604
605   /// The data from the current phase
606   instrumentedPhase * controlPointManager::currentPhaseData(){
607     int s = allData.phases.size();
608     CkAssert(s>=1);
609     return allData.phases[s-1];
610   }
611  
612
613   /// The data from the previous phase
614   instrumentedPhase * controlPointManager::previousPhaseData(){
615     int s = allData.phases.size();
616     if(s >= 2 && phase_id > 0) {
617       return allData.phases[s-2];
618     } else {
619       return NULL;
620     }
621   }
622  
623   /// The data from two phases back
624   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
625     int s = allData.phases.size();
626     if(s >= 3 && phase_id > 0) {
627       return allData.phases[s-3];
628     } else {
629       return NULL;
630     }
631   }
632   
633
634   /// Called by either the application or the Control Point Framework to advance to the next phase  
635   void controlPointManager::gotoNextPhase(){
636     
637 #ifndef CP_DISABLE_TRACING
638     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
639     fflush(stdout);
640
641     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
642       alreadyRequestedAll = true;
643       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
644       CkPrintf("Requesting all measurements\n");
645       thisProxy.requestAll(*cb);
646       delete cb;
647     
648     } else {
649       
650       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
651         alreadyRequestedMemoryUsage = true;
652         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
653         thisProxy.requestMemoryUsage(*cb);
654         delete cb;
655       }
656       
657       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
658         alreadyRequestedIdleTime = true;
659         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
660         thisProxy.requestIdleTime(*cb);
661         delete cb;
662       }
663     }
664     
665
666 #endif
667
668
669
670 #if CMK_LBDB_ON && 0
671     LBDatabase * myLBdatabase = LBDatabaseObj();
672     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
673     const CkVec<LBObj*> objs = myLBDB->getObjs();
674     const int objCount = myLBDB->getObjCount();
675     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
676     
677     floatType maxObjWallTime = -1.0;
678     
679     for(int i=0;i<objs.length();i++){
680       LBObj* o = objs[i];
681       const LDObjData d = o->ObjData();
682       floatType cpuTime = d.cpuTime;
683       floatType wallTime = d.wallTime;
684       // can also get object handles from the LDObjData struct
685       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
686       if(wallTime > maxObjWallTime){
687
688       }
689       
690     }
691
692     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
693     
694 #endif    
695
696
697     
698     // increment phase id
699     phase_id++;
700     
701
702     // Create new entry for the phase we are starting now
703     instrumentedPhase * newPhase = new instrumentedPhase();
704     allData.phases.push_back(newPhase);
705     
706     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
707
708   }
709
710   /// An application uses this to register an instrumented timing for this phase
711   void controlPointManager::setTiming(double time){
712     currentPhaseData()->times.push_back(time);
713
714 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
715        
716     // First we should register this currently executing message as a path, because it is likely an important one to consider.
717     //    registerTerminalEntryMethod();
718     
719     // save the critical path for this phase
720     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
721     //    maxTerminalPathHistory.reset();
722     
723     
724     // Reset the counts for the currently executing message
725     //resetThisEntryPath();
726         
727 #endif
728     
729   }
730   
731   /// Entry method called on all PEs to request CPU utilization statistics
732   void controlPointManager::requestIdleTime(CkCallback cb){
733 #ifndef CP_DISABLE_TRACING
734     double i = localControlPointTracingInstance()->idleRatio();
735     double idle[3];
736     idle[0] = i;
737     idle[1] = i;
738     idle[2] = i;
739     
740     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
741     
742     localControlPointTracingInstance()->resetTimings();
743
744     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
745 #else
746     CkAbort("Should not get here\n");
747 #endif
748   }
749   
750   /// All processors reduce their memory usages in requestIdleTime() to this method
751   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
752 #ifndef CP_DISABLE_TRACING
753     int size=msg->getSize() / sizeof(double);
754     CkAssert(size==3);
755     double *r=(double *) msg->getData();
756         
757     instrumentedPhase* prevPhase = previousPhaseData();
758     if(prevPhase != NULL){
759       prevPhase->idleTime.min = r[0];
760       prevPhase->idleTime.avg = r[1]/CkNumPes();
761       prevPhase->idleTime.max = r[2];
762       prevPhase->idleTime.print();
763       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
764     } else {
765       CkPrintf("There is no previous phase to store the idle time measurements\n");
766     }
767     
768     alreadyRequestedIdleTime = false;
769     checkForShutdown();
770     delete msg;
771 #else
772     CkAbort("Should not get here\n");
773 #endif
774   }
775
776
777
778
779
780
781   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
782   void controlPointManager::requestAll(CkCallback cb){
783 #ifndef CP_DISABLE_TRACING
784     TraceControlPoints *t = localControlPointTracingInstance();
785
786     double data[ALL_REDUCTION_SIZE];
787
788     double *idle = data;
789     double *over = data+3;
790     double *mem = data+6;
791     double *msgBytes = data+7;  
792     double *grainsize = data+11;  
793
794     const double i = t->idleRatio();
795     idle[0] = i;
796     idle[1] = i;
797     idle[2] = i;
798
799     const double o = t->overheadRatio();
800     over[0] = o;
801     over[1] = o;
802     over[2] = o;
803
804     const double m = t->memoryUsageMB();
805     mem[0] = m;
806
807     msgBytes[0] = t->b2;
808     msgBytes[1] = t->b3;
809     msgBytes[2] = t->b2mlen;
810     msgBytes[3] = t->b3mlen;
811
812     grainsize[0] = t->grainSize();
813     
814     localControlPointTracingInstance()->resetAll();
815
816     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
817 #else
818     CkAbort("Should not get here\n");
819 #endif
820   }
821   
822   /// All processors reduce their memory usages in requestIdleTime() to this method
823   void controlPointManager::gatherAll(CkReductionMsg *msg){
824 #ifndef CP_DISABLE_TRACING
825     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
826     int size=msg->getSize() / sizeof(double);
827     double *data=(double *) msg->getData();
828         
829     double *idle = data;
830     double *over = data+3;
831     double *mem = data+6;
832     double *msgBytes = data+7;
833     double *granularity = data+11;
834
835
836     //    std::string b = allData.toString();
837
838     instrumentedPhase* prevPhase = previousPhaseData();
839     if(prevPhase != NULL){
840       prevPhase->idleTime.min = idle[0];
841       prevPhase->idleTime.avg = idle[1]/CkNumPes();
842       prevPhase->idleTime.max = idle[2];
843       
844       prevPhase->overheadTime.min = over[0];
845       prevPhase->overheadTime.avg = over[1]/CkNumPes();
846       prevPhase->overheadTime.max = over[2];
847       
848       prevPhase->memoryUsageMB = mem[0];
849       
850       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
851
852       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
853       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
854
855       /** The average of the grain sizes on all PEs in us */
856       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
857
858       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
859       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
860
861       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
862       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
863
864       if(bytesPerInvoke2 > bytesPerInvoke3)
865         prevPhase->bytesPerInvoke = bytesPerInvoke2;
866       else
867         prevPhase->bytesPerInvoke = bytesPerInvoke3;
868
869       //      prevPhase->print();
870       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
871
872       //      std::string a = allData.toString();
873
874       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
875       
876     } else {
877       CkPrintf("There is no previous phase to store measurements\n");
878     }
879     
880     alreadyRequestedAll = false;
881     checkForShutdown();
882     delete msg;
883 #else
884     CkAbort("Should not get here\n");
885 #endif
886   }
887
888
889
890
891   void controlPointManager::checkForShutdown(){
892     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
893       doExitNow();
894     }
895   }
896
897
898   void controlPointManager::exitIfReady(){
899      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
900        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
901        doExitNow();
902      } else {
903        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
904        exitWhenReady = true;
905      }
906   }
907
908
909
910   void controlPointManager::doExitNow(){
911           writeOutputToDisk();
912           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
913           CkExit();
914   }
915
916   void controlPointManager::writeOutputToDisk(){
917           if(writeDataFileAtShutdown){
918                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
919           }
920   }
921
922
923   /// Entry method called on all PEs to request memory usage
924   void controlPointManager::requestMemoryUsage(CkCallback cb){
925     int m = CmiMaxMemoryUsage() / 1024 / 1024;
926     CmiResetMaxMemory();
927     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
928     contribute(sizeof(int),&m,CkReduction::max_int, cb);
929   }
930
931   /// All processors reduce their memory usages to this method
932   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
933     int size=msg->getSize() / sizeof(int);
934     CkAssert(size==1);
935     int *m=(int *) msg->getData();
936
937     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
938     
939     instrumentedPhase* prevPhase = previousPhaseData();
940     if(prevPhase != NULL){
941       prevPhase->memoryUsageMB = m[0];
942     } else {
943       CkPrintf("No place to store memory usage");
944     }
945
946     alreadyRequestedMemoryUsage = false;
947     checkForShutdown();
948     delete msg;
949   }
950
951
952 //   /// Inform the control point framework that a named control point affects the priorities of some array  
953 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
954 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
955     
956 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
957 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
958 //     } else {
959 //       std::set<int> s;
960 //       s.insert(groupIdx);
961 //       affectsPrioritiesArray[std::string(name)] = s;
962 //     }
963     
964 // #if DEBUGPRINT   
965 //     std::map<std::string, std::set<int> >::iterator f;
966 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
967 //       std::string name = f->first;
968 //       std::set<int> &vals = f->second;
969 //       cout << "Control point " << name << " affects arrays: ";
970 //       std::set<int>::iterator i;
971 //       for(i=vals.begin(); i!=vals.end();++i){
972 //      cout << *i << " ";
973 //       }
974 //       cout << endl;
975 //     }
976 // #endif
977     
978 //   }
979   
980 //   /// Inform the control point framework that a named control point affects the priority of some entry method
981 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
982 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
983
984 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
985 //       affectsPrioritiesEP[std::string(name)].insert(idx);
986 //     } else {
987 //       std::set<int> s;
988 //       s.insert(idx);
989 //       affectsPrioritiesEP[std::string(name)] = s;
990 //     }
991     
992 // #if DEBUGPRINT
993 //     std::map<std::string, std::set<int> >::iterator f;
994 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
995 //       std::string name = f->first;
996 //       std::set<int> &vals = f->second;
997 //       cout << "Control point " << name << " affects EP: ";
998 //       std::set<int>::iterator i;
999 //       for(i=vals.begin(); i!=vals.end();++i){
1000 //      cout << *i << " ";
1001 //       }
1002 //       cout << endl;
1003 //     }
1004 // #endif
1005
1006
1007 //   }
1008   
1009
1010
1011 /// An interface callable by the application.
1012 void gotoNextPhase(){
1013   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1014 }
1015
1016 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1017 {
1018   gotoNextPhase();
1019 }
1020
1021
1022 /// A mainchare that is used just to create our controlPointManager group at startup
1023 class controlPointMain : public CBase_controlPointMain {
1024 public:
1025   controlPointMain(CkArgMsg* args){
1026 #if OLDRANDSEED
1027     struct timeval tp;
1028     gettimeofday(& tp, NULL);
1029     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1030 #else
1031     double time = CmiWallTimer();
1032     int sec = (int)time;
1033     int usec = (int)((time - (double)sec)*1000000.0);
1034     random_seed =  sec ^ usec;
1035 #endif
1036     
1037     
1038     double period;
1039     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1040     if(haveSamplePeriod){
1041       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1042       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1043     } else {
1044       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1045     }
1046     
1047     
1048     
1049     whichTuningScheme = RandomSelection;
1050
1051
1052     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1053       whichTuningScheme = RandomSelection;
1054     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1055       whichTuningScheme = ExhaustiveSearch;
1056     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1057       whichTuningScheme = AlwaysDefaults;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1059       whichTuningScheme = SimulatedAnnealing;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1061       whichTuningScheme = CriticalPathAutoPrioritization;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1063       whichTuningScheme = UseBestKnownTiming;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1065       whichTuningScheme = UseSteering;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1067       whichTuningScheme = MemoryAware;
1068     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1069       whichTuningScheme = Simplex;
1070     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1071       whichTuningScheme = DivideAndConquer;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period") ){
1073       whichTuningScheme = LDBPeriod;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1075       whichTuningScheme = LDBPeriodLinear;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1077       whichTuningScheme = LDBPeriodQuadratic;
1078     }
1079
1080     char *defValStr = NULL;
1081     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1082       CkPrintf("You specified default value string: %s\n", defValStr);
1083       
1084       // Parse the string, looking for commas
1085      
1086
1087       char *tok = strtok(defValStr, ",");
1088       while (tok) {
1089         // split on the equal sign
1090         int len = strlen(tok);
1091         char *eqsign = strchr(tok, '=');
1092         if(eqsign != NULL && eqsign != tok){
1093           *eqsign = '\0';
1094           char *cpname = tok;
1095           std::string cpName(tok);
1096           char *cpDefVal = eqsign+1;      
1097           int v=-1;
1098           if(sscanf(cpDefVal, "%d", &v) == 1){
1099             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1100             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1101             defaultControlPointValues[cpName] = v;
1102           }
1103         }
1104         tok = strtok(NULL, ",");
1105       }
1106
1107     }
1108
1109     shouldGatherAll = false;
1110     shouldGatherMemoryUsage = false;
1111     shouldGatherUtilization = false;
1112     
1113     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1114       shouldGatherAll = true;
1115     } else {
1116       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1117         shouldGatherMemoryUsage = true;
1118       }
1119       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1120         shouldGatherUtilization = true;
1121       }
1122     }
1123     
1124     writeDataFileAtShutdown = false;   
1125     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1126       writeDataFileAtShutdown = true;
1127     }
1128
1129     shouldFilterOutputData = true;
1130     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1131       shouldFilterOutputData = false;
1132     }
1133
1134
1135    loadDataFileAtStartup = false;   
1136     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1137       loadDataFileAtStartup = true;
1138     }
1139
1140     char *cpdatafile;
1141     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1142       sprintf(CPDataFilename, "%s", cpdatafile);
1143     } else {
1144       sprintf(CPDataFilename, "controlPointData.txt");
1145     }
1146
1147
1148     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1149   }
1150   ~controlPointMain(){}
1151 };
1152
1153 /// An interface callable by the application.
1154 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1155   CkAssert(CkMyPe() == 0);
1156   CkPrintf("Application has registered a control point change callback\n");
1157   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1158 }
1159
1160 /// An interface callable by the application.
1161 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1162   if(CkMyPe() == 0){
1163     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1164     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1165   }
1166 }
1167
1168 /// An interface callable by the application.
1169 void registerControlPointTiming(double time){
1170   CkAssert(CkMyPe() == 0);
1171 #if DEBUGPRINT>0
1172   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1173 #endif
1174   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1175 }
1176
1177 /// An interface callable by the application.
1178 void controlPointTimingStamp() {
1179   CkAssert(CkMyPe() == 0);
1180 #if DEBUGPRINT>0
1181   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1182 #endif
1183   
1184   static double prev_time = 0.0;
1185   double t = CmiWallTimer();
1186   double duration = t - prev_time;
1187   prev_time = t;
1188     
1189   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1190 }
1191
1192 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1193 {
1194   controlPointTimingStamp();
1195 }
1196
1197
1198
1199
1200
1201 /// Shutdown the control point framework, writing data to disk if necessary
1202 extern "C" void controlPointShutdown(){
1203   if(CkMyPe() == 0){
1204
1205     // wait for gathering of idle time & memory usage to complete
1206     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1207
1208   }
1209 }
1210
1211 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1212 void controlPointInitNode(){
1213 //  CkPrintf("controlPointInitNode()\n");
1214   registerExitFn(controlPointShutdown);
1215 }
1216
1217 /// Called periodically to allow control point framework to do things periodically
1218 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1219 #ifdef DEBUGPRINT
1220   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1221 #endif
1222   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1223   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1224 }
1225
1226
1227
1228
1229
1230 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1231 /// user observed characteristic to adapt specific control point values.
1232 /// This function must return valid values for newControlPoints.
1233 void controlPointManager::generatePlan() {
1234   const double startGenerateTime = CmiWallTimer();
1235   const int phase_id = this->phase_id;
1236   const int effective_phase = allData.phases.size();
1237
1238   // Only generate a plan once per phase
1239   if(generatedPlanForStep == phase_id) 
1240     return;
1241   generatedPlanForStep = phase_id;
1242  
1243   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1244   printTuningScheme();
1245
1246   // By default lets put the previous phase data into newControlPoints
1247   instrumentedPhase *prevPhase = previousPhaseData();
1248   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1249           newControlPoints[cpsIter->first] = cpsIter->second;
1250   }
1251
1252
1253   if( whichTuningScheme == RandomSelection ){
1254     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1255     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1256       const std::string &name = cpsIter->first;
1257       const std::pair<int,int> &bounds = cpsIter->second;
1258       const int lb = bounds.first;
1259       const int ub = bounds.second;
1260       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1261     }
1262   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1263     // -----------------------------------------------------------
1264     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1265     //   
1266     // This scheme will return the median value for the range 
1267     // early on, and then will transition over to the new control points
1268     // determined by the critical path adapting code
1269
1270     // This won't work until the periodic function is fixed up a bit
1271
1272     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1273     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1274       const std::string &name = cpsIter->first;
1275       const std::pair<int,int> &bounds = cpsIter->second;
1276       const int lb = bounds.first;
1277       const int ub = bounds.second;
1278       newControlPoints[name] =  (lb+ub)/2;
1279     }
1280
1281   } else if ( whichTuningScheme == MemoryAware ) {
1282
1283     // -----------------------------------------------------------
1284     //  STEERING BASED ON MEMORY USAGE
1285
1286     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1287     instrumentedPhase *prevPhase = previousPhaseData();
1288  
1289     if(phase_id%2 == 0){
1290       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1291       twoAgoPhase->print();
1292       CkPrintf("\n");
1293       fflush(stdout);
1294       
1295       // See if memory usage is low:
1296       double memUsage = twoAgoPhase->memoryUsageMB;
1297       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1298       fflush(stdout);
1299       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1300         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1301         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1302         
1303         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1304         newControlPoints = twoAgoPhase->controlPoints;
1305
1306
1307         CkPrintf("Steering (memory based) initialized plan\n");
1308         fflush(stdout);
1309
1310         // look for a possible control point knob to turn
1311         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1312         
1313         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1314         bool found = false;
1315         std::string cpName;
1316         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1317         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1318         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1319           cpName = iter->first;
1320           info = &iter->second;
1321           found = true;
1322           break;
1323         }
1324
1325         // Adapt the control point value
1326         if(found){
1327           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1328           fflush(stdout);
1329           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1330           const int maxValue = controlPointSpace[cpName].second;
1331           
1332           if(twoAgoValue+1 <= maxValue){
1333             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1334           }
1335         }
1336         
1337       }
1338     }
1339
1340     CkPrintf("Steering (memory based) done for this phase\n");
1341     fflush(stdout);
1342
1343   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1344
1345     // -----------------------------------------------------------
1346     //  USE BEST KNOWN TIME  
1347     static bool firstTime = true;
1348     if(firstTime){
1349       firstTime = false;
1350       instrumentedPhase *best = allData.findBest(); 
1351       CkPrintf("Best known phase is:\n");
1352       best->print();
1353       CkPrintf("\n");
1354       
1355       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1356       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1357         const std::string &name = cpsIter->first;
1358         newControlPoints[name] =  best->controlPoints[name];
1359       }
1360     }
1361   } else if( whichTuningScheme == LDBPeriod) {
1362     // Assume this is used in this manner:
1363     //  1) go to next phase
1364     //  2) request control point
1365     //  3) load balancing
1366     //  4) computation
1367     
1368     
1369     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1370     instrumentedPhase *prevPhase = previousPhaseData();
1371     
1372     
1373     // Build model of execution time based on two phases ago
1374     // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1375     const std::vector<double> &times = twoAgoPhase->times;
1376     const int oldNumTimings = times.size();
1377     double oldSum = 0;
1378         
1379     for(int i=2; i<oldNumTimings; i++){
1380       oldSum += times[i];
1381     }
1382     
1383     const double oldAvg = oldSum / (oldNumTimings-2);
1384     
1385     
1386     // Compute the estimated time for the last phase's data
1387     const std::vector<double> &timesNew = prevPhase->times;
1388     const int newNumTimings = timesNew.size();
1389     
1390     
1391     // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1392     const double expectedTotalTime = oldAvg * newNumTimings;
1393     
1394     
1395     // Measure actual time
1396     double newSum = 0.0;
1397     for(int i=2; i<newNumTimings; ++i){
1398       newSum += timesNew[i];
1399     }
1400     
1401     const double newAvg = newSum / (newNumTimings-2);
1402     const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1403     
1404     const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1405     
1406     // Determine load balance cost
1407     const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1408     
1409     const double benefitAfterLB = benefit - lbcost;
1410     
1411     
1412     // Determine whether LB cost outweights the estimated benefit
1413     CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1414     
1415     
1416     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1417     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1418       const std::string &name = cpsIter->first;
1419       const std::pair<int,int> &bounds = cpsIter->second;
1420       const int lb = bounds.first;
1421       const int ub = bounds.second;
1422       
1423       if(benefitAfterLB > 0){
1424         CkPrintf("Constant Model: Beneficial LB\n");
1425         int newval = newControlPoints[name] / 2;
1426         if(newval > lb)
1427           newControlPoints[name] = newval;
1428         else 
1429           newControlPoints[name] = lb;
1430       } else {
1431         CkPrintf("Constant Model: Detrimental LB\n");
1432         int newval = newControlPoints[name] * 2;
1433         if(newval < ub)
1434           newControlPoints[name] = newval;
1435         else
1436           newControlPoints[name] = ub;
1437       }
1438       
1439     }
1440     
1441     
1442   }  else if( whichTuningScheme == LDBPeriodLinear) {
1443     // Assume this is used in this manner:
1444     //  1) go to next phase
1445     //  2) request control point
1446     //  3) load balancing
1447     //  4) computation
1448
1449
1450     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1451     instrumentedPhase *prevPhase = previousPhaseData();
1452     
1453
1454     // Build model of execution time based on two phases ago
1455     // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1456     const std::vector<double> &times = twoAgoPhase->times;
1457     const int oldNumTimings = times.size();
1458     const int b1 = 2 + (oldNumTimings-2)/2;
1459     double s1 = 0;
1460     double s2 = 0;
1461     
1462     const double ldbStepsTime = times[0] + times[1];
1463     
1464     for(int i=2; i<b1; i++){
1465       s1 += times[i];
1466     }
1467     for(int i=b1; i<oldNumTimings; i++){
1468       s2 += times[i];
1469     }
1470     
1471     
1472     // Compute the estimated time for the last phase's data
1473     const std::vector<double> &timesNew = prevPhase->times;
1474     const int newNumTimings = timesNew.size();
1475     
1476     const double a1 = s1 / ((newNumTimings-2)/2);
1477     const double a2 = s2 / ((newNumTimings-2)/2);
1478     
1479     const double expectedTotalTime = ((5.0*a2-3.0*a1)/2.0 ) * newNumTimings;
1480         
1481     
1482     // Measure actual time
1483     double sum = 0.0;
1484     for(int i=2; i<newNumTimings; ++i){
1485       sum += timesNew[i];
1486     }
1487
1488     const double avg = sum / ((double)(newNumTimings-2));
1489     const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1490
1491     const double benefit = expectedTotalTime - actualTotalTime;
1492
1493     // Determine load balance cost
1494     const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1495
1496     const double benefitAfterLB = benefit - lbcost;
1497
1498     
1499     // Determine whether LB cost outweights the estimated benefit
1500     CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1501     
1502     
1503     
1504     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1505     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1506       const std::string &name = cpsIter->first;
1507       const std::pair<int,int> &bounds = cpsIter->second;
1508       const int lb = bounds.first;
1509       const int ub = bounds.second;
1510       
1511       if(benefitAfterLB > 0){
1512         CkPrintf("Linear Model: Beneficial LB\n");
1513         int newval = newControlPoints[name] /= 2;
1514         if(newval > lb)
1515           newControlPoints[name] = newval;
1516         else 
1517           newControlPoints[name] = lb;
1518       } else {
1519         CkPrintf("Linear Model: Detrimental LB\n");
1520         int newval = newControlPoints[name] *= 2;
1521         if(newval < ub)
1522           newControlPoints[name] = newval;
1523         else 
1524           newControlPoints[name] = ub;
1525       }
1526       
1527     }
1528
1529   }
1530
1531   else if( whichTuningScheme == LDBPeriodQuadratic) {
1532     // Assume this is used in this manner:
1533     //  1) go to next phase
1534     //  2) request control point
1535     //  3) load balancing
1536     //  4) computation
1537
1538
1539     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1540     instrumentedPhase *prevPhase = previousPhaseData();
1541     
1542
1543     // Build model of execution time based on two phases ago
1544     // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1545     const std::vector<double> &times = twoAgoPhase->times;
1546     const int oldNumTimings = times.size();
1547     const int b1 = 2 + (oldNumTimings-2)/3;
1548     const int b2 = 2 + (2*(oldNumTimings-2))/3;
1549     double s1 = 0;
1550     double s2 = 0;
1551     double s3 = 0;
1552
1553     const double ldbStepsTime = times[0] + times[1];
1554     
1555     for(int i=2; i<b1; i++){
1556       s1 += times[i];
1557     }
1558     for(int i=b1; i<b2; i++){
1559       s2 += times[i];
1560     }
1561     for(int i=b2; i<oldNumTimings; i++){
1562       s3 += times[i];
1563     }
1564
1565     
1566     // Compute the estimated time for the last phase's data
1567     const std::vector<double> &timesNew = prevPhase->times;
1568     const int newNumTimings = timesNew.size();
1569     
1570     const double a1 = s1 / ((newNumTimings-2)/3);
1571     const double a2 = s2 / ((newNumTimings-2)/3);
1572     const double a3 = s3 / ((newNumTimings-2)/3);
1573     
1574     const double a = (a1-2.0*a2+a3)/2.0;
1575     const double b = (a1-4.0*a2+3.0*a3)/2.0;
1576     const double c = a3;
1577     
1578     // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1579     const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1580     const double x2 = 0.5;
1581     const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1582    
1583     
1584     // Measure actual time
1585     double sum = 0.0;
1586     for(int i=2; i<newNumTimings; ++i){
1587       sum += timesNew[i];
1588     }
1589
1590     const double avg = sum / ((double)(newNumTimings-2));
1591     const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1592
1593     const double benefit = expectedTotalTime - actualTotalTime;
1594
1595     // Determine load balance cost
1596     const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1597
1598     const double benefitAfterLB = benefit - lbcost;
1599
1600     
1601     // Determine whether LB cost outweights the estimated benefit
1602     CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1603     
1604     
1605     
1606     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1607     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1608       const std::string &name = cpsIter->first;
1609       const std::pair<int,int> &bounds = cpsIter->second;
1610       const int lb = bounds.first;
1611       const int ub = bounds.second;
1612       
1613       if(benefitAfterLB > 0){
1614         CkPrintf("QuadraticModel: Beneficial LB\n");
1615         int newval = newControlPoints[name] /= 2;
1616         if(newval > lb)
1617           newControlPoints[name] = newval;
1618         else 
1619           newControlPoints[name] = lb;
1620       } else {
1621         CkPrintf("QuadraticModel: Detrimental LB\n");
1622         int newval = newControlPoints[name] *= 2;
1623         if(newval < ub)
1624           newControlPoints[name] = newval;
1625         else 
1626           newControlPoints[name] = ub;
1627       }
1628       
1629     }
1630     
1631     
1632   } else if ( whichTuningScheme == UseSteering ) {
1633           // -----------------------------------------------------------
1634           //  STEERING BASED ON KNOWLEDGE
1635
1636           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1637           // plans are only generated after 3 phases
1638
1639           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1640           instrumentedPhase *prevPhase = previousPhaseData();
1641
1642           if(phase_id%4 == 0){
1643                   CkPrintf("Steering based on 2 phases ago:\n");
1644                   twoAgoPhase->print();
1645                   CkPrintf("\n");
1646                   fflush(stdout);
1647
1648                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1649
1650
1651                   // ========================================= Concurrency =============================================
1652                   // See if idle time is high:
1653                   {
1654                           double idleTime = twoAgoPhase->idleTime.avg;
1655                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1656                           fflush(stdout);
1657                           if(idleTime > 0.10){
1658                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1659                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1660
1661                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1662
1663                                   bool found = false;
1664                                   std::string cpName;
1665                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1666                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1667                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1668                                           cpName = iter->first;
1669                                           info = &iter->second;
1670
1671                                           // Initialize a new plan based on two phases ago
1672                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1673
1674                                           CkPrintf("Steering found knob to turn\n");
1675                                           fflush(stdout);
1676
1677                                           if(info->first == ControlPoint::EFF_INC){
1678                                                   const int maxValue = controlPointSpace[cpName].second;
1679                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1680                                                   if(twoAgoValue+1 <= maxValue){
1681                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1682                                                   }
1683                                           } else {
1684                                                   const int minValue = controlPointSpace[cpName].second;
1685                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1686                                                   if(twoAgoValue-1 >= minValue){
1687                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1688                                                   }
1689                                           }
1690
1691                                           possibleNextStepPlans.push_back(aNewPlan);
1692
1693                                   }
1694                           }
1695                   }
1696
1697                   // ========================================= Grain Size =============================================
1698                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1699                   {
1700                           double overheadTime = twoAgoPhase->overheadTime.avg;
1701                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1702                           fflush(stdout);
1703                           if(overheadTime > 0.10){
1704                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1705                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1706
1707                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1708
1709                                   bool found = false;
1710                                   std::string cpName;
1711                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1712                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1713                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1714                                           cpName = iter->first;
1715                                           info = &iter->second;
1716
1717                                           // Initialize a new plan based on two phases ago
1718                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1719
1720
1721
1722                                           CkPrintf("Steering found knob to turn\n");
1723                                           fflush(stdout);
1724
1725                                           if(info->first == ControlPoint::EFF_INC){
1726                                                   const int maxValue = controlPointSpace[cpName].second;
1727                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1728                                                   if(twoAgoValue+1 <= maxValue){
1729                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1730                                                   }
1731                                           } else {
1732                                                   const int minValue = controlPointSpace[cpName].second;
1733                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1734                                                   if(twoAgoValue-1 >= minValue){
1735                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1736                                                   }
1737                                           }
1738
1739                                           possibleNextStepPlans.push_back(aNewPlan);
1740
1741                                   }
1742
1743                           }
1744                   }
1745                   // ========================================= GPU Offload =============================================
1746                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1747                   {
1748                           double idleTime = twoAgoPhase->idleTime.avg;
1749                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1750                           fflush(stdout);
1751                           if(idleTime > 0.10){
1752                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1753                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1754
1755                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1756
1757                                   bool found = false;
1758                                   std::string cpName;
1759                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1760                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1761                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1762                                           cpName = iter->first;
1763                                           info = &iter->second;
1764
1765                                           // Initialize a new plan based on two phases ago
1766                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1767
1768
1769                                           CkPrintf("Steering found knob to turn\n");
1770                                           fflush(stdout);
1771
1772                                           if(info->first == ControlPoint::EFF_DEC){
1773                                                   const int maxValue = controlPointSpace[cpName].second;
1774                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1775                                                   if(twoAgoValue+1 <= maxValue){
1776                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1777                                                   }
1778                                           } else {
1779                                                   const int minValue = controlPointSpace[cpName].second;
1780                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1781                                                   if(twoAgoValue-1 >= minValue){
1782                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1783                                                   }
1784                                           }
1785
1786                                           possibleNextStepPlans.push_back(aNewPlan);
1787
1788                                   }
1789
1790                           }
1791                   }
1792
1793                   // ========================================= Done =============================================
1794
1795
1796                   if(possibleNextStepPlans.size() > 0){
1797                           newControlPoints = possibleNextStepPlans[0];
1798                   }
1799
1800
1801                   CkPrintf("Steering done for this phase\n");
1802                   fflush(stdout);
1803
1804           }  else {
1805                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1806                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1807                   fflush(stdout);
1808           }
1809
1810
1811
1812   } else if( whichTuningScheme == SimulatedAnnealing ) {
1813
1814     // -----------------------------------------------------------
1815     //  SIMULATED ANNEALING
1816     //  Simulated Annealing style hill climbing method
1817     //
1818     //  Find the best search space configuration, and try something
1819     //  nearby it, with a radius decreasing as phases increase
1820
1821     CkPrintf("Finding best phase\n");
1822     instrumentedPhase *bestPhase = allData.findBest();  
1823     CkPrintf("best found:\n"); 
1824     bestPhase->print(); 
1825     CkPrintf("\n"); 
1826     
1827
1828     const int convergeByPhase = 100;
1829     // Determine from 0.0 to 1.0 how far along we are
1830     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1831
1832     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1833     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1834       const std::string &name = cpsIter->first;
1835       const std::pair<int,int> &bounds = cpsIter->second;
1836       const int minValue = bounds.first;
1837       const int maxValue = bounds.second;
1838       
1839       const int before = bestPhase->controlPoints[name];   
1840   
1841       const int range = (maxValue-minValue+1)*(1.0-progress);
1842
1843       int high = min(before+range, maxValue);
1844       int low = max(before-range, minValue);
1845       
1846       newControlPoints[name] = low;
1847       if(high-low > 0){
1848         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1849       } 
1850       
1851     }
1852
1853   } else if ( whichTuningScheme == DivideAndConquer ) {
1854
1855           // -----------------------------------------------------------
1856           //  STEERING FOR Divide & Conquer Programs
1857           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1858           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1859           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1860
1861           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1862           // plans are only generated after 3 phases
1863
1864           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1865           instrumentedPhase *prevPhase = previousPhaseData();
1866
1867           if(phase_id%4 == 0){
1868                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1869                   twoAgoPhase->print();
1870                   CkPrintf("\n");
1871                   fflush(stdout);
1872
1873                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1874
1875
1876                   // ========================================= Concurrency =============================================
1877                   // See if idle time is high:
1878                   {
1879                           double idleTime = twoAgoPhase->idleTime.avg;
1880                           double overheadTime = twoAgoPhase->overheadTime.avg;
1881
1882
1883                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1884                           fflush(stdout);
1885                           if(idleTime+overheadTime > 0.10){
1886                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1887                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1888
1889                                   int direction = -1;
1890                                   if (idleTime>overheadTime){
1891                                           // We need to decrease the grain size, or increase the available concurrency
1892                                           direction = 1;
1893                                   }
1894
1895                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1896
1897                                   bool found = false;
1898                                   std::string cpName;
1899                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1900                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1901                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1902                                           cpName = iter->first;
1903                                           info = &iter->second;
1904                                           
1905                                           // Initialize a new plan based on two phases ago
1906                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1907                                           bool newPlanExists = false;
1908
1909                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1910                                           fflush(stdout);
1911
1912
1913
1914                                           if(info->first == ControlPoint::EFF_INC){
1915                                                   const int minValue = controlPointSpace[cpName].first;
1916                                                   const int maxValue = controlPointSpace[cpName].second;
1917                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1918                                                   const int newVal = twoAgoValue+1*direction;
1919                                                   if(newVal <= maxValue && newVal >= minValue){
1920                                                     aNewPlan[cpName] = newVal;
1921                                                     newPlanExists = true;
1922                                                   } else {
1923                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1924                                                   }
1925                                           } else {
1926                                                   const int minValue = controlPointSpace[cpName].first;
1927                                                   const int maxValue = controlPointSpace[cpName].second;
1928                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1929                                                   const int newVal = twoAgoValue-1*direction;
1930                                                   if(newVal <= maxValue && newVal >= minValue){
1931                                                           aNewPlan[cpName] = newVal;
1932                                                           newPlanExists = true;
1933                                                   } else {
1934                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1935                                                   }
1936                                           }
1937
1938                                           if(newPlanExists){
1939                                             possibleNextStepPlans.push_back(aNewPlan);
1940                                           }
1941                                   }
1942                           }
1943                   }
1944
1945                   if(possibleNextStepPlans.size() > 0){
1946                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1947                     newControlPoints = possibleNextStepPlans[0];
1948                   } else {
1949                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1950                   }
1951           }
1952
1953   } else if( whichTuningScheme == Simplex ) {
1954
1955           // -----------------------------------------------------------
1956           //  Nelder Mead Simplex Algorithm
1957           //
1958           //  A scheme that takes a simplex (n+1 points) and moves it
1959           //  toward the minimum, eventually converging there.
1960
1961           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1962
1963   } else if( whichTuningScheme == ExhaustiveSearch ) {
1964     
1965     // -----------------------------------------------------------
1966     // EXHAUSTIVE SEARCH
1967    
1968     int numDimensions = controlPointSpace.size();
1969     CkAssert(numDimensions > 0);
1970   
1971     vector<int> lowerBounds(numDimensions);
1972     vector<int> upperBounds(numDimensions); 
1973   
1974     int d=0;
1975     std::map<std::string, pair<int,int> >::iterator iter;
1976     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1977       //    CkPrintf("Examining dimension %d\n", d);
1978       lowerBounds[d] = iter->second.first;
1979       upperBounds[d] = iter->second.second;
1980       d++;
1981     }
1982    
1983     // get names for each dimension (control point)
1984     vector<std::string> names(numDimensions);
1985     d=0;
1986     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
1987       names[d] = niter->first;
1988       d++;
1989     }
1990   
1991   
1992     // Create the first possible configuration
1993     vector<int> config = lowerBounds;
1994     config.push_back(0);
1995   
1996     // Increment until finding an unused configuration
1997     allData.cleanupNames(); // put -1 values in for any control points missing
1998     std::vector<instrumentedPhase*> &phases = allData.phases;     
1999
2000     while(true){
2001     
2002       std::vector<instrumentedPhase*>::iterator piter; 
2003       bool testedConfiguration = false; 
2004       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2005       
2006         // Test if the configuration matches this phase
2007         bool match = true;
2008         for(int j=0;j<numDimensions;j++){
2009           match &= (*piter)->controlPoints[names[j]] == config[j];
2010         }
2011       
2012         if(match){
2013           testedConfiguration = true; 
2014           break;
2015         } 
2016       } 
2017     
2018       if(testedConfiguration == false){ 
2019         break;
2020       } 
2021     
2022       // go to next configuration
2023       config[0] ++;
2024       // Propagate "carrys"
2025       for(int i=0;i<numDimensions;i++){
2026         if(config[i] > upperBounds[i]){
2027           config[i+1]++;
2028           config[i] = lowerBounds[i];
2029         }
2030       }
2031       // check if we have exhausted all possible values
2032       if(config[numDimensions] > 0){
2033         break;
2034       }
2035        
2036     }
2037   
2038     if(config[numDimensions] > 0){
2039       for(int i=0;i<numDimensions;i++){ 
2040         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2041       }
2042     }
2043
2044     // results are now in config[i]
2045     
2046     for(int i=0; i<numDimensions; i++){
2047       newControlPoints[names[i]] = config[i];
2048       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2049     }
2050
2051
2052   } else {
2053     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2054   }
2055
2056
2057   const double endGenerateTime = CmiWallTimer();
2058   
2059   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2060   
2061 }
2062
2063
2064
2065
2066 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2067
2068
2069 /// Get control point value from range of integers [lb,ub]
2070 int controlPoint(const char *name, int lb, int ub){
2071   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2072   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2073   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2074   int result;
2075
2076   // if we already have control point values for phase, return them
2077   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2078     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2079     return thisPhaseData->controlPoints[std::string(name)];
2080   }
2081   
2082
2083   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2084     // For the first few phases, just use the lower bound, or the default if one was provided 
2085     // This ensures that the ranges for all the control points are known before we do anything fancy
2086     result = lb;
2087
2088     if(defaultControlPointValues.count(std::string(name)) > 0){
2089       int v = defaultControlPointValues[std::string(name)];
2090       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2091       result = v;
2092     }
2093
2094   } else if(controlPointSpace.count(std::string(name)) == 0){
2095     // if this is the first time we've seen the CP, then return the lower bound
2096     result = lb;
2097     
2098   }  else {
2099     // Generate a plan one time for each phase
2100     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2101     
2102     // Use the planned value:
2103     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2104     
2105   }
2106
2107   if(!isInRange(result,ub,lb)){
2108     std::cerr << "control point out of range: " << result << " " << lb << " " << ub << std::endl;
2109     fflush(stdout);
2110     fflush(stderr);
2111   }
2112   CkAssert(isInRange(result,ub,lb));
2113   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2114
2115   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2116
2117   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2118   //  thisPhaseData->print();
2119   
2120   return result;
2121 }
2122
2123
2124 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2125   CkAssert(sizeof(lb) == 4);
2126   CkAssert(CkMyPe() == 0);
2127   return controlPoint("FortranCP", *lb, *ub);
2128 }
2129
2130
2131
2132
2133 /// Inform the control point framework that a named control point affects the priorities of some array  
2134 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2135 //   CkGroupID aid = arraybase.ckGetArrayID();
2136 //   int groupIdx = aid.idx;
2137 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2138 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2139 // }
2140
2141
2142 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2143 // void controlPointPriorityEntry(const char *name, int idx){
2144 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2145 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2146 // }
2147
2148
2149
2150
2151
2152 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2153
2154     This function decomposes the algorithm into a state machine that allows it to
2155     evaluate one or more configurations through subsequent clls to this function.
2156     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2157
2158     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2159     so a list of them will be created and they will be evaluated, one per call.
2160
2161     Currently there is no stopping criteria, but the simplex ought to contract down
2162     to a few close configurations, and hence not much change will happen after this 
2163     point.
2164
2165  */
2166 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2167
2168         if(useBestKnown){
2169                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2170                 return;
2171         }
2172
2173
2174         if(firstSimplexPhase< 0){
2175                 firstSimplexPhase = allData.phases.size()-1;
2176                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2177         }
2178
2179         int n = controlPointSpace.size();
2180
2181         CkAssert(n>=2);
2182
2183
2184         if(simplexState == beginning){
2185                 // First we evaluate n+1 random points, then we go to a different state
2186                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2187                         CkPrintf("Simplex Tuning: chose random configuration\n");
2188
2189                         // Choose random values from the middle third of the range in each dimension
2190                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2191                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2192                                 const std::string &name = cpsIter->first;
2193                                 const std::pair<int,int> &bounds = cpsIter->second;
2194                                 const int lb = bounds.first;
2195                                 const int ub = bounds.second;
2196                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2197                         }
2198                 } else {
2199                         // Set initial simplex:
2200                         for(int i=0; i<n+1; i++){
2201                                 simplexIndices.insert(firstSimplexPhase+i);
2202                         }
2203                         CkAssert(simplexIndices.size() == n+1);
2204
2205                         // Transition to reflecting state
2206                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2207
2208                 }
2209
2210         } else if (simplexState == reflecting){
2211                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2212                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2213
2214                 // Find the highest time from other points in the simplex
2215                 double highestTimeForOthersInSimplex = 0.0;
2216                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2217                         double t = allData.phases[*iter]->medianTime();
2218                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2219                                 highestTimeForOthersInSimplex = t;
2220                         }
2221                 }
2222
2223                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2224
2225                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2226                         // if y* < yl,  transition to "expanding"
2227                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2228
2229                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2230                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2231                         CkAssert(simplexIndices.size() == n+1);
2232                         simplexIndices.erase(worstPhase);
2233                         CkAssert(simplexIndices.size() == n);
2234                         simplexIndices.insert(pPhase);
2235                         CkAssert(simplexIndices.size() == n+1);
2236                         // Transition to reflecting state
2237                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2238
2239                 } else {
2240                         // if y* > yh
2241                         if(recentPhaseTime <= worstTime){
2242                                 // replace Ph with P*
2243                                 CkAssert(simplexIndices.size() == n+1);
2244                                 simplexIndices.erase(worstPhase);
2245                                 CkAssert(simplexIndices.size() == n);
2246                                 simplexIndices.insert(pPhase);
2247                                 CkAssert(simplexIndices.size() == n+1);
2248                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2249                                 worstPhase = pPhase;
2250                                 // Just as a sanity check, make sure we don't use the non-updated values.
2251                                 worst.clear();
2252                         }
2253
2254                         // Now, form P** and do contracting phase
2255                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2256
2257                 }
2258
2259         } else if (simplexState == doneExpanding){
2260                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2261                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2262                 // A new configuration has been evaluated
2263
2264                 // Check to see if y** < y1
2265                 if(recentPhaseTime < bestTime){
2266                         // replace Ph by P**
2267                         CkAssert(simplexIndices.size() == n+1);
2268                         simplexIndices.erase(worstPhase);
2269                         CkAssert(simplexIndices.size() == n);
2270                         simplexIndices.insert(p2Phase);
2271                         CkAssert(simplexIndices.size() == n+1);
2272                 } else {
2273                         //      replace Ph by P*
2274                         CkAssert(simplexIndices.size() == n+1);
2275                         simplexIndices.erase(worstPhase);
2276                         CkAssert(simplexIndices.size() == n);
2277                         simplexIndices.insert(pPhase);
2278                         CkAssert(simplexIndices.size() == n+1);
2279                 }
2280
2281                 // Transition to reflecting state
2282                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2283
2284         }  else if (simplexState == contracting){
2285                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2286                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2287                 // A new configuration has been evaluated
2288
2289                 // Check to see if y** > yh
2290                 if(recentPhaseTime <= worstTime){
2291                         // replace Ph by P**
2292                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2293                         CkAssert(simplexIndices.size() == n+1);
2294                         simplexIndices.erase(worstPhase);
2295                         CkAssert(simplexIndices.size() == n);
2296                         simplexIndices.insert(p2Phase);
2297                         CkAssert(simplexIndices.size() == n+1);
2298                         // Transition to reflecting state
2299                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2300
2301                 } else {
2302                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2303                         simplexState = stillContracting;
2304
2305                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2306                         stillMustContractList = simplexIndices;
2307
2308                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2309                 }
2310
2311         } else if (simplexState == stillContracting){
2312                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2313
2314                 if(stillMustContractList.size()>0){
2315                         int c = *stillMustContractList.begin();
2316                         stillMustContractList.erase(c);
2317                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2318
2319                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2320
2321                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2322                         int v = 0;
2323                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2324                                 const std::string &name = cpsIter->first;
2325                                 const std::pair<int,int> &bounds = cpsIter->second;
2326                                 const int lb = bounds.first;
2327                                 const int ub = bounds.second;
2328
2329                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2330
2331                                 newControlPoints[name] = keepInRange(val,lb,ub);
2332                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2333                                 v++;
2334                         }
2335                 } else {
2336                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2337                         CkAssert(stillMustContractList.size() == 0);
2338                         simplexIndices.clear();
2339                         CkAssert(simplexIndices.size()==0);
2340                         for(int i=0; i<n+1; i++){
2341                                 simplexIndices.insert(allData.phases.size()-2-i);
2342                         }
2343                         CkAssert(simplexIndices.size()==n+1);
2344
2345                         // Transition to reflecting state
2346                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2347
2348                 }
2349
2350
2351         } else if (simplexState == expanding){
2352                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2353                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2354                 // A new configuration has been evaluated
2355
2356                 // determine if y** < yl
2357                 if(recentPhaseTime < bestTime){
2358                         // replace Ph by P**
2359                         CkAssert(simplexIndices.size() == n+1);
2360                         simplexIndices.erase(worstPhase);
2361                         CkAssert(simplexIndices.size() == n);
2362                         simplexIndices.insert(p2Phase);
2363                         CkAssert(simplexIndices.size() == n+1);
2364                         // Transition to reflecting state
2365                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2366
2367                 } else {
2368                         // else, replace ph with p*
2369                         CkAssert(simplexIndices.size() == n+1);
2370                         simplexIndices.erase(worstPhase);
2371                         CkAssert(simplexIndices.size() == n);
2372                         simplexIndices.insert(pPhase);
2373                         CkAssert(simplexIndices.size() == n+1);
2374                         // Transition to reflecting state
2375                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2376                 }
2377
2378
2379         } else {
2380                 CkAbort("Unknown simplexState");
2381         }
2382
2383 }
2384
2385
2386
2387 /** Replace the worst point with its reflection across the centroid. */
2388 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2389
2390         int n = controlPointSpace.size();
2391
2392         printSimplex(allData);
2393
2394         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2395
2396
2397         // Quit if the diameter of our simplex is small
2398         double maxr = 0.0;
2399         for(int i=0; i<n+1; i++){
2400                 //              Compute r^2 of this simplex point from the centroid
2401                 double r2 = 0.0;
2402                 std::vector<double> p = pointCoords(allData, i);
2403                 for(int d=0; d<p.size(); d++){
2404                         double r1 = (p[d] * centroid[d]);
2405                         r2 += r1*r1;
2406                 }
2407                 if(r2 > maxr)
2408                         maxr = r2;
2409         }
2410
2411 #if 0
2412         // At some point we quit this tuning
2413         if(maxr < 10){
2414                 useBestKnown = true;
2415                 instrumentedPhase *best = allData.findBest();
2416                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2417
2418                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2419                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2420                         const std::string &name = cpsIter->first;
2421                         newControlPoints[name] =  best->controlPoints[name];
2422                 }
2423         }
2424 #endif
2425
2426         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2427
2428         pPhase = allData.phases.size()-1;
2429         P.resize(n);
2430         for(int i=0; i<n; i++){
2431                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2432         }
2433
2434         for(int i=0; i<P.size(); i++){
2435                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2436         }
2437
2438
2439         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2440         int v = 0;
2441         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2442                 const std::string &name = cpsIter->first;
2443                 const std::pair<int,int> &bounds = cpsIter->second;
2444                 const int lb = bounds.first;
2445                 const int ub = bounds.second;
2446                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2447                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2448                 v++;
2449         }
2450
2451
2452         // Transition to "reflecting" state
2453         simplexState = reflecting;
2454         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2455
2456 }
2457
2458
2459
2460
2461 /** Replace the newly tested reflection with a further expanded version of itself. */
2462 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2463         int n = controlPointSpace.size();
2464         printSimplex(allData);
2465
2466         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2467         // I believe the equation for P** in the text on page 308 is correct.
2468
2469         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2470
2471
2472         p2Phase = allData.phases.size()-1;
2473         P2.resize(n);
2474         for(int i=0; i<n; i++){
2475                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2476         }
2477
2478         for(int i=0; i<P2.size(); i++){
2479                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2480         }
2481
2482
2483         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2484         int v = 0;
2485         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2486                 const std::string &name = cpsIter->first;
2487                 const std::pair<int,int> &bounds = cpsIter->second;
2488                 const int lb = bounds.first;
2489                 const int ub = bounds.second;
2490                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2491                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2492                 v++;
2493         }
2494
2495
2496         // Transition to "doneExpanding" state
2497         simplexState = doneExpanding;
2498         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2499
2500 }
2501
2502
2503
2504
2505 /** Replace the newly tested reflection with a further expanded version of itself. */
2506 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2507         int n = controlPointSpace.size();
2508         printSimplex(allData);
2509
2510         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2511
2512
2513         p2Phase = allData.phases.size()-1;
2514         P2.resize(n);
2515         for(int i=0; i<n; i++){
2516                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2517         }
2518
2519         for(int i=0; i<P2.size(); i++){
2520                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2521         }
2522
2523
2524         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2525         int v = 0;
2526         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2527                 const std::string &name = cpsIter->first;
2528                 const std::pair<int,int> &bounds = cpsIter->second;
2529                 const int lb = bounds.first;
2530                 const int ub = bounds.second;
2531                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2532                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2533                 v++;
2534         }
2535         
2536         
2537         // Transition to "contracting" state
2538         simplexState = contracting;
2539         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2540
2541 }
2542
2543
2544
2545
2546 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2547         int n = controlPointSpace.size();
2548
2549         // Find worst performing point in the simplex
2550         worstPhase = -1;
2551         worstTime = -1.0;
2552         bestPhase = 10000000;
2553         bestTime = 10000000;
2554         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2555                 double t = allData.phases[*iter]->medianTime();
2556                 if(t > worstTime){
2557                         worstTime = t;
2558                         worstPhase = *iter;
2559                 }
2560                 if(t < bestTime){
2561                         bestTime = t;
2562                         bestPhase = *iter;
2563                 }
2564         }
2565         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2566
2567         best = pointCoords(allData, bestPhase);
2568         CkAssert(best.size() == n);
2569
2570         worst = pointCoords(allData, worstPhase);
2571         CkAssert(worst.size() == n);
2572
2573         // Calculate centroid of the remaining points in the simplex
2574         centroid.resize(n);
2575         for(int i=0; i<n; i++){
2576                 centroid[i] = 0.0;
2577         }
2578
2579         int numPts = 0;
2580
2581         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2582                 if(*iter != worstPhase){
2583                         numPts ++;
2584                         // Accumulate into the result vector
2585                         int c = 0;
2586                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2587                                 centroid[c] += citer->second;
2588                                 c++;
2589                         }
2590
2591                 }
2592         }
2593
2594         // Now divide the sums by the number of points.
2595         for(int v = 0; v<centroid.size(); v++) {
2596                 centroid[v] /= (double)numPts;
2597         }
2598
2599         CkAssert(centroid.size() == n);
2600
2601         for(int i=0; i<centroid.size(); i++){
2602                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2603         }
2604
2605
2606 }
2607
2608
2609
2610 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2611         std::vector<double> result;
2612         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2613                 result.push_back((double)citer->second);
2614         }
2615         return result;
2616 }
2617
2618
2619
2620
2621 void ControlPointWriteOutputToDisk(){
2622         CkAssert(CkMyPe() == 0);
2623         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2624 }
2625
2626
2627
2628 /*! @} */
2629
2630 #ifdef CP_DISABLE_TRACING
2631 #include "ControlPointsNoTrace.def.h"
2632 #else
2633 #include "ControlPoints.def.h"
2634 #endif