suppress some compiler warnings.
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15 #include <math.h>
16
17
18 #include <climits>
19 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
20  
21
22 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
23 /**  @{ */
24
25 using namespace std;
26
27 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
28
29
30 //#undef DEBUGPRINT
31 //#define DEBUGPRINT 4
32
33 static void periodicProcessControlPoints(void* ptr, double currWallTime);
34
35
36 // A pointer to this PE's controlpoint manager Proxy
37 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
38 /* readonly */ int random_seed;
39 /* readonly */ long controlPointSamplePeriod;
40 /* readonly */ int whichTuningScheme;
41 /* readonly */ bool writeDataFileAtShutdown;
42 /* readonly */ bool shouldFilterOutputData;
43 /* readonly */ bool loadDataFileAtStartup;
44 /* readonly */ bool shouldGatherMemoryUsage;
45 /* readonly */ bool shouldGatherUtilization;
46 /* readonly */ bool shouldGatherAll;
47 /* readonly */ char CPDataFilename[512];
48
49
50
51 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
52 /// These probably come from the command line arguments, so are available only on PE 0
53 std::map<std::string, int> defaultControlPointValues;
54
55
56
57 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
58
59
60
61 void printTuningScheme(){
62   switch(whichTuningScheme){
63   case RandomSelection:
64     CkPrintf("Tuning Scheme: RandomSelection\n");
65     break;
66   case AlwaysDefaults:
67     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
68     break;
69   case SimulatedAnnealing:
70     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
71     break;
72   case ExhaustiveSearch:
73     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
74     break;
75   case CriticalPathAutoPrioritization:
76     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
77     break;
78   case UseBestKnownTiming:
79     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
80     break;
81   case UseSteering:
82     CkPrintf("Tuning Scheme: UseSteering\n");
83     break;
84   case MemoryAware:
85     CkPrintf("Tuning Scheme: MemoryAware\n");
86     break;
87   case Simplex:
88     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
89     break;
90   case DivideAndConquer:
91     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
92     break;
93   case LDBPeriod:
94     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
95     break;
96   case LDBPeriodLinear:
97     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
98     break;
99   case LDBPeriodQuadratic:
100     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
101     break;
102   case LDBPeriodOptimal:
103     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
104     break;
105   default:
106     CkPrintf("Unknown tuning scheme\n");
107     break;
108   }
109   fflush(stdout);
110 }
111
112
113
114 /// A reduction type that combines idle time measurements (min/sum/max etc.)
115 CkReduction::reducerType idleTimeReductionType;
116 /// A reducer that combines idle time measurements (min/sum/max etc.)
117 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
118   double ret[3];
119   if(nMsg > 0){
120     CkAssert(msgs[0]->getSize()==3*sizeof(double));
121     double *m=(double *)msgs[0]->getData();
122     ret[0]=m[0];
123     ret[1]=m[1];
124     ret[2]=m[2];
125   }
126   for (int i=1;i<nMsg;i++) {
127     CkAssert(msgs[i]->getSize()==3*sizeof(double));
128     double *m=(double *)msgs[i]->getData();
129     ret[0]=min(ret[0],m[0]);
130     ret[1]+=m[1];
131     ret[2]=max(ret[2],m[2]);
132   }  
133   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
134 }
135
136
137
138 /// A reduction type that combines idle, overhead, and memory measurements
139 CkReduction::reducerType allMeasuresReductionType;
140 /// A reducer that combines idle, overhead, and memory measurements
141 #define ALL_REDUCTION_SIZE 12
142 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
143   double ret[ALL_REDUCTION_SIZE];
144   if(nMsg > 0){
145     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
146     double *m=(double *)msgs[0]->getData();
147     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
148   }
149   for (int i=1;i<nMsg;i++) {
150     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
151     double *m=(double *)msgs[i]->getData();
152     // idle time (min/sum/max)
153     ret[0]=min(ret[0],m[0]);
154     ret[1]+=m[1];
155     ret[2]=max(ret[2],m[2]);
156     // overhead time (min/sum/max)
157     ret[3]=min(ret[3],m[3]);
158     ret[4]+=m[4];
159     ret[5]=max(ret[5],m[5]);
160     // mem usage (max)
161     ret[6]=max(ret[6],m[6]);
162     // bytes per invocation for two types of entry methods
163     ret[7]+=m[7];
164     ret[8]+=m[8];
165     ret[9]+=m[9];
166     ret[10]+=m[10];
167     // Grain size (avg)
168     ret[11]+=m[11];
169   }  
170   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
171 }
172
173
174 /// Registers the control point framework's reduction handlers at startup on each PE
175 /*initproc*/ void registerCPReductions(void) {
176   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
177   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
178 }
179
180
181
182
183
184
185 /// Return an integer between 0 and num-1 inclusive
186 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
187 unsigned int randInt(unsigned int num, const char* name, int seed=0){
188   CkAssert(num > 0);
189   CkAssert(num < 1000);
190
191   unsigned long hash = 0;
192   unsigned int c;
193   unsigned char * str = (unsigned char*)name;
194
195   while (c = *str++){
196     unsigned int c2 = (c+64)%128;
197     unsigned int c3 = (c2*5953)%127;
198     hash = c3 + (hash << 6) + (hash << 16) - hash;
199   }
200
201   unsigned long shuffled1 = (hash*2083)%7907;
202   unsigned long shuffled2 = (seed*4297)%2017;
203
204   unsigned long shuffled3 = (random_seed*4799)%7919;
205
206   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
207
208   unsigned int result = ((namehash * 6029) % 1117) % num;
209
210   CkAssert(result >=0 && result < num);
211   return result;
212 }
213
214
215
216 controlPointManager::controlPointManager() {
217   generatedPlanForStep = -1;
218
219     exitWhenReady = false;
220     alreadyRequestedMemoryUsage = false;   
221     alreadyRequestedIdleTime = false;
222     alreadyRequestedAll = false;
223     
224     instrumentedPhase * newPhase = new instrumentedPhase();
225     allData.phases.push_back(newPhase);   
226     
227     frameworkShouldAdvancePhase = false;
228     haveControlPointChangeCallback = false;
229 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
230     
231     ControlPoint::initControlPointEffects();
232
233     phase_id = 0;
234
235     if(loadDataFileAtStartup){    
236       loadDataFile();
237     }
238
239     
240     if(CkMyPe() == 0){
241       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
242     }
243
244     traceRegisterUserEvent("No currently executing message", 5000);
245     traceRegisterUserEvent("Zero time along critical path", 5010);
246     traceRegisterUserEvent("Positive total time along critical path", 5020);
247     traceRegisterUserEvent("env->setPathHistory()", 6000);
248     traceRegisterUserEvent("Critical Path", 5900);
249     traceRegisterUserEvent("Table Entry", 5901);
250
251
252 #if USER_EVENT_FOR_REGISTERTERMINALPATH
253     traceRegisterUserEvent("registerTerminalPath", 100); 
254 #endif
255
256   }
257   
258
259  controlPointManager::~controlPointManager(){
260 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
261   }
262
263
264   /// Loads the previous run data file
265   void controlPointManager::loadDataFile(){
266     ifstream infile(CPDataFilename);
267     vector<std::string> names;
268     std::string line;
269   
270     while(getline(infile,line)){
271       if(line[0] != '#')
272         break;
273     }
274   
275     int numTimings = 0;
276     std::istringstream n(line);
277     n >> numTimings;
278   
279     while(getline(infile,line)){ 
280       if(line[0] != '#') 
281         break; 
282     }
283
284     int numControlPointNames = 0;
285     std::istringstream n2(line);
286     n2 >> numControlPointNames;
287   
288     for(int i=0; i<numControlPointNames; i++){
289       getline(infile,line);
290       names.push_back(line);
291     }
292
293     for(int i=0;i<numTimings;i++){
294       getline(infile,line);
295       while(line[0] == '#')
296         getline(infile,line); 
297
298       instrumentedPhase * ips = new instrumentedPhase();
299
300       std::istringstream iss(line);
301
302       // Read memory usage for phase
303       iss >> ips->memoryUsageMB;
304       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
305
306       // Read idle time
307       iss >> ips->idleTime.min;
308       iss >> ips->idleTime.avg;
309       iss >> ips->idleTime.max;
310
311       // Read overhead time
312       iss >> ips->overheadTime.min;
313       iss >> ips->overheadTime.avg;
314       iss >> ips->overheadTime.max;
315
316       // read bytePerInvoke
317       iss >> ips->bytesPerInvoke;
318
319       // read grain size
320       iss >> ips->grainSize;
321
322       // Read control point values
323       for(int cp=0;cp<numControlPointNames;cp++){
324         int cpvalue;
325         iss >> cpvalue;
326         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
327       }
328
329       // ignore median time
330       double mt;
331       iss >> mt;
332
333       // Read all times
334
335       double time;
336
337       while(iss >> time){
338         ips->times.push_back(time);
339 #if DEBUGPRINT > 5
340         CkPrintf("read time %lf from file\n", time);
341 #endif
342       }
343
344       allData.phases.push_back(ips);
345
346     }
347
348     infile.close();
349   }
350
351
352
353   /// Add the current data to allData and output it to a file
354   void controlPointManager::writeDataFile(){
355     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
356     ofstream outfile(CPDataFilename);
357     allData.cleanupNames();
358
359 //    string s = allData.toString();
360 //    CkPrintf("At end: \n %s\n", s.c_str());
361
362     if(shouldFilterOutputData){
363       allData.verify();
364       allData.filterOutIncompletePhases();
365     }
366
367 //    string s2 = allData.toString();
368 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
369     if(allData.toString().length() > 10){
370       outfile << allData.toString();
371     } else {
372       outfile << " No data available to save to disk " << endl;
373     }
374     outfile.close();
375   }
376
377   /// User can register a callback that is called when application should advance to next phase
378   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
379     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
380     controlPointChangeCallback = cb;
381     haveControlPointChangeCallback = true;
382   }
383
384
385 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
386 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
387   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
388 }
389
390   /// Called periodically by the runtime to handle the control points
391   /// Currently called on each PE
392   void controlPointManager::processControlPoints(){
393
394     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
395
396
397     //==========================================================================================
398     // Print the data for each phase
399
400     const int s = allData.phases.size();
401
402 #if DEBUGPRINT
403     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
404     for(int p=0;p<s;++p){
405       const instrumentedPhase &phase = allData.phases[p];
406       const idleTimeContainer &idle = phase.idleTime;
407       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
408       vector<double> const &times = phase.times;
409
410       CkPrintf("Phase %d:\n", p);
411       idle.print();
412      //   CkPrintf("critical paths: (* affected by control point)\n");
413         //   for(int i=0;i<criticalPaths.size(); i++){
414         // If affected by a control point
415         //      criticalPaths[i].print();
416
417       //        criticalPaths[i].print();
418       //        CkPrintf("\n");
419         
420
421       //   }
422       CkPrintf("Timings:\n");
423       for(int i=0;i<times.size(); i++){
424         CkPrintf("%lf ", times[i]);
425       }
426       CkPrintf("\n");
427
428     }
429     
430     CkPrintf("\n\n");
431
432
433 #endif
434
435
436
437     //==========================================================================================
438     // If this is a phase during which we try to adapt control point values based on critical path
439 #if 0
440     if( s%5 == 4) {
441
442       // Find the most recent phase with valid critical path data and idle time measurements      
443       int whichPhase=-1;
444       for(int p=0; p<s; ++p){
445         const instrumentedPhase &phase = allData.phases[p];
446         const idleTimeContainer &idle = phase.idleTime;
447         if(idle.isValid() && phase.criticalPaths.size()>0 ){
448           whichPhase = p;
449         }
450       }
451       
452       
453       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
454       const instrumentedPhase &phase = allData.phases[whichPhase];
455       const idleTimeContainer &idle = phase.idleTime;
456       
457       if(idle.min > 0.1){
458         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
459         
460         // Determine the median critical path for this phase
461         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
462         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
463         CkAssert(phase.criticalPaths.size() > 0);
464         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
465         
466         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
467         
468         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
469           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
470
471
472           CkPrintf("Finding control points related to the critical path\n");
473           int cpcount = 0;
474           std::set<std::string> controlPointsAffectingCriticalPath;
475
476           
477           for(int e=0;e<path.getNumUsed();e++){
478             if(path.getUsedCount(e)>0){
479               int ep = path.getUsedEp(e);
480
481               std::map<std::string, std::set<int> >::iterator iter;
482               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
483                 if(iter->second.count(ep)>0){
484                   controlPointsAffectingCriticalPath.insert(iter->first);
485                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
486                   cpcount++;
487                 }
488               }
489               
490             }
491           }
492           
493
494           if(cpcount==0){
495             CkPrintf("No control points are known to affect the critical path\n");
496           } else {
497             double beginTime = CmiWallTimer();
498
499             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
500             
501             newControlPoints = phase.controlPoints;
502             
503             if(frameworkShouldAdvancePhase){
504               gotoNextPhase();  
505             }
506             
507             if(haveControlPointChangeCallback){ 
508 #if DEBUGPRINT
509               CkPrintf("Calling control point change callback\n");
510 #endif
511               // Create a high priority message and send it to the callback
512               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
513               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
514               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
515               controlPointChangeCallback.send(msg);
516               
517             }
518             
519             
520             // adjust the control points that can affect the critical path
521
522             char textDescription[4096*2];
523             textDescription[0] = '\0';
524
525             std::map<std::string,int>::iterator newCP;
526             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
527               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
528                 // decrease the value (increase priority) if within range
529                 int lowerbound = controlPointSpace[newCP->first].first;
530                 if(newCP->second > lowerbound){
531                   newControlPointsAvailable = true;
532                   newCP->second --;
533                 }
534               }
535             }
536            
537             // Create a string for a projections user event
538             if(1){
539               std::map<std::string,int>::iterator newCP;
540               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
541                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
542               }
543             }
544
545             char *userEventString = new char[4096*2];
546             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
547             
548             static int userEventCounter = 20000;
549             CkPrintf("USER EVENT: %s\n", userEventString);
550             
551             traceRegisterUserEvent(userEventString, userEventCounter); 
552             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
553             userEventCounter++;
554             
555           }
556           
557         } else {
558           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
559         }
560         
561         
562       }
563       
564     } else {
565       
566       
567     }
568    
569 #endif
570
571
572
573     if(frameworkShouldAdvancePhase){
574       gotoNextPhase();  
575     }
576     
577     if(haveControlPointChangeCallback){ 
578       // Create a high priority message and send it to the callback
579       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
580       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
581       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
582       controlPointChangeCallback.send(msg);
583     }
584     
585   }
586   
587   /// Determine if any control point is known to affect an entry method
588   bool controlPointManager::controlPointAffectsThisEP(int ep){
589     std::map<std::string, std::set<int> >::iterator iter;
590     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
591       if(iter->second.count(ep)>0){
592         return true;
593       }
594     }
595     return false;    
596   }
597   
598   /// Determine if any control point is known to affect a chare array  
599   bool controlPointManager::controlPointAffectsThisArray(int array){
600     std::map<std::string, std::set<int> >::iterator iter;
601     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
602       if(iter->second.count(array)>0){
603         return true;
604       }
605     }
606     return false;   
607   }
608   
609
610   /// The data from the current phase
611   instrumentedPhase * controlPointManager::currentPhaseData(){
612     int s = allData.phases.size();
613     CkAssert(s>=1);
614     return allData.phases[s-1];
615   }
616  
617
618   /// The data from the previous phase
619   instrumentedPhase * controlPointManager::previousPhaseData(){
620     int s = allData.phases.size();
621     if(s >= 2 && phase_id > 0) {
622       return allData.phases[s-2];
623     } else {
624       return NULL;
625     }
626   }
627  
628   /// The data from two phases back
629   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
630     int s = allData.phases.size();
631     if(s >= 3 && phase_id > 0) {
632       return allData.phases[s-3];
633     } else {
634       return NULL;
635     }
636   }
637   
638
639   /// Called by either the application or the Control Point Framework to advance to the next phase  
640   void controlPointManager::gotoNextPhase(){
641     
642 #ifndef CP_DISABLE_TRACING
643     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
644     fflush(stdout);
645
646     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
647       alreadyRequestedAll = true;
648       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
649       CkPrintf("Requesting all measurements\n");
650       thisProxy.requestAll(*cb);
651       delete cb;
652     
653     } else {
654       
655       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
656         alreadyRequestedMemoryUsage = true;
657         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
658         thisProxy.requestMemoryUsage(*cb);
659         delete cb;
660       }
661       
662       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
663         alreadyRequestedIdleTime = true;
664         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
665         thisProxy.requestIdleTime(*cb);
666         delete cb;
667       }
668     }
669     
670
671 #endif
672
673
674
675 #if CMK_LBDB_ON && 0
676     LBDatabase * myLBdatabase = LBDatabaseObj();
677     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
678     const CkVec<LBObj*> objs = myLBDB->getObjs();
679     const int objCount = myLBDB->getObjCount();
680     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
681     
682     floatType maxObjWallTime = -1.0;
683     
684     for(int i=0;i<objs.length();i++){
685       LBObj* o = objs[i];
686       const LDObjData d = o->ObjData();
687       floatType cpuTime = d.cpuTime;
688       floatType wallTime = d.wallTime;
689       // can also get object handles from the LDObjData struct
690       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
691       if(wallTime > maxObjWallTime){
692
693       }
694       
695     }
696
697     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
698     
699 #endif    
700
701
702     
703     // increment phase id
704     phase_id++;
705     
706
707     // Create new entry for the phase we are starting now
708     instrumentedPhase * newPhase = new instrumentedPhase();
709     allData.phases.push_back(newPhase);
710     
711     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
712
713   }
714
715   /// An application uses this to register an instrumented timing for this phase
716   void controlPointManager::setTiming(double time){
717     currentPhaseData()->times.push_back(time);
718
719 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
720        
721     // First we should register this currently executing message as a path, because it is likely an important one to consider.
722     //    registerTerminalEntryMethod();
723     
724     // save the critical path for this phase
725     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
726     //    maxTerminalPathHistory.reset();
727     
728     
729     // Reset the counts for the currently executing message
730     //resetThisEntryPath();
731         
732 #endif
733     
734   }
735   
736   /// Entry method called on all PEs to request CPU utilization statistics
737   void controlPointManager::requestIdleTime(CkCallback cb){
738 #ifndef CP_DISABLE_TRACING
739     double i = localControlPointTracingInstance()->idleRatio();
740     double idle[3];
741     idle[0] = i;
742     idle[1] = i;
743     idle[2] = i;
744     
745     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
746     
747     localControlPointTracingInstance()->resetTimings();
748
749     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
750 #else
751     CkAbort("Should not get here\n");
752 #endif
753   }
754   
755   /// All processors reduce their memory usages in requestIdleTime() to this method
756   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
757 #ifndef CP_DISABLE_TRACING
758     int size=msg->getSize() / sizeof(double);
759     CkAssert(size==3);
760     double *r=(double *) msg->getData();
761         
762     instrumentedPhase* prevPhase = previousPhaseData();
763     if(prevPhase != NULL){
764       prevPhase->idleTime.min = r[0];
765       prevPhase->idleTime.avg = r[1]/CkNumPes();
766       prevPhase->idleTime.max = r[2];
767       prevPhase->idleTime.print();
768       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
769     } else {
770       CkPrintf("There is no previous phase to store the idle time measurements\n");
771     }
772     
773     alreadyRequestedIdleTime = false;
774     checkForShutdown();
775     delete msg;
776 #else
777     CkAbort("Should not get here\n");
778 #endif
779   }
780
781
782
783
784
785
786   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
787   void controlPointManager::requestAll(CkCallback cb){
788 #ifndef CP_DISABLE_TRACING
789     TraceControlPoints *t = localControlPointTracingInstance();
790
791     double data[ALL_REDUCTION_SIZE];
792
793     double *idle = data;
794     double *over = data+3;
795     double *mem = data+6;
796     double *msgBytes = data+7;  
797     double *grainsize = data+11;  
798
799     const double i = t->idleRatio();
800     idle[0] = i;
801     idle[1] = i;
802     idle[2] = i;
803
804     const double o = t->overheadRatio();
805     over[0] = o;
806     over[1] = o;
807     over[2] = o;
808
809     const double m = t->memoryUsageMB();
810     mem[0] = m;
811
812     msgBytes[0] = t->b2;
813     msgBytes[1] = t->b3;
814     msgBytes[2] = t->b2mlen;
815     msgBytes[3] = t->b3mlen;
816
817     grainsize[0] = t->grainSize();
818     
819     localControlPointTracingInstance()->resetAll();
820
821     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
822 #else
823     CkAbort("Should not get here\n");
824 #endif
825   }
826   
827   /// All processors reduce their memory usages in requestIdleTime() to this method
828   void controlPointManager::gatherAll(CkReductionMsg *msg){
829 #ifndef CP_DISABLE_TRACING
830     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
831     int size=msg->getSize() / sizeof(double);
832     double *data=(double *) msg->getData();
833         
834     double *idle = data;
835     double *over = data+3;
836     double *mem = data+6;
837     double *msgBytes = data+7;
838     double *granularity = data+11;
839
840
841     //    std::string b = allData.toString();
842
843     instrumentedPhase* prevPhase = previousPhaseData();
844     if(prevPhase != NULL){
845       prevPhase->idleTime.min = idle[0];
846       prevPhase->idleTime.avg = idle[1]/CkNumPes();
847       prevPhase->idleTime.max = idle[2];
848       
849       prevPhase->overheadTime.min = over[0];
850       prevPhase->overheadTime.avg = over[1]/CkNumPes();
851       prevPhase->overheadTime.max = over[2];
852       
853       prevPhase->memoryUsageMB = mem[0];
854       
855       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
856
857       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
858       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
859
860       /** The average of the grain sizes on all PEs in us */
861       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
862
863       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
864       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
865
866       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
867       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
868
869       if(bytesPerInvoke2 > bytesPerInvoke3)
870         prevPhase->bytesPerInvoke = bytesPerInvoke2;
871       else
872         prevPhase->bytesPerInvoke = bytesPerInvoke3;
873
874       //      prevPhase->print();
875       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
876
877       //      std::string a = allData.toString();
878
879       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
880       
881     } else {
882       CkPrintf("There is no previous phase to store measurements\n");
883     }
884     
885     alreadyRequestedAll = false;
886     checkForShutdown();
887     delete msg;
888 #else
889     CkAbort("Should not get here\n");
890 #endif
891   }
892
893
894
895
896   void controlPointManager::checkForShutdown(){
897     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
898       doExitNow();
899     }
900   }
901
902
903   void controlPointManager::exitIfReady(){
904      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
905        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
906        doExitNow();
907      } else {
908        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
909        exitWhenReady = true;
910      }
911   }
912
913
914
915   void controlPointManager::doExitNow(){
916           writeOutputToDisk();
917           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
918           CkExit();
919   }
920
921   void controlPointManager::writeOutputToDisk(){
922           if(writeDataFileAtShutdown){
923                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
924           }
925   }
926
927
928   /// Entry method called on all PEs to request memory usage
929   void controlPointManager::requestMemoryUsage(CkCallback cb){
930     int m = CmiMaxMemoryUsage() / 1024 / 1024;
931     CmiResetMaxMemory();
932     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
933     contribute(sizeof(int),&m,CkReduction::max_int, cb);
934   }
935
936   /// All processors reduce their memory usages to this method
937   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
938     int size=msg->getSize() / sizeof(int);
939     CkAssert(size==1);
940     int *m=(int *) msg->getData();
941
942     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
943     
944     instrumentedPhase* prevPhase = previousPhaseData();
945     if(prevPhase != NULL){
946       prevPhase->memoryUsageMB = m[0];
947     } else {
948       CkPrintf("No place to store memory usage");
949     }
950
951     alreadyRequestedMemoryUsage = false;
952     checkForShutdown();
953     delete msg;
954   }
955
956
957 //   /// Inform the control point framework that a named control point affects the priorities of some array  
958 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
959 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
960     
961 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
962 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
963 //     } else {
964 //       std::set<int> s;
965 //       s.insert(groupIdx);
966 //       affectsPrioritiesArray[std::string(name)] = s;
967 //     }
968     
969 // #if DEBUGPRINT   
970 //     std::map<std::string, std::set<int> >::iterator f;
971 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
972 //       std::string name = f->first;
973 //       std::set<int> &vals = f->second;
974 //       cout << "Control point " << name << " affects arrays: ";
975 //       std::set<int>::iterator i;
976 //       for(i=vals.begin(); i!=vals.end();++i){
977 //      cout << *i << " ";
978 //       }
979 //       cout << endl;
980 //     }
981 // #endif
982     
983 //   }
984   
985 //   /// Inform the control point framework that a named control point affects the priority of some entry method
986 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
987 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
988
989 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
990 //       affectsPrioritiesEP[std::string(name)].insert(idx);
991 //     } else {
992 //       std::set<int> s;
993 //       s.insert(idx);
994 //       affectsPrioritiesEP[std::string(name)] = s;
995 //     }
996     
997 // #if DEBUGPRINT
998 //     std::map<std::string, std::set<int> >::iterator f;
999 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1000 //       std::string name = f->first;
1001 //       std::set<int> &vals = f->second;
1002 //       cout << "Control point " << name << " affects EP: ";
1003 //       std::set<int>::iterator i;
1004 //       for(i=vals.begin(); i!=vals.end();++i){
1005 //      cout << *i << " ";
1006 //       }
1007 //       cout << endl;
1008 //     }
1009 // #endif
1010
1011
1012 //   }
1013   
1014
1015
1016 /// An interface callable by the application.
1017 void gotoNextPhase(){
1018   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1019 }
1020
1021 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1022 {
1023   gotoNextPhase();
1024 }
1025
1026
1027 /// A mainchare that is used just to create our controlPointManager group at startup
1028 class controlPointMain : public CBase_controlPointMain {
1029 public:
1030   controlPointMain(CkArgMsg* args){
1031 #if OLDRANDSEED
1032     struct timeval tp;
1033     gettimeofday(& tp, NULL);
1034     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1035 #else
1036     double time = CmiWallTimer();
1037     int sec = (int)time;
1038     int usec = (int)((time - (double)sec)*1000000.0);
1039     random_seed =  sec ^ usec;
1040 #endif
1041     
1042     
1043     double period;
1044     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1045     if(haveSamplePeriod){
1046       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1047       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1048     } else {
1049       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1050     }
1051     
1052     
1053     
1054     whichTuningScheme = RandomSelection;
1055
1056
1057     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1058       whichTuningScheme = RandomSelection;
1059     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1060       whichTuningScheme = ExhaustiveSearch;
1061     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1062       whichTuningScheme = AlwaysDefaults;
1063     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1064       whichTuningScheme = SimulatedAnnealing;
1065     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1066       whichTuningScheme = CriticalPathAutoPrioritization;
1067     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1068       whichTuningScheme = UseBestKnownTiming;
1069     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1070       whichTuningScheme = UseSteering;
1071     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1072       whichTuningScheme = MemoryAware;
1073     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1074       whichTuningScheme = Simplex;
1075     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1076       whichTuningScheme = DivideAndConquer;
1077     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1078       whichTuningScheme = LDBPeriod;
1079     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1080       whichTuningScheme = LDBPeriodLinear;
1081     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1082       whichTuningScheme = LDBPeriodQuadratic;
1083     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1084       whichTuningScheme = LDBPeriodOptimal;
1085     }
1086
1087     char *defValStr = NULL;
1088     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1089       CkPrintf("You specified default value string: %s\n", defValStr);
1090       
1091       // Parse the string, looking for commas
1092      
1093
1094       char *tok = strtok(defValStr, ",");
1095       while (tok) {
1096         // split on the equal sign
1097         int len = strlen(tok);
1098         char *eqsign = strchr(tok, '=');
1099         if(eqsign != NULL && eqsign != tok){
1100           *eqsign = '\0';
1101           char *cpname = tok;
1102           std::string cpName(tok);
1103           char *cpDefVal = eqsign+1;      
1104           int v=-1;
1105           if(sscanf(cpDefVal, "%d", &v) == 1){
1106             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1107             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1108             defaultControlPointValues[cpName] = v;
1109           }
1110         }
1111         tok = strtok(NULL, ",");
1112       }
1113
1114     }
1115
1116     shouldGatherAll = false;
1117     shouldGatherMemoryUsage = false;
1118     shouldGatherUtilization = false;
1119     
1120     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1121       shouldGatherAll = true;
1122     } else {
1123       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1124         shouldGatherMemoryUsage = true;
1125       }
1126       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1127         shouldGatherUtilization = true;
1128       }
1129     }
1130     
1131     writeDataFileAtShutdown = false;   
1132     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1133       writeDataFileAtShutdown = true;
1134     }
1135
1136     shouldFilterOutputData = true;
1137     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1138       shouldFilterOutputData = false;
1139     }
1140
1141
1142    loadDataFileAtStartup = false;   
1143     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1144       loadDataFileAtStartup = true;
1145     }
1146
1147     char *cpdatafile;
1148     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1149       sprintf(CPDataFilename, "%s", cpdatafile);
1150     } else {
1151       sprintf(CPDataFilename, "controlPointData.txt");
1152     }
1153
1154
1155     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1156   }
1157   ~controlPointMain(){}
1158 };
1159
1160 /// An interface callable by the application.
1161 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1162   CkAssert(CkMyPe() == 0);
1163   CkPrintf("Application has registered a control point change callback\n");
1164   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1165 }
1166
1167 /// An interface callable by the application.
1168 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1169   if(CkMyPe() == 0){
1170     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1171     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1172   }
1173 }
1174
1175 /// An interface callable by the application.
1176 void registerControlPointTiming(double time){
1177   CkAssert(CkMyPe() == 0);
1178 #if DEBUGPRINT>0
1179   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1180 #endif
1181   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1182 }
1183
1184 /// An interface callable by the application.
1185 void controlPointTimingStamp() {
1186   CkAssert(CkMyPe() == 0);
1187 #if DEBUGPRINT>0
1188   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1189 #endif
1190   
1191   static double prev_time = 0.0;
1192   double t = CmiWallTimer();
1193   double duration = t - prev_time;
1194   prev_time = t;
1195     
1196   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1197 }
1198
1199 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1200 {
1201   controlPointTimingStamp();
1202 }
1203
1204
1205
1206
1207
1208 /// Shutdown the control point framework, writing data to disk if necessary
1209 extern "C" void controlPointShutdown(){
1210   if(CkMyPe() == 0){
1211
1212     // wait for gathering of idle time & memory usage to complete
1213     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1214
1215   }
1216 }
1217
1218 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1219 void controlPointInitNode(){
1220 //  CkPrintf("controlPointInitNode()\n");
1221   registerExitFn(controlPointShutdown);
1222 }
1223
1224 /// Called periodically to allow control point framework to do things periodically
1225 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1226 #ifdef DEBUGPRINT
1227   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1228 #endif
1229   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1230   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1231 }
1232
1233
1234
1235
1236
1237 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1238 /// user observed characteristic to adapt specific control point values.
1239 /// This function must return valid values for newControlPoints.
1240 void controlPointManager::generatePlan() {
1241   const double startGenerateTime = CmiWallTimer();
1242   const int phase_id = this->phase_id;
1243   const int effective_phase = allData.phases.size();
1244
1245   // Only generate a plan once per phase
1246   if(generatedPlanForStep == phase_id) 
1247     return;
1248   generatedPlanForStep = phase_id;
1249  
1250   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1251   printTuningScheme();
1252
1253   // By default lets put the previous phase data into newControlPoints
1254   instrumentedPhase *prevPhase = previousPhaseData();
1255   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1256           newControlPoints[cpsIter->first] = cpsIter->second;
1257   }
1258
1259
1260   if( whichTuningScheme == RandomSelection ){
1261     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1262     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1263       const std::string &name = cpsIter->first;
1264       const std::pair<int,int> &bounds = cpsIter->second;
1265       const int lb = bounds.first;
1266       const int ub = bounds.second;
1267       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1268     }
1269   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1270     // -----------------------------------------------------------
1271     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1272     //   
1273     // This scheme will return the median value for the range 
1274     // early on, and then will transition over to the new control points
1275     // determined by the critical path adapting code
1276
1277     // This won't work until the periodic function is fixed up a bit
1278
1279     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1280     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1281       const std::string &name = cpsIter->first;
1282       const std::pair<int,int> &bounds = cpsIter->second;
1283       const int lb = bounds.first;
1284       const int ub = bounds.second;
1285       newControlPoints[name] =  (lb+ub)/2;
1286     }
1287
1288   } else if ( whichTuningScheme == MemoryAware ) {
1289
1290     // -----------------------------------------------------------
1291     //  STEERING BASED ON MEMORY USAGE
1292
1293     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1294     instrumentedPhase *prevPhase = previousPhaseData();
1295  
1296     if(phase_id%2 == 0){
1297       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1298       twoAgoPhase->print();
1299       CkPrintf("\n");
1300       fflush(stdout);
1301       
1302       // See if memory usage is low:
1303       double memUsage = twoAgoPhase->memoryUsageMB;
1304       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1305       fflush(stdout);
1306       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1307         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1308         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1309         
1310         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1311         newControlPoints = twoAgoPhase->controlPoints;
1312
1313
1314         CkPrintf("Steering (memory based) initialized plan\n");
1315         fflush(stdout);
1316
1317         // look for a possible control point knob to turn
1318         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1319         
1320         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1321         bool found = false;
1322         std::string cpName;
1323         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1324         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1325         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1326           cpName = iter->first;
1327           info = &iter->second;
1328           found = true;
1329           break;
1330         }
1331
1332         // Adapt the control point value
1333         if(found){
1334           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1335           fflush(stdout);
1336           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1337           const int maxValue = controlPointSpace[cpName].second;
1338           
1339           if(twoAgoValue+1 <= maxValue){
1340             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1341           }
1342         }
1343         
1344       }
1345     }
1346
1347     CkPrintf("Steering (memory based) done for this phase\n");
1348     fflush(stdout);
1349
1350   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1351
1352     // -----------------------------------------------------------
1353     //  USE BEST KNOWN TIME  
1354     static bool firstTime = true;
1355     if(firstTime){
1356       firstTime = false;
1357       instrumentedPhase *best = allData.findBest(); 
1358       CkPrintf("Best known phase is:\n");
1359       best->print();
1360       CkPrintf("\n");
1361       
1362       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1363       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1364         const std::string &name = cpsIter->first;
1365         newControlPoints[name] =  best->controlPoints[name];
1366       }
1367     }
1368   } else if( whichTuningScheme == LDBPeriod) {
1369     // Assume this is used in this manner:
1370     //  1) go to next phase
1371     //  2) request control point
1372     //  3) load balancing
1373     //  4) computation
1374     
1375     
1376     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1377     instrumentedPhase *prevPhase = previousPhaseData();
1378     
1379     
1380     const std::vector<double> &times = twoAgoPhase->times;
1381     const int oldNumTimings = times.size();
1382
1383
1384     const std::vector<double> &timesNew = prevPhase->times;
1385     const int newNumTimings = timesNew.size();
1386
1387
1388     if(oldNumTimings > 4 && newNumTimings > 4){
1389       
1390       // Build model of execution time based on two phases ago
1391       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1392       
1393       double oldSum = 0;
1394       
1395       for(int i=2; i<oldNumTimings; i++){
1396         oldSum += times[i];
1397       }
1398       
1399       const double oldAvg = oldSum / (oldNumTimings-2);
1400       
1401       
1402       
1403       
1404       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1405       const double expectedTotalTime = oldAvg * newNumTimings;
1406       
1407       
1408       // Measure actual time
1409       double newSum = 0.0;
1410       for(int i=2; i<newNumTimings; ++i){
1411         newSum += timesNew[i];
1412       }
1413       
1414       const double newAvg = newSum / (newNumTimings-2);
1415       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1416       
1417       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1418       
1419       // Determine load balance cost
1420       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1421       
1422       const double benefitAfterLB = benefit - lbcost;
1423     
1424     
1425       // Determine whether LB cost outweights the estimated benefit
1426       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1427     
1428     
1429       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1430       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1431         const std::string &name = cpsIter->first;
1432         const std::pair<int,int> &bounds = cpsIter->second;
1433         const int lb = bounds.first;
1434         const int ub = bounds.second;
1435       
1436         if(benefitAfterLB > 0){
1437           CkPrintf("Constant Model: Beneficial LB\n");
1438           int newval = newControlPoints[name] / 2;
1439           if(newval > lb)
1440             newControlPoints[name] = newval;
1441           else 
1442             newControlPoints[name] = lb;
1443         } else {
1444           CkPrintf("Constant Model: Detrimental LB\n");
1445           int newval = newControlPoints[name] * 2;
1446           if(newval < ub)
1447             newControlPoints[name] = newval;
1448           else
1449             newControlPoints[name] = ub;
1450         }
1451       }
1452     }
1453     
1454     
1455   }  else if( whichTuningScheme == LDBPeriodLinear) {
1456     // Assume this is used in this manner:
1457     //  1) go to next phase
1458     //  2) request control point
1459     //  3) load balancing
1460     //  4) computation
1461
1462
1463     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1464     instrumentedPhase *prevPhase = previousPhaseData();
1465     
1466     const std::vector<double> &times = twoAgoPhase->times;
1467     const int oldNumTimings = times.size();
1468
1469     const std::vector<double> &timesNew = prevPhase->times;
1470     const int newNumTimings = timesNew.size();
1471     
1472
1473     if(oldNumTimings > 4 && newNumTimings > 4){
1474
1475       // Build model of execution time based on two phases ago
1476       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1477       const int b1 = 2 + (oldNumTimings-2)/2;
1478       double s1 = 0;
1479       double s2 = 0;
1480     
1481       const double ldbStepsTime = times[0] + times[1];
1482     
1483       for(int i=2; i<b1; i++){
1484         s1 += times[i];
1485       }
1486       for(int i=b1; i<oldNumTimings; i++){
1487         s2 += times[i];
1488       }
1489       
1490       
1491       // Compute the estimated time for the last phase's data
1492     
1493       const double a1 = s1 / (double)(b1-2);
1494       const double a2 = s2 / (double)(oldNumTimings-b1);
1495       const double aold = (a1+a2)/2.0;
1496
1497       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1498         
1499     
1500       // Measure actual time
1501       double sum = 0.0;
1502       for(int i=2; i<newNumTimings; ++i){
1503         sum += timesNew[i];
1504       }
1505
1506       const double avg = sum / ((double)(newNumTimings-2));
1507       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1508
1509       const double benefit = expectedTotalTime - actualTotalTime;
1510
1511       // Determine load balance cost
1512       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1513
1514       const double benefitAfterLB = benefit - lbcost;
1515
1516     
1517       // Determine whether LB cost outweights the estimated benefit
1518       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1519     
1520     
1521     
1522       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1523       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1524         const std::string &name = cpsIter->first;
1525         const std::pair<int,int> &bounds = cpsIter->second;
1526         const int lb = bounds.first;
1527         const int ub = bounds.second;
1528       
1529         if(benefitAfterLB > 0){
1530           CkPrintf("Linear Model: Beneficial LB\n");
1531           int newval = newControlPoints[name] / 2;
1532           if(newval > lb)
1533             newControlPoints[name] = newval;
1534           else 
1535             newControlPoints[name] = lb;
1536         } else {
1537           CkPrintf("Linear Model: Detrimental LB\n");
1538           int newval = newControlPoints[name] * 2;
1539           if(newval < ub)
1540             newControlPoints[name] = newval;
1541           else 
1542             newControlPoints[name] = ub;
1543         }
1544       }
1545     }
1546
1547   }
1548
1549   else if( whichTuningScheme == LDBPeriodQuadratic) {
1550     // Assume this is used in this manner:
1551     //  1) go to next phase
1552     //  2) request control point
1553     //  3) load balancing
1554     //  4) computation
1555
1556
1557     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1558     instrumentedPhase *prevPhase = previousPhaseData();
1559         
1560     const std::vector<double> &times = twoAgoPhase->times;
1561     const int oldNumTimings = times.size();
1562
1563     const std::vector<double> &timesNew = prevPhase->times;
1564     const int newNumTimings = timesNew.size();
1565
1566     
1567     if(oldNumTimings > 4 && newNumTimings > 4){
1568
1569
1570       // Build model of execution time based on two phases ago
1571       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1572       const int b1 = 2 + (oldNumTimings-2)/3;
1573       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1574       double s1 = 0;
1575       double s2 = 0;
1576       double s3 = 0;
1577
1578       const double ldbStepsTime = times[0] + times[1];
1579     
1580       for(int i=2; i<b1; i++){
1581         s1 += times[i];
1582       }
1583       for(int i=b1; i<b2; i++){
1584         s2 += times[i];
1585       }
1586       for(int i=b2; i<oldNumTimings; i++){
1587         s3 += times[i];
1588       }
1589
1590     
1591       // Compute the estimated time for the last phase's data
1592     
1593       const double a1 = s1 / (double)(b1-2);
1594       const double a2 = s2 / (double)(b2-b1);
1595       const double a3 = s3 / (double)(oldNumTimings-b2);
1596     
1597       const double a = (a1-2.0*a2+a3)/2.0;
1598       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1599       const double c = a3;
1600     
1601       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1602       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1603       const double x2 = 0.5;
1604       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1605    
1606     
1607       // Measure actual time
1608       double sum = 0.0;
1609       for(int i=2; i<newNumTimings; ++i){
1610         sum += timesNew[i];
1611       }
1612
1613       const double avg = sum / ((double)(newNumTimings-2));
1614       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1615
1616       const double benefit = expectedTotalTime - actualTotalTime;
1617
1618       // Determine load balance cost
1619       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1620
1621       const double benefitAfterLB = benefit - lbcost;
1622
1623     
1624       // Determine whether LB cost outweights the estimated benefit
1625       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1626     
1627     
1628     
1629       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1630       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1631         const std::string &name = cpsIter->first;
1632         const std::pair<int,int> &bounds = cpsIter->second;
1633         const int lb = bounds.first;
1634         const int ub = bounds.second;
1635       
1636         if(benefitAfterLB > 0){
1637           CkPrintf("QuadraticModel: Beneficial LB\n");
1638           int newval = newControlPoints[name] / 2;
1639           if(newval > lb)
1640             newControlPoints[name] = newval;
1641           else 
1642             newControlPoints[name] = lb;
1643         } else {
1644           CkPrintf("QuadraticModel: Detrimental LB\n");
1645           int newval = newControlPoints[name] * 2;
1646           if(newval < ub)
1647             newControlPoints[name] = newval;
1648           else 
1649             newControlPoints[name] = ub;
1650         }
1651       
1652       }
1653     }
1654     
1655
1656   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1657     // Assume this is used in this manner:
1658     //  1) go to next phase
1659     //  2) request control point
1660     //  3) load balancing
1661     //  4) computation
1662
1663
1664
1665     instrumentedPhase *prevPhase = previousPhaseData();
1666     
1667     const std::vector<double> &times = prevPhase->times;
1668     const int numTimings = times.size();
1669     
1670     if( numTimings > 4){
1671
1672       const int b1 = 2 + (numTimings-2)/2;
1673       double s1 = 0;
1674       double s2 = 0;
1675     
1676     
1677       for(int i=2; i<b1; i++){
1678         s1 += times[i];
1679       }
1680       for(int i=b1; i<numTimings; i++){
1681         s2 += times[i];
1682       }
1683       
1684     
1685       const double a1 = s1 / (double)(b1-2);
1686       const double a2 = s2 / (double)(numTimings-b1);
1687       const double avg = (a1+a1) / 2.0;
1688
1689       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1690
1691       const double ldbStepsTime = times[0] + times[1];
1692       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1693       
1694       int newval = lround(sqrt(2.0*lbcost/m));
1695       
1696
1697       CkPrintf("Optimal Model: lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1698     
1699     
1700       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1701       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1702         // TODO: lookup only control points that are relevant instead of all of them
1703         const std::string &name = cpsIter->first;
1704         const std::pair<int,int> &bounds = cpsIter->second;
1705         const int lb = bounds.first;
1706         const int ub = bounds.second;
1707         
1708         if(newval < lb){
1709           newControlPoints[name] = lb;
1710         } else if(newval > ub){
1711           newControlPoints[name] = ub;
1712         } else {
1713           newControlPoints[name] = newval;
1714         } 
1715         
1716       }
1717       
1718       
1719     }
1720     
1721  
1722
1723   } else if ( whichTuningScheme == UseSteering ) {
1724           // -----------------------------------------------------------
1725           //  STEERING BASED ON KNOWLEDGE
1726
1727           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1728           // plans are only generated after 3 phases
1729
1730           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1731           instrumentedPhase *prevPhase = previousPhaseData();
1732
1733           if(phase_id%4 == 0){
1734                   CkPrintf("Steering based on 2 phases ago:\n");
1735                   twoAgoPhase->print();
1736                   CkPrintf("\n");
1737                   fflush(stdout);
1738
1739                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1740
1741
1742                   // ========================================= Concurrency =============================================
1743                   // See if idle time is high:
1744                   {
1745                           double idleTime = twoAgoPhase->idleTime.avg;
1746                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1747                           fflush(stdout);
1748                           if(idleTime > 0.10){
1749                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1750                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1751
1752                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1753
1754                                   bool found = false;
1755                                   std::string cpName;
1756                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1757                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1758                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1759                                           cpName = iter->first;
1760                                           info = &iter->second;
1761
1762                                           // Initialize a new plan based on two phases ago
1763                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1764
1765                                           CkPrintf("Steering found knob to turn\n");
1766                                           fflush(stdout);
1767
1768                                           if(info->first == ControlPoint::EFF_INC){
1769                                                   const int maxValue = controlPointSpace[cpName].second;
1770                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1771                                                   if(twoAgoValue+1 <= maxValue){
1772                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1773                                                   }
1774                                           } else {
1775                                                   const int minValue = controlPointSpace[cpName].second;
1776                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1777                                                   if(twoAgoValue-1 >= minValue){
1778                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1779                                                   }
1780                                           }
1781
1782                                           possibleNextStepPlans.push_back(aNewPlan);
1783
1784                                   }
1785                           }
1786                   }
1787
1788                   // ========================================= Grain Size =============================================
1789                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1790                   {
1791                           double overheadTime = twoAgoPhase->overheadTime.avg;
1792                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1793                           fflush(stdout);
1794                           if(overheadTime > 0.10){
1795                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1796                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1797
1798                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1799
1800                                   bool found = false;
1801                                   std::string cpName;
1802                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1803                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1804                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1805                                           cpName = iter->first;
1806                                           info = &iter->second;
1807
1808                                           // Initialize a new plan based on two phases ago
1809                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1810
1811
1812
1813                                           CkPrintf("Steering found knob to turn\n");
1814                                           fflush(stdout);
1815
1816                                           if(info->first == ControlPoint::EFF_INC){
1817                                                   const int maxValue = controlPointSpace[cpName].second;
1818                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1819                                                   if(twoAgoValue+1 <= maxValue){
1820                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1821                                                   }
1822                                           } else {
1823                                                   const int minValue = controlPointSpace[cpName].second;
1824                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1825                                                   if(twoAgoValue-1 >= minValue){
1826                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1827                                                   }
1828                                           }
1829
1830                                           possibleNextStepPlans.push_back(aNewPlan);
1831
1832                                   }
1833
1834                           }
1835                   }
1836                   // ========================================= GPU Offload =============================================
1837                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1838                   {
1839                           double idleTime = twoAgoPhase->idleTime.avg;
1840                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1841                           fflush(stdout);
1842                           if(idleTime > 0.10){
1843                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1844                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1845
1846                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1847
1848                                   bool found = false;
1849                                   std::string cpName;
1850                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1851                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1852                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1853                                           cpName = iter->first;
1854                                           info = &iter->second;
1855
1856                                           // Initialize a new plan based on two phases ago
1857                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1858
1859
1860                                           CkPrintf("Steering found knob to turn\n");
1861                                           fflush(stdout);
1862
1863                                           if(info->first == ControlPoint::EFF_DEC){
1864                                                   const int maxValue = controlPointSpace[cpName].second;
1865                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1866                                                   if(twoAgoValue+1 <= maxValue){
1867                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1868                                                   }
1869                                           } else {
1870                                                   const int minValue = controlPointSpace[cpName].second;
1871                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1872                                                   if(twoAgoValue-1 >= minValue){
1873                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1874                                                   }
1875                                           }
1876
1877                                           possibleNextStepPlans.push_back(aNewPlan);
1878
1879                                   }
1880
1881                           }
1882                   }
1883
1884                   // ========================================= Done =============================================
1885
1886
1887                   if(possibleNextStepPlans.size() > 0){
1888                           newControlPoints = possibleNextStepPlans[0];
1889                   }
1890
1891
1892                   CkPrintf("Steering done for this phase\n");
1893                   fflush(stdout);
1894
1895           }  else {
1896                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1897                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1898                   fflush(stdout);
1899           }
1900
1901
1902
1903   } else if( whichTuningScheme == SimulatedAnnealing ) {
1904
1905     // -----------------------------------------------------------
1906     //  SIMULATED ANNEALING
1907     //  Simulated Annealing style hill climbing method
1908     //
1909     //  Find the best search space configuration, and try something
1910     //  nearby it, with a radius decreasing as phases increase
1911
1912     CkPrintf("Finding best phase\n");
1913     instrumentedPhase *bestPhase = allData.findBest();  
1914     CkPrintf("best found:\n"); 
1915     bestPhase->print(); 
1916     CkPrintf("\n"); 
1917     
1918
1919     const int convergeByPhase = 100;
1920     // Determine from 0.0 to 1.0 how far along we are
1921     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1922
1923     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1924     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1925       const std::string &name = cpsIter->first;
1926       const std::pair<int,int> &bounds = cpsIter->second;
1927       const int minValue = bounds.first;
1928       const int maxValue = bounds.second;
1929       
1930       const int before = bestPhase->controlPoints[name];   
1931   
1932       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1933
1934       int high = min(before+range, maxValue);
1935       int low = max(before-range, minValue);
1936       
1937       newControlPoints[name] = low;
1938       if(high-low > 0){
1939         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1940       } 
1941       
1942     }
1943
1944   } else if ( whichTuningScheme == DivideAndConquer ) {
1945
1946           // -----------------------------------------------------------
1947           //  STEERING FOR Divide & Conquer Programs
1948           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1949           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1950           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1951
1952           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1953           // plans are only generated after 3 phases
1954
1955           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1956           instrumentedPhase *prevPhase = previousPhaseData();
1957
1958           if(phase_id%4 == 0){
1959                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1960                   twoAgoPhase->print();
1961                   CkPrintf("\n");
1962                   fflush(stdout);
1963
1964                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1965
1966
1967                   // ========================================= Concurrency =============================================
1968                   // See if idle time is high:
1969                   {
1970                           double idleTime = twoAgoPhase->idleTime.avg;
1971                           double overheadTime = twoAgoPhase->overheadTime.avg;
1972
1973
1974                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1975                           fflush(stdout);
1976                           if(idleTime+overheadTime > 0.10){
1977                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1978                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1979
1980                                   int direction = -1;
1981                                   if (idleTime>overheadTime){
1982                                           // We need to decrease the grain size, or increase the available concurrency
1983                                           direction = 1;
1984                                   }
1985
1986                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1987
1988                                   bool found = false;
1989                                   std::string cpName;
1990                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1991                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1992                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1993                                           cpName = iter->first;
1994                                           info = &iter->second;
1995                                           
1996                                           // Initialize a new plan based on two phases ago
1997                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1998                                           bool newPlanExists = false;
1999
2000                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2001                                           fflush(stdout);
2002
2003
2004
2005                                           if(info->first == ControlPoint::EFF_INC){
2006                                                   const int minValue = controlPointSpace[cpName].first;
2007                                                   const int maxValue = controlPointSpace[cpName].second;
2008                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2009                                                   const int newVal = twoAgoValue+1*direction;
2010                                                   if(newVal <= maxValue && newVal >= minValue){
2011                                                     aNewPlan[cpName] = newVal;
2012                                                     newPlanExists = true;
2013                                                   } else {
2014                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2015                                                   }
2016                                           } else {
2017                                                   const int minValue = controlPointSpace[cpName].first;
2018                                                   const int maxValue = controlPointSpace[cpName].second;
2019                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2020                                                   const int newVal = twoAgoValue-1*direction;
2021                                                   if(newVal <= maxValue && newVal >= minValue){
2022                                                           aNewPlan[cpName] = newVal;
2023                                                           newPlanExists = true;
2024                                                   } else {
2025                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2026                                                   }
2027                                           }
2028
2029                                           if(newPlanExists){
2030                                             possibleNextStepPlans.push_back(aNewPlan);
2031                                           }
2032                                   }
2033                           }
2034                   }
2035
2036                   if(possibleNextStepPlans.size() > 0){
2037                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2038                     newControlPoints = possibleNextStepPlans[0];
2039                   } else {
2040                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2041                   }
2042           }
2043
2044   } else if( whichTuningScheme == Simplex ) {
2045
2046           // -----------------------------------------------------------
2047           //  Nelder Mead Simplex Algorithm
2048           //
2049           //  A scheme that takes a simplex (n+1 points) and moves it
2050           //  toward the minimum, eventually converging there.
2051
2052           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2053
2054   } else if( whichTuningScheme == ExhaustiveSearch ) {
2055     
2056     // -----------------------------------------------------------
2057     // EXHAUSTIVE SEARCH
2058    
2059     int numDimensions = controlPointSpace.size();
2060     CkAssert(numDimensions > 0);
2061   
2062     vector<int> lowerBounds(numDimensions);
2063     vector<int> upperBounds(numDimensions); 
2064   
2065     int d=0;
2066     std::map<std::string, pair<int,int> >::iterator iter;
2067     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2068       //    CkPrintf("Examining dimension %d\n", d);
2069       lowerBounds[d] = iter->second.first;
2070       upperBounds[d] = iter->second.second;
2071       d++;
2072     }
2073    
2074     // get names for each dimension (control point)
2075     vector<std::string> names(numDimensions);
2076     d=0;
2077     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2078       names[d] = niter->first;
2079       d++;
2080     }
2081   
2082   
2083     // Create the first possible configuration
2084     vector<int> config = lowerBounds;
2085     config.push_back(0);
2086   
2087     // Increment until finding an unused configuration
2088     allData.cleanupNames(); // put -1 values in for any control points missing
2089     std::vector<instrumentedPhase*> &phases = allData.phases;     
2090
2091     while(true){
2092     
2093       std::vector<instrumentedPhase*>::iterator piter; 
2094       bool testedConfiguration = false; 
2095       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2096       
2097         // Test if the configuration matches this phase
2098         bool match = true;
2099         for(int j=0;j<numDimensions;j++){
2100           match &= (*piter)->controlPoints[names[j]] == config[j];
2101         }
2102       
2103         if(match){
2104           testedConfiguration = true; 
2105           break;
2106         } 
2107       } 
2108     
2109       if(testedConfiguration == false){ 
2110         break;
2111       } 
2112     
2113       // go to next configuration
2114       config[0] ++;
2115       // Propagate "carrys"
2116       for(int i=0;i<numDimensions;i++){
2117         if(config[i] > upperBounds[i]){
2118           config[i+1]++;
2119           config[i] = lowerBounds[i];
2120         }
2121       }
2122       // check if we have exhausted all possible values
2123       if(config[numDimensions] > 0){
2124         break;
2125       }
2126        
2127     }
2128   
2129     if(config[numDimensions] > 0){
2130       for(int i=0;i<numDimensions;i++){ 
2131         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2132       }
2133     }
2134
2135     // results are now in config[i]
2136     
2137     for(int i=0; i<numDimensions; i++){
2138       newControlPoints[names[i]] = config[i];
2139       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2140     }
2141
2142
2143   } else {
2144     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2145   }
2146
2147
2148   const double endGenerateTime = CmiWallTimer();
2149   
2150   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2151   
2152 }
2153
2154
2155
2156
2157 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2158
2159
2160 /// Get control point value from range of integers [lb,ub]
2161 int controlPoint(const char *name, int lb, int ub){
2162   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2163   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2164   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2165   int result;
2166
2167   // if we already have control point values for phase, return them
2168   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2169     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2170     return thisPhaseData->controlPoints[std::string(name)];
2171   }
2172   
2173
2174   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2175     // For the first few phases, just use the lower bound, or the default if one was provided 
2176     // This ensures that the ranges for all the control points are known before we do anything fancy
2177     result = lb;
2178
2179     if(defaultControlPointValues.count(std::string(name)) > 0){
2180       int v = defaultControlPointValues[std::string(name)];
2181       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2182       result = v;
2183     }
2184
2185   } else if(controlPointSpace.count(std::string(name)) == 0){
2186     // if this is the first time we've seen the CP, then return the lower bound
2187     result = lb;
2188     
2189   }  else {
2190     // Generate a plan one time for each phase
2191     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2192     
2193     // Use the planned value:
2194     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2195     
2196   }
2197
2198   if(!isInRange(result,ub,lb)){
2199     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2200     fflush(stdout);
2201     fflush(stderr);
2202   }
2203   CkAssert(isInRange(result,ub,lb));
2204   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2205
2206   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2207
2208   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2209   //  thisPhaseData->print();
2210   
2211   return result;
2212 }
2213
2214
2215 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2216   CkAssert(sizeof(lb) == 4);
2217   CkAssert(CkMyPe() == 0);
2218   return controlPoint("FortranCP", *lb, *ub);
2219 }
2220
2221
2222
2223
2224 /// Inform the control point framework that a named control point affects the priorities of some array  
2225 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2226 //   CkGroupID aid = arraybase.ckGetArrayID();
2227 //   int groupIdx = aid.idx;
2228 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2229 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2230 // }
2231
2232
2233 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2234 // void controlPointPriorityEntry(const char *name, int idx){
2235 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2236 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2237 // }
2238
2239
2240
2241
2242
2243 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2244
2245     This function decomposes the algorithm into a state machine that allows it to
2246     evaluate one or more configurations through subsequent clls to this function.
2247     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2248
2249     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2250     so a list of them will be created and they will be evaluated, one per call.
2251
2252     Currently there is no stopping criteria, but the simplex ought to contract down
2253     to a few close configurations, and hence not much change will happen after this 
2254     point.
2255
2256  */
2257 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2258
2259         if(useBestKnown){
2260                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2261                 return;
2262         }
2263
2264
2265         if(firstSimplexPhase< 0){
2266                 firstSimplexPhase = allData.phases.size()-1;
2267                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2268         }
2269
2270         int n = controlPointSpace.size();
2271
2272         CkAssert(n>=2);
2273
2274
2275         if(simplexState == beginning){
2276                 // First we evaluate n+1 random points, then we go to a different state
2277                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2278                         CkPrintf("Simplex Tuning: chose random configuration\n");
2279
2280                         // Choose random values from the middle third of the range in each dimension
2281                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2282                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2283                                 const std::string &name = cpsIter->first;
2284                                 const std::pair<int,int> &bounds = cpsIter->second;
2285                                 const int lb = bounds.first;
2286                                 const int ub = bounds.second;
2287                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2288                         }
2289                 } else {
2290                         // Set initial simplex:
2291                         for(int i=0; i<n+1; i++){
2292                                 simplexIndices.insert(firstSimplexPhase+i);
2293                         }
2294                         CkAssert(simplexIndices.size() == n+1);
2295
2296                         // Transition to reflecting state
2297                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2298
2299                 }
2300
2301         } else if (simplexState == reflecting){
2302                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2303                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2304
2305                 // Find the highest time from other points in the simplex
2306                 double highestTimeForOthersInSimplex = 0.0;
2307                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2308                         double t = allData.phases[*iter]->medianTime();
2309                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2310                                 highestTimeForOthersInSimplex = t;
2311                         }
2312                 }
2313
2314                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2315
2316                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2317                         // if y* < yl,  transition to "expanding"
2318                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2319
2320                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2321                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2322                         CkAssert(simplexIndices.size() == n+1);
2323                         simplexIndices.erase(worstPhase);
2324                         CkAssert(simplexIndices.size() == n);
2325                         simplexIndices.insert(pPhase);
2326                         CkAssert(simplexIndices.size() == n+1);
2327                         // Transition to reflecting state
2328                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2329
2330                 } else {
2331                         // if y* > yh
2332                         if(recentPhaseTime <= worstTime){
2333                                 // replace Ph with P*
2334                                 CkAssert(simplexIndices.size() == n+1);
2335                                 simplexIndices.erase(worstPhase);
2336                                 CkAssert(simplexIndices.size() == n);
2337                                 simplexIndices.insert(pPhase);
2338                                 CkAssert(simplexIndices.size() == n+1);
2339                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2340                                 worstPhase = pPhase;
2341                                 // Just as a sanity check, make sure we don't use the non-updated values.
2342                                 worst.clear();
2343                         }
2344
2345                         // Now, form P** and do contracting phase
2346                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2347
2348                 }
2349
2350         } else if (simplexState == doneExpanding){
2351                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2352                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2353                 // A new configuration has been evaluated
2354
2355                 // Check to see if y** < y1
2356                 if(recentPhaseTime < bestTime){
2357                         // replace Ph by P**
2358                         CkAssert(simplexIndices.size() == n+1);
2359                         simplexIndices.erase(worstPhase);
2360                         CkAssert(simplexIndices.size() == n);
2361                         simplexIndices.insert(p2Phase);
2362                         CkAssert(simplexIndices.size() == n+1);
2363                 } else {
2364                         //      replace Ph by P*
2365                         CkAssert(simplexIndices.size() == n+1);
2366                         simplexIndices.erase(worstPhase);
2367                         CkAssert(simplexIndices.size() == n);
2368                         simplexIndices.insert(pPhase);
2369                         CkAssert(simplexIndices.size() == n+1);
2370                 }
2371
2372                 // Transition to reflecting state
2373                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2374
2375         }  else if (simplexState == contracting){
2376                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2377                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2378                 // A new configuration has been evaluated
2379
2380                 // Check to see if y** > yh
2381                 if(recentPhaseTime <= worstTime){
2382                         // replace Ph by P**
2383                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2384                         CkAssert(simplexIndices.size() == n+1);
2385                         simplexIndices.erase(worstPhase);
2386                         CkAssert(simplexIndices.size() == n);
2387                         simplexIndices.insert(p2Phase);
2388                         CkAssert(simplexIndices.size() == n+1);
2389                         // Transition to reflecting state
2390                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2391
2392                 } else {
2393                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2394                         simplexState = stillContracting;
2395
2396                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2397                         stillMustContractList = simplexIndices;
2398
2399                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2400                 }
2401
2402         } else if (simplexState == stillContracting){
2403                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2404
2405                 if(stillMustContractList.size()>0){
2406                         int c = *stillMustContractList.begin();
2407                         stillMustContractList.erase(c);
2408                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2409
2410                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2411
2412                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2413                         int v = 0;
2414                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2415                                 const std::string &name = cpsIter->first;
2416                                 const std::pair<int,int> &bounds = cpsIter->second;
2417                                 const int lb = bounds.first;
2418                                 const int ub = bounds.second;
2419
2420                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2421
2422                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2423                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2424                                 v++;
2425                         }
2426                 } else {
2427                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2428                         CkAssert(stillMustContractList.size() == 0);
2429                         simplexIndices.clear();
2430                         CkAssert(simplexIndices.size()==0);
2431                         for(int i=0; i<n+1; i++){
2432                                 simplexIndices.insert(allData.phases.size()-2-i);
2433                         }
2434                         CkAssert(simplexIndices.size()==n+1);
2435
2436                         // Transition to reflecting state
2437                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2438
2439                 }
2440
2441
2442         } else if (simplexState == expanding){
2443                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2444                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2445                 // A new configuration has been evaluated
2446
2447                 // determine if y** < yl
2448                 if(recentPhaseTime < bestTime){
2449                         // replace Ph by P**
2450                         CkAssert(simplexIndices.size() == n+1);
2451                         simplexIndices.erase(worstPhase);
2452                         CkAssert(simplexIndices.size() == n);
2453                         simplexIndices.insert(p2Phase);
2454                         CkAssert(simplexIndices.size() == n+1);
2455                         // Transition to reflecting state
2456                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2457
2458                 } else {
2459                         // else, replace ph with p*
2460                         CkAssert(simplexIndices.size() == n+1);
2461                         simplexIndices.erase(worstPhase);
2462                         CkAssert(simplexIndices.size() == n);
2463                         simplexIndices.insert(pPhase);
2464                         CkAssert(simplexIndices.size() == n+1);
2465                         // Transition to reflecting state
2466                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2467                 }
2468
2469
2470         } else {
2471                 CkAbort("Unknown simplexState");
2472         }
2473
2474 }
2475
2476
2477
2478 /** Replace the worst point with its reflection across the centroid. */
2479 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2480
2481         int n = controlPointSpace.size();
2482
2483         printSimplex(allData);
2484
2485         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2486
2487
2488         // Quit if the diameter of our simplex is small
2489         double maxr = 0.0;
2490         for(int i=0; i<n+1; i++){
2491                 //              Compute r^2 of this simplex point from the centroid
2492                 double r2 = 0.0;
2493                 std::vector<double> p = pointCoords(allData, i);
2494                 for(int d=0; d<p.size(); d++){
2495                         double r1 = (p[d] * centroid[d]);
2496                         r2 += r1*r1;
2497                 }
2498                 if(r2 > maxr)
2499                         maxr = r2;
2500         }
2501
2502 #if 0
2503         // At some point we quit this tuning
2504         if(maxr < 10){
2505                 useBestKnown = true;
2506                 instrumentedPhase *best = allData.findBest();
2507                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2508
2509                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2510                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2511                         const std::string &name = cpsIter->first;
2512                         newControlPoints[name] =  best->controlPoints[name];
2513                 }
2514         }
2515 #endif
2516
2517         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2518
2519         pPhase = allData.phases.size()-1;
2520         P.resize(n);
2521         for(int i=0; i<n; i++){
2522                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2523         }
2524
2525         for(int i=0; i<P.size(); i++){
2526                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2527         }
2528
2529
2530         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2531         int v = 0;
2532         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2533                 const std::string &name = cpsIter->first;
2534                 const std::pair<int,int> &bounds = cpsIter->second;
2535                 const int lb = bounds.first;
2536                 const int ub = bounds.second;
2537                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2538                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2539                 v++;
2540         }
2541
2542
2543         // Transition to "reflecting" state
2544         simplexState = reflecting;
2545         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2546
2547 }
2548
2549
2550
2551
2552 /** Replace the newly tested reflection with a further expanded version of itself. */
2553 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2554         int n = controlPointSpace.size();
2555         printSimplex(allData);
2556
2557         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2558         // I believe the equation for P** in the text on page 308 is correct.
2559
2560         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2561
2562
2563         p2Phase = allData.phases.size()-1;
2564         P2.resize(n);
2565         for(int i=0; i<n; i++){
2566                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2567         }
2568
2569         for(int i=0; i<P2.size(); i++){
2570                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2571         }
2572
2573
2574         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2575         int v = 0;
2576         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2577                 const std::string &name = cpsIter->first;
2578                 const std::pair<int,int> &bounds = cpsIter->second;
2579                 const int lb = bounds.first;
2580                 const int ub = bounds.second;
2581                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2582                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2583                 v++;
2584         }
2585
2586
2587         // Transition to "doneExpanding" state
2588         simplexState = doneExpanding;
2589         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2590
2591 }
2592
2593
2594
2595
2596 /** Replace the newly tested reflection with a further expanded version of itself. */
2597 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2598         int n = controlPointSpace.size();
2599         printSimplex(allData);
2600
2601         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2602
2603
2604         p2Phase = allData.phases.size()-1;
2605         P2.resize(n);
2606         for(int i=0; i<n; i++){
2607                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2608         }
2609
2610         for(int i=0; i<P2.size(); i++){
2611                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2612         }
2613
2614
2615         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2616         int v = 0;
2617         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2618                 const std::string &name = cpsIter->first;
2619                 const std::pair<int,int> &bounds = cpsIter->second;
2620                 const int lb = bounds.first;
2621                 const int ub = bounds.second;
2622                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2623                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2624                 v++;
2625         }
2626         
2627         
2628         // Transition to "contracting" state
2629         simplexState = contracting;
2630         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2631
2632 }
2633
2634
2635
2636
2637 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2638         int n = controlPointSpace.size();
2639
2640         // Find worst performing point in the simplex
2641         worstPhase = -1;
2642         worstTime = -1.0;
2643         bestPhase = 10000000;
2644         bestTime = 10000000;
2645         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2646                 double t = allData.phases[*iter]->medianTime();
2647                 if(t > worstTime){
2648                         worstTime = t;
2649                         worstPhase = *iter;
2650                 }
2651                 if(t < bestTime){
2652                         bestTime = t;
2653                         bestPhase = *iter;
2654                 }
2655         }
2656         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2657
2658         best = pointCoords(allData, bestPhase);
2659         CkAssert(best.size() == n);
2660
2661         worst = pointCoords(allData, worstPhase);
2662         CkAssert(worst.size() == n);
2663
2664         // Calculate centroid of the remaining points in the simplex
2665         centroid.resize(n);
2666         for(int i=0; i<n; i++){
2667                 centroid[i] = 0.0;
2668         }
2669
2670         int numPts = 0;
2671
2672         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2673                 if(*iter != worstPhase){
2674                         numPts ++;
2675                         // Accumulate into the result vector
2676                         int c = 0;
2677                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2678                                 centroid[c] += citer->second;
2679                                 c++;
2680                         }
2681
2682                 }
2683         }
2684
2685         // Now divide the sums by the number of points.
2686         for(int v = 0; v<centroid.size(); v++) {
2687                 centroid[v] /= (double)numPts;
2688         }
2689
2690         CkAssert(centroid.size() == n);
2691
2692         for(int i=0; i<centroid.size(); i++){
2693                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2694         }
2695
2696
2697 }
2698
2699
2700
2701 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2702         std::vector<double> result;
2703         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2704                 result.push_back((double)citer->second);
2705         }
2706         return result;
2707 }
2708
2709
2710
2711
2712 void ControlPointWriteOutputToDisk(){
2713         CkAssert(CkMyPe() == 0);
2714         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2715 }
2716
2717
2718
2719 /*! @} */
2720
2721 #ifdef CP_DISABLE_TRACING
2722 #include "ControlPointsNoTrace.def.h"
2723 #else
2724 #include "ControlPoints.def.h"
2725 #endif