Charj: Merged changes.
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15 #include <math.h>
16 #include <climits>
17
18 #define roundDouble(x)        ((long)(x+0.5))
19 #define myAbs(x)   (((x)>=0.0)?(x):(-1.0*(x)))
20 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
21
22 inline double closestInRange(double v, double a, double b){
23   return (v<a) ? a : ((v>b)?b:v);
24 }
25
26
27 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
28  
29
30 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
31 /**  @{ */
32
33 using namespace std;
34
35 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
36
37
38 //#undef DEBUGPRINT
39 //#define DEBUGPRINT 4
40
41 static void periodicProcessControlPoints(void* ptr, double currWallTime);
42
43
44 // A pointer to this PE's controlpoint manager Proxy
45 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
46 /* readonly */ int random_seed;
47 /* readonly */ long controlPointSamplePeriod;
48 /* readonly */ int whichTuningScheme;
49 /* readonly */ bool writeDataFileAtShutdown;
50 /* readonly */ bool shouldFilterOutputData;
51 /* readonly */ bool loadDataFileAtStartup;
52 /* readonly */ bool shouldGatherMemoryUsage;
53 /* readonly */ bool shouldGatherUtilization;
54 /* readonly */ bool shouldGatherAll;
55 /* readonly */ char CPDataFilename[512];
56
57
58
59 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
60 /// These probably come from the command line arguments, so are available only on PE 0
61 std::map<std::string, int> defaultControlPointValues;
62
63
64
65 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
66
67
68
69 void printTuningScheme(){
70   switch(whichTuningScheme){
71   case RandomSelection:
72     CkPrintf("Tuning Scheme: RandomSelection\n");
73     break;
74   case AlwaysDefaults:
75     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
76     break;
77   case SimulatedAnnealing:
78     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
79     break;
80   case ExhaustiveSearch:
81     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
82     break;
83   case CriticalPathAutoPrioritization:
84     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
85     break;
86   case UseBestKnownTiming:
87     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
88     break;
89   case UseSteering:
90     CkPrintf("Tuning Scheme: UseSteering\n");
91     break;
92   case MemoryAware:
93     CkPrintf("Tuning Scheme: MemoryAware\n");
94     break;
95   case Simplex:
96     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
97     break;
98   case DivideAndConquer:
99     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
100     break;
101   case LDBPeriod:
102     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
103     break;
104   case LDBPeriodLinear:
105     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
106     break;
107   case LDBPeriodQuadratic:
108     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
109     break;
110   case LDBPeriodOptimal:
111     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
112     break;
113   default:
114     CkPrintf("Unknown tuning scheme\n");
115     break;
116   }
117   fflush(stdout);
118 }
119
120
121
122 /// A reduction type that combines idle time measurements (min/sum/max etc.)
123 CkReduction::reducerType idleTimeReductionType;
124 /// A reducer that combines idle time measurements (min/sum/max etc.)
125 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
126   double ret[3];
127   if(nMsg > 0){
128     CkAssert(msgs[0]->getSize()==3*sizeof(double));
129     double *m=(double *)msgs[0]->getData();
130     ret[0]=m[0];
131     ret[1]=m[1];
132     ret[2]=m[2];
133   }
134   for (int i=1;i<nMsg;i++) {
135     CkAssert(msgs[i]->getSize()==3*sizeof(double));
136     double *m=(double *)msgs[i]->getData();
137     ret[0]=min(ret[0],m[0]);
138     ret[1]+=m[1];
139     ret[2]=max(ret[2],m[2]);
140   }  
141   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
142 }
143
144
145
146 /// A reduction type that combines idle, overhead, and memory measurements
147 CkReduction::reducerType allMeasuresReductionType;
148 /// A reducer that combines idle, overhead, and memory measurements
149 #define ALL_REDUCTION_SIZE 12
150 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
151   double ret[ALL_REDUCTION_SIZE];
152   if(nMsg > 0){
153     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
154     double *m=(double *)msgs[0]->getData();
155     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
156   }
157   for (int i=1;i<nMsg;i++) {
158     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
159     double *m=(double *)msgs[i]->getData();
160     // idle time (min/sum/max)
161     ret[0]=min(ret[0],m[0]);
162     ret[1]+=m[1];
163     ret[2]=max(ret[2],m[2]);
164     // overhead time (min/sum/max)
165     ret[3]=min(ret[3],m[3]);
166     ret[4]+=m[4];
167     ret[5]=max(ret[5],m[5]);
168     // mem usage (max)
169     ret[6]=max(ret[6],m[6]);
170     // bytes per invocation for two types of entry methods
171     ret[7]+=m[7];
172     ret[8]+=m[8];
173     ret[9]+=m[9];
174     ret[10]+=m[10];
175     // Grain size (avg)
176     ret[11]+=m[11];
177   }  
178   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
179 }
180
181
182 /// Registers the control point framework's reduction handlers at startup on each PE
183 /*initproc*/ void registerCPReductions(void) {
184   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
185   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
186 }
187
188
189
190
191
192
193 /// Return an integer between 0 and num-1 inclusive
194 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
195 unsigned int randInt(unsigned int num, const char* name, int seed=0){
196   CkAssert(num > 0);
197
198   unsigned long hash = 0;
199   unsigned int c;
200   unsigned char * str = (unsigned char*)name;
201
202   while (c = *str++){
203     unsigned int c2 = (c+64)%128;
204     unsigned int c3 = (c2*5953)%127;
205     hash = c3 + (hash << 6) + (hash << 16) - hash;
206   }
207
208   unsigned long shuffled1 = (hash*2083)%7907;
209   unsigned long shuffled2 = (seed*4297)%2017;
210
211   unsigned long shuffled3 = (random_seed*4799)%7919;
212
213   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
214
215   unsigned int result = ((namehash * 6029) % 1117) % num;
216
217   CkAssert(result >=0 && result < num);
218   return result;
219 }
220
221
222
223 controlPointManager::controlPointManager() {
224   generatedPlanForStep = -1;
225
226     exitWhenReady = false;
227     alreadyRequestedMemoryUsage = false;   
228     alreadyRequestedIdleTime = false;
229     alreadyRequestedAll = false;
230     
231     instrumentedPhase * newPhase = new instrumentedPhase();
232     allData.phases.push_back(newPhase);   
233     
234     frameworkShouldAdvancePhase = false;
235     haveControlPointChangeCallback = false;
236 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
237     
238     phase_id = 0;
239
240     if(loadDataFileAtStartup){    
241       loadDataFile();
242     }
243
244     
245     if(CkMyPe() == 0){
246       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
247     }
248
249     traceRegisterUserEvent("No currently executing message", 5000);
250     traceRegisterUserEvent("Zero time along critical path", 5010);
251     traceRegisterUserEvent("Positive total time along critical path", 5020);
252     traceRegisterUserEvent("env->setPathHistory()", 6000);
253     traceRegisterUserEvent("Critical Path", 5900);
254     traceRegisterUserEvent("Table Entry", 5901);
255
256
257 #if USER_EVENT_FOR_REGISTERTERMINALPATH
258     traceRegisterUserEvent("registerTerminalPath", 100); 
259 #endif
260
261   }
262   
263
264  controlPointManager::~controlPointManager(){
265 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
266   }
267
268
269   /// Loads the previous run data file
270   void controlPointManager::loadDataFile(){
271     ifstream infile(CPDataFilename);
272     vector<std::string> names;
273     std::string line;
274   
275     while(getline(infile,line)){
276       if(line[0] != '#')
277         break;
278     }
279   
280     int numTimings = 0;
281     std::istringstream n(line);
282     n >> numTimings;
283   
284     while(getline(infile,line)){ 
285       if(line[0] != '#') 
286         break; 
287     }
288
289     int numControlPointNames = 0;
290     std::istringstream n2(line);
291     n2 >> numControlPointNames;
292   
293     for(int i=0; i<numControlPointNames; i++){
294       getline(infile,line);
295       names.push_back(line);
296     }
297
298     for(int i=0;i<numTimings;i++){
299       getline(infile,line);
300       while(line[0] == '#')
301         getline(infile,line); 
302
303       instrumentedPhase * ips = new instrumentedPhase();
304
305       std::istringstream iss(line);
306
307       // Read memory usage for phase
308       iss >> ips->memoryUsageMB;
309       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
310
311       // Read idle time
312       iss >> ips->idleTime.min;
313       iss >> ips->idleTime.avg;
314       iss >> ips->idleTime.max;
315
316       // Read overhead time
317       iss >> ips->overheadTime.min;
318       iss >> ips->overheadTime.avg;
319       iss >> ips->overheadTime.max;
320
321       // read bytePerInvoke
322       iss >> ips->bytesPerInvoke;
323
324       // read grain size
325       iss >> ips->grainSize;
326
327       // Read control point values
328       for(int cp=0;cp<numControlPointNames;cp++){
329         int cpvalue;
330         iss >> cpvalue;
331         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
332       }
333
334       // ignore median time
335       double mt;
336       iss >> mt;
337
338       // Read all times
339
340       double time;
341
342       while(iss >> time){
343         ips->times.push_back(time);
344 #if DEBUGPRINT > 5
345         CkPrintf("read time %lf from file\n", time);
346 #endif
347       }
348
349       allData.phases.push_back(ips);
350
351     }
352
353     infile.close();
354   }
355
356
357
358   /// Add the current data to allData and output it to a file
359   void controlPointManager::writeDataFile(){
360     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
361     ofstream outfile(CPDataFilename);
362     allData.cleanupNames();
363
364 //    string s = allData.toString();
365 //    CkPrintf("At end: \n %s\n", s.c_str());
366
367     if(shouldFilterOutputData){
368       allData.verify();
369       allData.filterOutIncompletePhases();
370     }
371
372 //    string s2 = allData.toString();
373 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
374     if(allData.toString().length() > 10){
375       outfile << allData.toString();
376     } else {
377       outfile << " No data available to save to disk " << endl;
378     }
379     outfile.close();
380   }
381
382   /// User can register a callback that is called when application should advance to next phase
383   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
384     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
385     controlPointChangeCallback = cb;
386     haveControlPointChangeCallback = true;
387   }
388
389
390 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
391 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
392   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
393 }
394
395   /// Called periodically by the runtime to handle the control points
396   /// Currently called on each PE
397   void controlPointManager::processControlPoints(){
398
399     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
400
401
402     //==========================================================================================
403     // Print the data for each phase
404
405     const int s = allData.phases.size();
406
407 #if DEBUGPRINT
408     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
409     for(int p=0;p<s;++p){
410       const instrumentedPhase &phase = allData.phases[p];
411       const idleTimeContainer &idle = phase.idleTime;
412       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
413       vector<double> const &times = phase.times;
414
415       CkPrintf("Phase %d:\n", p);
416       idle.print();
417      //   CkPrintf("critical paths: (* affected by control point)\n");
418         //   for(int i=0;i<criticalPaths.size(); i++){
419         // If affected by a control point
420         //      criticalPaths[i].print();
421
422       //        criticalPaths[i].print();
423       //        CkPrintf("\n");
424         
425
426       //   }
427       CkPrintf("Timings:\n");
428       for(int i=0;i<times.size(); i++){
429         CkPrintf("%lf ", times[i]);
430       }
431       CkPrintf("\n");
432
433     }
434     
435     CkPrintf("\n\n");
436
437
438 #endif
439
440
441
442     //==========================================================================================
443     // If this is a phase during which we try to adapt control point values based on critical path
444 #if 0
445     if( s%5 == 4) {
446
447       // Find the most recent phase with valid critical path data and idle time measurements      
448       int whichPhase=-1;
449       for(int p=0; p<s; ++p){
450         const instrumentedPhase &phase = allData.phases[p];
451         const idleTimeContainer &idle = phase.idleTime;
452         if(idle.isValid() && phase.criticalPaths.size()>0 ){
453           whichPhase = p;
454         }
455       }
456       
457       
458       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
459       const instrumentedPhase &phase = allData.phases[whichPhase];
460       const idleTimeContainer &idle = phase.idleTime;
461       
462       if(idle.min > 0.1){
463         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
464         
465         // Determine the median critical path for this phase
466         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
467         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
468         CkAssert(phase.criticalPaths.size() > 0);
469         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
470         
471         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
472         
473         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
474           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
475
476
477           CkPrintf("Finding control points related to the critical path\n");
478           int cpcount = 0;
479           std::set<std::string> controlPointsAffectingCriticalPath;
480
481           
482           for(int e=0;e<path.getNumUsed();e++){
483             if(path.getUsedCount(e)>0){
484               int ep = path.getUsedEp(e);
485
486               std::map<std::string, std::set<int> >::iterator iter;
487               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
488                 if(iter->second.count(ep)>0){
489                   controlPointsAffectingCriticalPath.insert(iter->first);
490                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
491                   cpcount++;
492                 }
493               }
494               
495             }
496           }
497           
498
499           if(cpcount==0){
500             CkPrintf("No control points are known to affect the critical path\n");
501           } else {
502             double beginTime = CmiWallTimer();
503
504             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
505             
506             newControlPoints = phase.controlPoints;
507             
508             if(frameworkShouldAdvancePhase){
509               gotoNextPhase();  
510             }
511             
512             if(haveControlPointChangeCallback){ 
513 #if DEBUGPRINT
514               CkPrintf("Calling control point change callback\n");
515 #endif
516               // Create a high priority message and send it to the callback
517               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
518               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
519               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
520               controlPointChangeCallback.send(msg);
521               
522             }
523             
524             
525             // adjust the control points that can affect the critical path
526
527             char textDescription[4096*2];
528             textDescription[0] = '\0';
529
530             std::map<std::string,int>::iterator newCP;
531             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
532               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
533                 // decrease the value (increase priority) if within range
534                 int lowerbound = controlPointSpace[newCP->first].first;
535                 if(newCP->second > lowerbound){
536                   newControlPointsAvailable = true;
537                   newCP->second --;
538                 }
539               }
540             }
541            
542             // Create a string for a projections user event
543             if(1){
544               std::map<std::string,int>::iterator newCP;
545               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
546                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
547               }
548             }
549
550             char *userEventString = new char[4096*2];
551             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
552             
553             static int userEventCounter = 20000;
554             CkPrintf("USER EVENT: %s\n", userEventString);
555             
556             traceRegisterUserEvent(userEventString, userEventCounter); 
557             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
558             userEventCounter++;
559             
560           }
561           
562         } else {
563           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
564         }
565         
566         
567       }
568       
569     } else {
570       
571       
572     }
573    
574 #endif
575
576
577
578     if(frameworkShouldAdvancePhase){
579       gotoNextPhase();  
580     }
581     
582     if(haveControlPointChangeCallback){ 
583       // Create a high priority message and send it to the callback
584       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
585       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
586       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
587       controlPointChangeCallback.send(msg);
588     }
589     
590   }
591   
592   /// Determine if any control point is known to affect an entry method
593   bool controlPointManager::controlPointAffectsThisEP(int ep){
594     std::map<std::string, std::set<int> >::iterator iter;
595     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
596       if(iter->second.count(ep)>0){
597         return true;
598       }
599     }
600     return false;    
601   }
602   
603   /// Determine if any control point is known to affect a chare array  
604   bool controlPointManager::controlPointAffectsThisArray(int array){
605     std::map<std::string, std::set<int> >::iterator iter;
606     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
607       if(iter->second.count(array)>0){
608         return true;
609       }
610     }
611     return false;   
612   }
613   
614
615   /// The data from the current phase
616   instrumentedPhase * controlPointManager::currentPhaseData(){
617     int s = allData.phases.size();
618     CkAssert(s>=1);
619     return allData.phases[s-1];
620   }
621  
622
623   /// The data from the previous phase
624   instrumentedPhase * controlPointManager::previousPhaseData(){
625     int s = allData.phases.size();
626     if(s >= 2 && phase_id > 0) {
627       return allData.phases[s-2];
628     } else {
629       return NULL;
630     }
631   }
632  
633   /// The data from two phases back
634   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
635     int s = allData.phases.size();
636     if(s >= 3 && phase_id > 0) {
637       return allData.phases[s-3];
638     } else {
639       return NULL;
640     }
641   }
642   
643
644   /// Called by either the application or the Control Point Framework to advance to the next phase  
645   void controlPointManager::gotoNextPhase(){
646     
647 #ifndef CP_DISABLE_TRACING
648     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
649     fflush(stdout);
650
651     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
652       alreadyRequestedAll = true;
653       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
654       CkPrintf("Requesting all measurements\n");
655       thisProxy.requestAll(*cb);
656       delete cb;
657     
658     } else {
659       
660       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
661         alreadyRequestedMemoryUsage = true;
662         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
663         thisProxy.requestMemoryUsage(*cb);
664         delete cb;
665       }
666       
667       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
668         alreadyRequestedIdleTime = true;
669         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
670         thisProxy.requestIdleTime(*cb);
671         delete cb;
672       }
673     }
674     
675
676 #endif
677
678
679
680 #if CMK_LBDB_ON && 0
681     LBDatabase * myLBdatabase = LBDatabaseObj();
682     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
683     const CkVec<LBObj*> objs = myLBDB->getObjs();
684     const int objCount = myLBDB->getObjCount();
685     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
686     
687     floatType maxObjWallTime = -1.0;
688     
689     for(int i=0;i<objs.length();i++){
690       LBObj* o = objs[i];
691       const LDObjData d = o->ObjData();
692       floatType cpuTime = d.cpuTime;
693       floatType wallTime = d.wallTime;
694       // can also get object handles from the LDObjData struct
695       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
696       if(wallTime > maxObjWallTime){
697
698       }
699       
700     }
701
702     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
703     
704 #endif    
705
706
707     
708     // increment phase id
709     phase_id++;
710     
711
712     // Create new entry for the phase we are starting now
713     instrumentedPhase * newPhase = new instrumentedPhase();
714     allData.phases.push_back(newPhase);
715     
716     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
717
718   }
719
720   /// An application uses this to register an instrumented timing for this phase
721   void controlPointManager::setTiming(double time){
722     currentPhaseData()->times.push_back(time);
723
724 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
725        
726     // First we should register this currently executing message as a path, because it is likely an important one to consider.
727     //    registerTerminalEntryMethod();
728     
729     // save the critical path for this phase
730     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
731     //    maxTerminalPathHistory.reset();
732     
733     
734     // Reset the counts for the currently executing message
735     //resetThisEntryPath();
736         
737 #endif
738     
739   }
740   
741   /// Entry method called on all PEs to request CPU utilization statistics
742   void controlPointManager::requestIdleTime(CkCallback cb){
743 #ifndef CP_DISABLE_TRACING
744     double i = localControlPointTracingInstance()->idleRatio();
745     double idle[3];
746     idle[0] = i;
747     idle[1] = i;
748     idle[2] = i;
749     
750     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
751     
752     localControlPointTracingInstance()->resetTimings();
753
754     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
755 #else
756     CkAbort("Should not get here\n");
757 #endif
758   }
759   
760   /// All processors reduce their memory usages in requestIdleTime() to this method
761   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
762 #ifndef CP_DISABLE_TRACING
763     int size=msg->getSize() / sizeof(double);
764     CkAssert(size==3);
765     double *r=(double *) msg->getData();
766         
767     instrumentedPhase* prevPhase = previousPhaseData();
768     if(prevPhase != NULL){
769       prevPhase->idleTime.min = r[0];
770       prevPhase->idleTime.avg = r[1]/CkNumPes();
771       prevPhase->idleTime.max = r[2];
772       prevPhase->idleTime.print();
773       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
774     } else {
775       CkPrintf("There is no previous phase to store the idle time measurements\n");
776     }
777     
778     alreadyRequestedIdleTime = false;
779     checkForShutdown();
780     delete msg;
781 #else
782     CkAbort("Should not get here\n");
783 #endif
784   }
785
786
787
788
789
790
791   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
792   void controlPointManager::requestAll(CkCallback cb){
793 #ifndef CP_DISABLE_TRACING
794     TraceControlPoints *t = localControlPointTracingInstance();
795
796     double data[ALL_REDUCTION_SIZE];
797
798     double *idle = data;
799     double *over = data+3;
800     double *mem = data+6;
801     double *msgBytes = data+7;  
802     double *grainsize = data+11;  
803
804     const double i = t->idleRatio();
805     idle[0] = i;
806     idle[1] = i;
807     idle[2] = i;
808
809     const double o = t->overheadRatio();
810     over[0] = o;
811     over[1] = o;
812     over[2] = o;
813
814     const double m = t->memoryUsageMB();
815     mem[0] = m;
816
817     msgBytes[0] = t->b2;
818     msgBytes[1] = t->b3;
819     msgBytes[2] = t->b2mlen;
820     msgBytes[3] = t->b3mlen;
821
822     grainsize[0] = t->grainSize();
823     
824     localControlPointTracingInstance()->resetAll();
825
826     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
827 #else
828     CkAbort("Should not get here\n");
829 #endif
830   }
831   
832   /// All processors reduce their memory usages in requestIdleTime() to this method
833   void controlPointManager::gatherAll(CkReductionMsg *msg){
834 #ifndef CP_DISABLE_TRACING
835     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
836     int size=msg->getSize() / sizeof(double);
837     double *data=(double *) msg->getData();
838         
839     double *idle = data;
840     double *over = data+3;
841     double *mem = data+6;
842     double *msgBytes = data+7;
843     double *granularity = data+11;
844
845
846     //    std::string b = allData.toString();
847
848     instrumentedPhase* prevPhase = previousPhaseData();
849     if(prevPhase != NULL){
850       prevPhase->idleTime.min = idle[0];
851       prevPhase->idleTime.avg = idle[1]/CkNumPes();
852       prevPhase->idleTime.max = idle[2];
853       
854       prevPhase->overheadTime.min = over[0];
855       prevPhase->overheadTime.avg = over[1]/CkNumPes();
856       prevPhase->overheadTime.max = over[2];
857       
858       prevPhase->memoryUsageMB = mem[0];
859       
860       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
861
862       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
863       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
864
865       /** The average of the grain sizes on all PEs in us */
866       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
867
868       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
869       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
870
871       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
872       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
873
874       if(bytesPerInvoke2 > bytesPerInvoke3)
875         prevPhase->bytesPerInvoke = bytesPerInvoke2;
876       else
877         prevPhase->bytesPerInvoke = bytesPerInvoke3;
878
879       //      prevPhase->print();
880       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
881
882       //      std::string a = allData.toString();
883
884       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
885       
886     } else {
887       CkPrintf("There is no previous phase to store measurements\n");
888     }
889     
890     alreadyRequestedAll = false;
891     checkForShutdown();
892     delete msg;
893 #else
894     CkAbort("Should not get here\n");
895 #endif
896   }
897
898
899
900
901   void controlPointManager::checkForShutdown(){
902     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
903       doExitNow();
904     }
905   }
906
907
908   void controlPointManager::exitIfReady(){
909      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
910        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
911        doExitNow();
912      } else {
913        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
914        exitWhenReady = true;
915      }
916   }
917
918
919
920   void controlPointManager::doExitNow(){
921           writeOutputToDisk();
922           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
923           CkExit();
924   }
925
926   void controlPointManager::writeOutputToDisk(){
927           if(writeDataFileAtShutdown){
928                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
929           }
930   }
931
932
933   /// Entry method called on all PEs to request memory usage
934   void controlPointManager::requestMemoryUsage(CkCallback cb){
935     int m = CmiMaxMemoryUsage() / 1024 / 1024;
936     CmiResetMaxMemory();
937     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
938     contribute(sizeof(int),&m,CkReduction::max_int, cb);
939   }
940
941   /// All processors reduce their memory usages to this method
942   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
943     int size=msg->getSize() / sizeof(int);
944     CkAssert(size==1);
945     int *m=(int *) msg->getData();
946
947     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
948     
949     instrumentedPhase* prevPhase = previousPhaseData();
950     if(prevPhase != NULL){
951       prevPhase->memoryUsageMB = m[0];
952     } else {
953       CkPrintf("No place to store memory usage");
954     }
955
956     alreadyRequestedMemoryUsage = false;
957     checkForShutdown();
958     delete msg;
959   }
960
961
962 //   /// Inform the control point framework that a named control point affects the priorities of some array  
963 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
964 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
965     
966 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
967 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
968 //     } else {
969 //       std::set<int> s;
970 //       s.insert(groupIdx);
971 //       affectsPrioritiesArray[std::string(name)] = s;
972 //     }
973     
974 // #if DEBUGPRINT   
975 //     std::map<std::string, std::set<int> >::iterator f;
976 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
977 //       std::string name = f->first;
978 //       std::set<int> &vals = f->second;
979 //       cout << "Control point " << name << " affects arrays: ";
980 //       std::set<int>::iterator i;
981 //       for(i=vals.begin(); i!=vals.end();++i){
982 //      cout << *i << " ";
983 //       }
984 //       cout << endl;
985 //     }
986 // #endif
987     
988 //   }
989   
990 //   /// Inform the control point framework that a named control point affects the priority of some entry method
991 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
992 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
993
994 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
995 //       affectsPrioritiesEP[std::string(name)].insert(idx);
996 //     } else {
997 //       std::set<int> s;
998 //       s.insert(idx);
999 //       affectsPrioritiesEP[std::string(name)] = s;
1000 //     }
1001     
1002 // #if DEBUGPRINT
1003 //     std::map<std::string, std::set<int> >::iterator f;
1004 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1005 //       std::string name = f->first;
1006 //       std::set<int> &vals = f->second;
1007 //       cout << "Control point " << name << " affects EP: ";
1008 //       std::set<int>::iterator i;
1009 //       for(i=vals.begin(); i!=vals.end();++i){
1010 //      cout << *i << " ";
1011 //       }
1012 //       cout << endl;
1013 //     }
1014 // #endif
1015
1016
1017 //   }
1018   
1019
1020
1021 /// An interface callable by the application.
1022 void gotoNextPhase(){
1023   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1024 }
1025
1026 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1027 {
1028   gotoNextPhase();
1029 }
1030
1031
1032 /// A mainchare that is used just to create our controlPointManager group at startup
1033 class controlPointMain : public CBase_controlPointMain {
1034 public:
1035   controlPointMain(CkArgMsg* args){
1036 #if OLDRANDSEED
1037     struct timeval tp;
1038     gettimeofday(& tp, NULL);
1039     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1040 #else
1041     double time = CmiWallTimer();
1042     int sec = (int)time;
1043     int usec = (int)((time - (double)sec)*1000000.0);
1044     random_seed =  sec ^ usec;
1045 #endif
1046     
1047     
1048     double period, periodms;
1049     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1050     bool haveSamplePeriodMs = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriodMs", &periodms,"The time between Control Point Framework samples (in milliseconds)");
1051     if(haveSamplePeriod){
1052       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1053       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1054     } else if(haveSamplePeriodMs){
1055       CkPrintf("controlPointSamplePeriodMs = %lf ms\n", periodms);
1056       controlPointSamplePeriod = periodms; /**< A readonly */
1057     } else {
1058       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1059     }
1060
1061     
1062     
1063     
1064     whichTuningScheme = RandomSelection;
1065
1066
1067     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1068       whichTuningScheme = RandomSelection;
1069     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1070       whichTuningScheme = ExhaustiveSearch;
1071     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1072       whichTuningScheme = AlwaysDefaults;
1073     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1074       whichTuningScheme = SimulatedAnnealing;
1075     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1076       whichTuningScheme = CriticalPathAutoPrioritization;
1077     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1078       whichTuningScheme = UseBestKnownTiming;
1079     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1080       whichTuningScheme = UseSteering;
1081     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1082       whichTuningScheme = MemoryAware;
1083     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1084       whichTuningScheme = Simplex;
1085     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1086       whichTuningScheme = DivideAndConquer;
1087     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1088       whichTuningScheme = LDBPeriod;
1089     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1090       whichTuningScheme = LDBPeriodLinear;
1091     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1092       whichTuningScheme = LDBPeriodQuadratic;
1093     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1094       whichTuningScheme = LDBPeriodOptimal;
1095     }
1096
1097     char *defValStr = NULL;
1098     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1099       CkPrintf("You specified default value string: %s\n", defValStr);
1100       
1101       // Parse the string, looking for commas
1102      
1103
1104       char *tok = strtok(defValStr, ",");
1105       while (tok) {
1106         // split on the equal sign
1107         int len = strlen(tok);
1108         char *eqsign = strchr(tok, '=');
1109         if(eqsign != NULL && eqsign != tok){
1110           *eqsign = '\0';
1111           char *cpname = tok;
1112           std::string cpName(tok);
1113           char *cpDefVal = eqsign+1;      
1114           int v=-1;
1115           if(sscanf(cpDefVal, "%d", &v) == 1){
1116             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1117             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1118             defaultControlPointValues[cpName] = v;
1119           }
1120         }
1121         tok = strtok(NULL, ",");
1122       }
1123
1124     }
1125
1126     shouldGatherAll = false;
1127     shouldGatherMemoryUsage = false;
1128     shouldGatherUtilization = false;
1129     
1130     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1131       shouldGatherAll = true;
1132     } else {
1133       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1134         shouldGatherMemoryUsage = true;
1135       }
1136       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1137         shouldGatherUtilization = true;
1138       }
1139     }
1140     
1141     writeDataFileAtShutdown = false;   
1142     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1143       writeDataFileAtShutdown = true;
1144     }
1145
1146     shouldFilterOutputData = true;
1147     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1148       shouldFilterOutputData = false;
1149     }
1150
1151
1152    loadDataFileAtStartup = false;   
1153     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1154       loadDataFileAtStartup = true;
1155     }
1156
1157     char *cpdatafile;
1158     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1159       sprintf(CPDataFilename, "%s", cpdatafile);
1160     } else {
1161       sprintf(CPDataFilename, "controlPointData.txt");
1162     }
1163
1164
1165     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1166   }
1167   ~controlPointMain(){}
1168 };
1169
1170 /// An interface callable by the application.
1171 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1172   CkAssert(CkMyPe() == 0);
1173   CkPrintf("Application has registered a control point change callback\n");
1174   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1175 }
1176
1177 /// An interface callable by the application.
1178 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1179   if(CkMyPe() == 0){
1180     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1181     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1182   }
1183 }
1184
1185 /// An interface callable by the application.
1186 void registerControlPointTiming(double time){
1187   CkAssert(CkMyPe() == 0);
1188 #if DEBUGPRINT>0
1189   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1190 #endif
1191   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1192 }
1193
1194 /// An interface callable by the application.
1195 void controlPointTimingStamp() {
1196   CkAssert(CkMyPe() == 0);
1197 #if DEBUGPRINT>0
1198   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1199 #endif
1200   
1201   static double prev_time = 0.0;
1202   double t = CmiWallTimer();
1203   double duration = t - prev_time;
1204   prev_time = t;
1205     
1206   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1207 }
1208
1209 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1210 {
1211   controlPointTimingStamp();
1212 }
1213
1214
1215 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1216 {
1217   setFrameworkAdvancePhase(*value);
1218 }
1219
1220
1221
1222
1223 /// Shutdown the control point framework, writing data to disk if necessary
1224 extern "C" void controlPointShutdown(){
1225   if(CkMyPe() == 0){
1226
1227     // wait for gathering of idle time & memory usage to complete
1228     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1229
1230   }
1231 }
1232
1233 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1234 void controlPointInitNode(){
1235 //  CkPrintf("controlPointInitNode()\n");
1236   registerExitFn(controlPointShutdown);
1237 }
1238
1239 /// Called periodically to allow control point framework to do things periodically
1240 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1241 #ifdef DEBUGPRINT
1242   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1243 #endif
1244   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1245   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1246 }
1247
1248
1249
1250
1251
1252 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1253 /// user observed characteristic to adapt specific control point values.
1254 /// This function must return valid values for newControlPoints.
1255 void controlPointManager::generatePlan() {
1256   const double startGenerateTime = CmiWallTimer();
1257   const int phase_id = this->phase_id;
1258   const int effective_phase = allData.phases.size();
1259
1260   // Only generate a plan once per phase
1261   if(generatedPlanForStep == phase_id) 
1262     return;
1263   generatedPlanForStep = phase_id;
1264  
1265   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1266   printTuningScheme();
1267
1268   // By default lets put the previous phase data into newControlPoints
1269   instrumentedPhase *prevPhase = previousPhaseData();
1270   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1271           newControlPoints[cpsIter->first] = cpsIter->second;
1272   }
1273
1274
1275   if( whichTuningScheme == RandomSelection ){
1276     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1277     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1278       const std::string &name = cpsIter->first;
1279       const std::pair<int,int> &bounds = cpsIter->second;
1280       const int lb = bounds.first;
1281       const int ub = bounds.second;
1282       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1283     }
1284   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1285     // -----------------------------------------------------------
1286     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1287     //   
1288     // This scheme will return the median value for the range 
1289     // early on, and then will transition over to the new control points
1290     // determined by the critical path adapting code
1291
1292     // This won't work until the periodic function is fixed up a bit
1293
1294     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1295     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1296       const std::string &name = cpsIter->first;
1297       const std::pair<int,int> &bounds = cpsIter->second;
1298       const int lb = bounds.first;
1299       const int ub = bounds.second;
1300       newControlPoints[name] =  (lb+ub)/2;
1301     }
1302
1303   } else if ( whichTuningScheme == MemoryAware ) {
1304
1305     // -----------------------------------------------------------
1306     //  STEERING BASED ON MEMORY USAGE
1307
1308     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1309     instrumentedPhase *prevPhase = previousPhaseData();
1310  
1311     if(phase_id%2 == 0){
1312       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1313       twoAgoPhase->print();
1314       CkPrintf("\n");
1315       fflush(stdout);
1316       
1317       // See if memory usage is low:
1318       double memUsage = twoAgoPhase->memoryUsageMB;
1319       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1320       fflush(stdout);
1321       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1322         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1323         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1324         
1325         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1326         newControlPoints = twoAgoPhase->controlPoints;
1327
1328
1329         CkPrintf("Steering (memory based) initialized plan\n");
1330         fflush(stdout);
1331
1332         // look for a possible control point knob to turn
1333         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1334         
1335         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1336         bool found = false;
1337         std::string cpName;
1338         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1339         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1340         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1341           cpName = iter->first;
1342           info = &iter->second;
1343           found = true;
1344           break;
1345         }
1346
1347         // Adapt the control point value
1348         if(found){
1349           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1350           fflush(stdout);
1351           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1352           const int maxValue = controlPointSpace[cpName].second;
1353           
1354           if(twoAgoValue+1 <= maxValue){
1355             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1356           }
1357         }
1358         
1359       }
1360     }
1361
1362     CkPrintf("Steering (memory based) done for this phase\n");
1363     fflush(stdout);
1364
1365   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1366
1367     // -----------------------------------------------------------
1368     //  USE BEST KNOWN TIME  
1369     static bool firstTime = true;
1370     if(firstTime){
1371       firstTime = false;
1372       instrumentedPhase *best = allData.findBest(); 
1373       CkPrintf("Best known phase is:\n");
1374       best->print();
1375       CkPrintf("\n");
1376       
1377       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1378       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1379         const std::string &name = cpsIter->first;
1380         newControlPoints[name] =  best->controlPoints[name];
1381       }
1382     }
1383   } else if( whichTuningScheme == LDBPeriod) {
1384     // Assume this is used in this manner:
1385     //  1) go to next phase
1386     //  2) request control point
1387     //  3) load balancing
1388     //  4) computation
1389     
1390     
1391     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1392     instrumentedPhase *prevPhase = previousPhaseData();
1393     
1394     
1395     const std::vector<double> &times = twoAgoPhase->times;
1396     const int oldNumTimings = times.size();
1397
1398
1399     const std::vector<double> &timesNew = prevPhase->times;
1400     const int newNumTimings = timesNew.size();
1401
1402
1403     if(oldNumTimings > 4 && newNumTimings > 4){
1404       
1405       // Build model of execution time based on two phases ago
1406       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1407       
1408       double oldSum = 0;
1409       
1410       for(int i=2; i<oldNumTimings; i++){
1411         oldSum += times[i];
1412       }
1413       
1414       const double oldAvg = oldSum / (oldNumTimings-2);
1415       
1416       
1417       
1418       
1419       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1420       const double expectedTotalTime = oldAvg * newNumTimings;
1421       
1422       
1423       // Measure actual time
1424       double newSum = 0.0;
1425       for(int i=2; i<newNumTimings; ++i){
1426         newSum += timesNew[i];
1427       }
1428       
1429       const double newAvg = newSum / (newNumTimings-2);
1430       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1431       
1432       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1433       
1434       // Determine load balance cost
1435       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1436       
1437       const double benefitAfterLB = benefit - lbcost;
1438     
1439     
1440       // Determine whether LB cost outweights the estimated benefit
1441       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1442     
1443     
1444       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1445       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1446         const std::string &name = cpsIter->first;
1447         const std::pair<int,int> &bounds = cpsIter->second;
1448         const int lb = bounds.first;
1449         const int ub = bounds.second;
1450       
1451         if(benefitAfterLB > 0){
1452           CkPrintf("Constant Model: Beneficial LB\n");
1453           int newval = newControlPoints[name] / 2;
1454           if(newval > lb)
1455             newControlPoints[name] = newval;
1456           else 
1457             newControlPoints[name] = lb;
1458         } else {
1459           CkPrintf("Constant Model: Detrimental LB\n");
1460           int newval = newControlPoints[name] * 2;
1461           if(newval < ub)
1462             newControlPoints[name] = newval;
1463           else
1464             newControlPoints[name] = ub;
1465         }
1466       }
1467     }
1468     
1469     
1470   }  else if( whichTuningScheme == LDBPeriodLinear) {
1471     // Assume this is used in this manner:
1472     //  1) go to next phase
1473     //  2) request control point
1474     //  3) load balancing
1475     //  4) computation
1476
1477
1478     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1479     instrumentedPhase *prevPhase = previousPhaseData();
1480     
1481     const std::vector<double> &times = twoAgoPhase->times;
1482     const int oldNumTimings = times.size();
1483
1484     const std::vector<double> &timesNew = prevPhase->times;
1485     const int newNumTimings = timesNew.size();
1486     
1487
1488     if(oldNumTimings > 4 && newNumTimings > 4){
1489
1490       // Build model of execution time based on two phases ago
1491       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1492       const int b1 = 2 + (oldNumTimings-2)/2;
1493       double s1 = 0;
1494       double s2 = 0;
1495     
1496       const double ldbStepsTime = times[0] + times[1];
1497     
1498       for(int i=2; i<b1; i++){
1499         s1 += times[i];
1500       }
1501       for(int i=b1; i<oldNumTimings; i++){
1502         s2 += times[i];
1503       }
1504       
1505       
1506       // Compute the estimated time for the last phase's data
1507     
1508       const double a1 = s1 / (double)(b1-2);
1509       const double a2 = s2 / (double)(oldNumTimings-b1);
1510       const double aold = (a1+a2)/2.0;
1511
1512       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1513         
1514     
1515       // Measure actual time
1516       double sum = 0.0;
1517       for(int i=2; i<newNumTimings; ++i){
1518         sum += timesNew[i];
1519       }
1520
1521       const double avg = sum / ((double)(newNumTimings-2));
1522       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1523
1524       const double benefit = expectedTotalTime - actualTotalTime;
1525
1526       // Determine load balance cost
1527       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1528
1529       const double benefitAfterLB = benefit - lbcost;
1530
1531     
1532       // Determine whether LB cost outweights the estimated benefit
1533       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1534     
1535     
1536     
1537       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1538       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1539         const std::string &name = cpsIter->first;
1540         const std::pair<int,int> &bounds = cpsIter->second;
1541         const int lb = bounds.first;
1542         const int ub = bounds.second;
1543       
1544         if(benefitAfterLB > 0){
1545           CkPrintf("Linear Model: Beneficial LB\n");
1546           int newval = newControlPoints[name] / 2;
1547           if(newval > lb)
1548             newControlPoints[name] = newval;
1549           else 
1550             newControlPoints[name] = lb;
1551         } else {
1552           CkPrintf("Linear Model: Detrimental LB\n");
1553           int newval = newControlPoints[name] * 2;
1554           if(newval < ub)
1555             newControlPoints[name] = newval;
1556           else 
1557             newControlPoints[name] = ub;
1558         }
1559       }
1560     }
1561
1562   }
1563
1564   else if( whichTuningScheme == LDBPeriodQuadratic) {
1565     // Assume this is used in this manner:
1566     //  1) go to next phase
1567     //  2) request control point
1568     //  3) load balancing
1569     //  4) computation
1570
1571
1572     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1573     instrumentedPhase *prevPhase = previousPhaseData();
1574         
1575     const std::vector<double> &times = twoAgoPhase->times;
1576     const int oldNumTimings = times.size();
1577
1578     const std::vector<double> &timesNew = prevPhase->times;
1579     const int newNumTimings = timesNew.size();
1580
1581     
1582     if(oldNumTimings > 4 && newNumTimings > 4){
1583
1584
1585       // Build model of execution time based on two phases ago
1586       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1587       const int b1 = 2 + (oldNumTimings-2)/3;
1588       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1589       double s1 = 0;
1590       double s2 = 0;
1591       double s3 = 0;
1592
1593       const double ldbStepsTime = times[0] + times[1];
1594     
1595       for(int i=2; i<b1; i++){
1596         s1 += times[i];
1597       }
1598       for(int i=b1; i<b2; i++){
1599         s2 += times[i];
1600       }
1601       for(int i=b2; i<oldNumTimings; i++){
1602         s3 += times[i];
1603       }
1604
1605     
1606       // Compute the estimated time for the last phase's data
1607     
1608       const double a1 = s1 / (double)(b1-2);
1609       const double a2 = s2 / (double)(b2-b1);
1610       const double a3 = s3 / (double)(oldNumTimings-b2);
1611     
1612       const double a = (a1-2.0*a2+a3)/2.0;
1613       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1614       const double c = a3;
1615     
1616       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1617       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1618       const double x2 = 0.5;
1619       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1620    
1621     
1622       // Measure actual time
1623       double sum = 0.0;
1624       for(int i=2; i<newNumTimings; ++i){
1625         sum += timesNew[i];
1626       }
1627
1628       const double avg = sum / ((double)(newNumTimings-2));
1629       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1630
1631       const double benefit = expectedTotalTime - actualTotalTime;
1632
1633       // Determine load balance cost
1634       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1635
1636       const double benefitAfterLB = benefit - lbcost;
1637
1638     
1639       // Determine whether LB cost outweights the estimated benefit
1640       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1641     
1642     
1643     
1644       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1645       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1646         const std::string &name = cpsIter->first;
1647         const std::pair<int,int> &bounds = cpsIter->second;
1648         const int lb = bounds.first;
1649         const int ub = bounds.second;
1650       
1651         if(benefitAfterLB > 0){
1652           CkPrintf("QuadraticModel: Beneficial LB\n");
1653           int newval = newControlPoints[name] / 2;
1654           if(newval > lb)
1655             newControlPoints[name] = newval;
1656           else 
1657             newControlPoints[name] = lb;
1658         } else {
1659           CkPrintf("QuadraticModel: Detrimental LB\n");
1660           int newval = newControlPoints[name] * 2;
1661           if(newval < ub)
1662             newControlPoints[name] = newval;
1663           else 
1664             newControlPoints[name] = ub;
1665         }
1666       
1667       }
1668     }
1669     
1670
1671   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1672     // Assume this is used in this manner:
1673     //  1) go to next phase
1674     //  2) request control point
1675     //  3) load balancing
1676     //  4) computation
1677
1678
1679
1680     instrumentedPhase *prevPhase = previousPhaseData();
1681     
1682     const std::vector<double> &times = prevPhase->times;
1683     const int numTimings = times.size();
1684     
1685     if( numTimings > 4){
1686
1687       const int b1 = 2 + (numTimings-2)/2;
1688       double s1 = 0;
1689       double s2 = 0;
1690     
1691     
1692       for(int i=2; i<b1; i++){
1693         s1 += times[i];
1694       }
1695       for(int i=b1; i<numTimings; i++){
1696         s2 += times[i];
1697       }
1698       
1699     
1700       const double a1 = s1 / (double)(b1-2);
1701       const double a2 = s2 / (double)(numTimings-b1);
1702       const double avg = (a1+a1) / 2.0;
1703
1704       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1705
1706       const double ldbStepsTime = times[0] + times[1];
1707       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1708       
1709
1710       int newval = roundDouble(sqrt(2.0*lbcost/m));
1711       
1712       // We don't really know what to do if m<=0, so we'll just double the period
1713       if(m<=0)
1714         newval = 2*numTimings;     
1715       
1716       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1717     
1718     
1719       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1720       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1721         // TODO: lookup only control points that are relevant instead of all of them
1722         const std::string &name = cpsIter->first;
1723         const std::pair<int,int> &bounds = cpsIter->second;
1724         const int lb = bounds.first;
1725         const int ub = bounds.second;
1726         
1727         if(newval < lb){
1728           newControlPoints[name] = lb;
1729         } else if(newval > ub){
1730           newControlPoints[name] = ub;
1731         } else {
1732           newControlPoints[name] = newval;
1733         } 
1734         
1735       }
1736       
1737       
1738     }
1739     
1740  
1741
1742   } else if ( whichTuningScheme == UseSteering ) {
1743           // -----------------------------------------------------------
1744           //  STEERING BASED ON KNOWLEDGE
1745
1746           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1747           // plans are only generated after 3 phases
1748
1749           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1750           instrumentedPhase *prevPhase = previousPhaseData();
1751
1752           if(phase_id%4 == 0){
1753                   CkPrintf("Steering based on 2 phases ago:\n");
1754                   twoAgoPhase->print();
1755                   CkPrintf("\n");
1756                   fflush(stdout);
1757
1758                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1759
1760
1761                   // ========================================= Concurrency =============================================
1762                   // See if idle time is high:
1763                   {
1764                           double idleTime = twoAgoPhase->idleTime.avg;
1765                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1766                           fflush(stdout);
1767                           if(idleTime > 0.10){
1768                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1769                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1770
1771                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1772
1773                                   bool found = false;
1774                                   std::string cpName;
1775                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1776                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1777                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1778                                           cpName = iter->first;
1779                                           info = &iter->second;
1780
1781                                           // Initialize a new plan based on two phases ago
1782                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1783
1784                                           CkPrintf("Steering found knob to turn\n");
1785                                           fflush(stdout);
1786
1787                                           if(info->first == ControlPoint::EFF_INC){
1788                                                   const int maxValue = controlPointSpace[cpName].second;
1789                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1790                                                   if(twoAgoValue+1 <= maxValue){
1791                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1792                                                   }
1793                                           } else {
1794                                                   const int minValue = controlPointSpace[cpName].second;
1795                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1796                                                   if(twoAgoValue-1 >= minValue){
1797                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1798                                                   }
1799                                           }
1800
1801                                           possibleNextStepPlans.push_back(aNewPlan);
1802
1803                                   }
1804                           }
1805                   }
1806
1807                   // ========================================= Grain Size =============================================
1808                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1809                   {
1810                           double overheadTime = twoAgoPhase->overheadTime.avg;
1811                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1812                           fflush(stdout);
1813                           if(overheadTime > 0.10){
1814                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1815                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1816
1817                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1818
1819                                   bool found = false;
1820                                   std::string cpName;
1821                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1822                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1823                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1824                                           cpName = iter->first;
1825                                           info = &iter->second;
1826
1827                                           // Initialize a new plan based on two phases ago
1828                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1829
1830
1831
1832                                           CkPrintf("Steering found knob to turn\n");
1833                                           fflush(stdout);
1834
1835                                           if(info->first == ControlPoint::EFF_INC){
1836                                                   const int maxValue = controlPointSpace[cpName].second;
1837                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1838                                                   if(twoAgoValue+1 <= maxValue){
1839                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1840                                                   }
1841                                           } else {
1842                                                   const int minValue = controlPointSpace[cpName].second;
1843                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1844                                                   if(twoAgoValue-1 >= minValue){
1845                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1846                                                   }
1847                                           }
1848
1849                                           possibleNextStepPlans.push_back(aNewPlan);
1850
1851                                   }
1852
1853                           }
1854                   }
1855                   // ========================================= GPU Offload =============================================
1856                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1857                   {
1858                           double idleTime = twoAgoPhase->idleTime.avg;
1859                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1860                           fflush(stdout);
1861                           if(idleTime > 0.10){
1862                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1863                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1864
1865                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1866
1867                                   bool found = false;
1868                                   std::string cpName;
1869                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1870                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1871                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1872                                           cpName = iter->first;
1873                                           info = &iter->second;
1874
1875                                           // Initialize a new plan based on two phases ago
1876                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1877
1878
1879                                           CkPrintf("Steering found knob to turn\n");
1880                                           fflush(stdout);
1881
1882                                           if(info->first == ControlPoint::EFF_DEC){
1883                                                   const int maxValue = controlPointSpace[cpName].second;
1884                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1885                                                   if(twoAgoValue+1 <= maxValue){
1886                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1887                                                   }
1888                                           } else {
1889                                                   const int minValue = controlPointSpace[cpName].second;
1890                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1891                                                   if(twoAgoValue-1 >= minValue){
1892                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1893                                                   }
1894                                           }
1895
1896                                           possibleNextStepPlans.push_back(aNewPlan);
1897
1898                                   }
1899
1900                           }
1901                   }
1902
1903                   // ========================================= Done =============================================
1904
1905
1906                   if(possibleNextStepPlans.size() > 0){
1907                           newControlPoints = possibleNextStepPlans[0];
1908                   }
1909
1910
1911                   CkPrintf("Steering done for this phase\n");
1912                   fflush(stdout);
1913
1914           }  else {
1915                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1916                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1917                   fflush(stdout);
1918           }
1919
1920
1921
1922   } else if( whichTuningScheme == SimulatedAnnealing ) {
1923
1924     // -----------------------------------------------------------
1925     //  SIMULATED ANNEALING
1926     //  Simulated Annealing style hill climbing method
1927     //
1928     //  Find the best search space configuration, and try something
1929     //  nearby it, with a radius decreasing as phases increase
1930
1931     CkPrintf("Finding best phase\n");
1932     instrumentedPhase *bestPhase = allData.findBest();  
1933     CkPrintf("best found:\n"); 
1934     bestPhase->print(); 
1935     CkPrintf("\n"); 
1936     
1937
1938     const int convergeByPhase = 100;
1939     // Determine from 0.0 to 1.0 how far along we are
1940     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1941
1942     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1943     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1944       const std::string &name = cpsIter->first;
1945       const std::pair<int,int> &bounds = cpsIter->second;
1946       const int minValue = bounds.first;
1947       const int maxValue = bounds.second;
1948       
1949       const int before = bestPhase->controlPoints[name];   
1950   
1951       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1952
1953       int high = min(before+range, maxValue);
1954       int low = max(before-range, minValue);
1955       
1956       newControlPoints[name] = low;
1957       if(high-low > 0){
1958         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1959       } 
1960       
1961     }
1962
1963   } else if ( whichTuningScheme == DivideAndConquer ) {
1964
1965           // -----------------------------------------------------------
1966           //  STEERING FOR Divide & Conquer Programs
1967           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1968           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1969           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1970
1971           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1972           // plans are only generated after 3 phases
1973
1974           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1975           instrumentedPhase *prevPhase = previousPhaseData();
1976
1977           if(phase_id%4 == 0){
1978                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1979                   twoAgoPhase->print();
1980                   CkPrintf("\n");
1981                   fflush(stdout);
1982
1983                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1984
1985
1986                   // ========================================= Concurrency =============================================
1987                   // See if idle time is high:
1988                   {
1989                           double idleTime = twoAgoPhase->idleTime.avg;
1990                           double overheadTime = twoAgoPhase->overheadTime.avg;
1991
1992
1993                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1994                           fflush(stdout);
1995                           if(idleTime+overheadTime > 0.10){
1996                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1997                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1998
1999                                   int direction = -1;
2000                                   if (idleTime>overheadTime){
2001                                           // We need to decrease the grain size, or increase the available concurrency
2002                                           direction = 1;
2003                                   }
2004
2005                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
2006
2007                                   bool found = false;
2008                                   std::string cpName;
2009                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2010                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2011                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2012                                           cpName = iter->first;
2013                                           info = &iter->second;
2014                                           
2015                                           // Initialize a new plan based on two phases ago
2016                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2017
2018                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2019                                           fflush(stdout);
2020
2021                                           int adjustByAmount = (int)(myAbs(idleTime-overheadTime)*5.0);
2022                                           
2023                                           if(info->first == ControlPoint::EFF_INC){
2024                                             const int minValue = controlPointSpace[cpName].first;
2025                                             const int maxValue = controlPointSpace[cpName].second;
2026                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2027                                             const int newVal = closestInRange(twoAgoValue+adjustByAmount*direction, minValue, maxValue);                                          
2028                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2029                                             aNewPlan[cpName] = newVal;
2030                                           } else {
2031                                             const int minValue = controlPointSpace[cpName].first;
2032                                             const int maxValue = controlPointSpace[cpName].second;
2033                                             const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2034                                             const int newVal = closestInRange(twoAgoValue-adjustByAmount*direction, minValue, maxValue);
2035                                             CkAssert(newVal <= maxValue && newVal >= minValue);
2036                                             aNewPlan[cpName] = newVal;
2037                                           }
2038                                           
2039                                           possibleNextStepPlans.push_back(aNewPlan);
2040                                   }
2041                           }
2042                   }
2043
2044                   if(possibleNextStepPlans.size() > 0){
2045                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2046                     newControlPoints = possibleNextStepPlans[0];
2047                   } else {
2048                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2049                   }
2050           }
2051
2052   } else if( whichTuningScheme == Simplex ) {
2053
2054           // -----------------------------------------------------------
2055           //  Nelder Mead Simplex Algorithm
2056           //
2057           //  A scheme that takes a simplex (n+1 points) and moves it
2058           //  toward the minimum, eventually converging there.
2059
2060           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2061
2062   } else if( whichTuningScheme == ExhaustiveSearch ) {
2063     
2064     // -----------------------------------------------------------
2065     // EXHAUSTIVE SEARCH
2066    
2067     int numDimensions = controlPointSpace.size();
2068     CkAssert(numDimensions > 0);
2069   
2070     vector<int> lowerBounds(numDimensions);
2071     vector<int> upperBounds(numDimensions); 
2072   
2073     int d=0;
2074     std::map<std::string, pair<int,int> >::iterator iter;
2075     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2076       //    CkPrintf("Examining dimension %d\n", d);
2077       lowerBounds[d] = iter->second.first;
2078       upperBounds[d] = iter->second.second;
2079       d++;
2080     }
2081    
2082     // get names for each dimension (control point)
2083     vector<std::string> names(numDimensions);
2084     d=0;
2085     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2086       names[d] = niter->first;
2087       d++;
2088     }
2089   
2090   
2091     // Create the first possible configuration
2092     vector<int> config = lowerBounds;
2093     config.push_back(0);
2094   
2095     // Increment until finding an unused configuration
2096     allData.cleanupNames(); // put -1 values in for any control points missing
2097     std::vector<instrumentedPhase*> &phases = allData.phases;     
2098
2099     while(true){
2100     
2101       std::vector<instrumentedPhase*>::iterator piter; 
2102       bool testedConfiguration = false; 
2103       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2104       
2105         // Test if the configuration matches this phase
2106         bool match = true;
2107         for(int j=0;j<numDimensions;j++){
2108           match &= (*piter)->controlPoints[names[j]] == config[j];
2109         }
2110       
2111         if(match){
2112           testedConfiguration = true; 
2113           break;
2114         } 
2115       } 
2116     
2117       if(testedConfiguration == false){ 
2118         break;
2119       } 
2120     
2121       // go to next configuration
2122       config[0] ++;
2123       // Propagate "carrys"
2124       for(int i=0;i<numDimensions;i++){
2125         if(config[i] > upperBounds[i]){
2126           config[i+1]++;
2127           config[i] = lowerBounds[i];
2128         }
2129       }
2130       // check if we have exhausted all possible values
2131       if(config[numDimensions] > 0){
2132         break;
2133       }
2134        
2135     }
2136   
2137     if(config[numDimensions] > 0){
2138       for(int i=0;i<numDimensions;i++){ 
2139         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2140       }
2141     }
2142
2143     // results are now in config[i]
2144     
2145     for(int i=0; i<numDimensions; i++){
2146       newControlPoints[names[i]] = config[i];
2147       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2148     }
2149
2150
2151   } else {
2152     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2153   }
2154
2155
2156   const double endGenerateTime = CmiWallTimer();
2157   
2158   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2159   
2160 }
2161
2162
2163
2164
2165
2166 /// Get control point value from range of integers [lb,ub]
2167 int controlPoint(const char *name, int lb, int ub){
2168   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2169   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2170   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2171   int result;
2172
2173   // if we already have control point values for phase, return them
2174   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2175     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2176     return thisPhaseData->controlPoints[std::string(name)];
2177   }
2178   
2179
2180   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2181     // For the first few phases, just use the lower bound, or the default if one was provided 
2182     // This ensures that the ranges for all the control points are known before we do anything fancy
2183     result = lb;
2184
2185     if(defaultControlPointValues.count(std::string(name)) > 0){
2186       int v = defaultControlPointValues[std::string(name)];
2187       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2188       result = v;
2189     }
2190
2191   } else if(controlPointSpace.count(std::string(name)) == 0){
2192     // if this is the first time we've seen the CP, then return the lower bound
2193     result = lb;
2194     
2195   }  else {
2196     // Generate a plan one time for each phase
2197     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2198     
2199     // Use the planned value:
2200     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2201     
2202   }
2203
2204   if(!isInRange(result,ub,lb)){
2205     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2206     fflush(stdout);
2207     fflush(stderr);
2208   }
2209   CkAssert(isInRange(result,ub,lb));
2210   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2211
2212   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2213
2214   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2215   //  thisPhaseData->print();
2216   
2217   return result;
2218 }
2219
2220
2221 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2222   CkAssert(CkMyPe() == 0);
2223   return controlPoint("FortranCP", *lb, *ub);
2224 }
2225
2226
2227
2228
2229 /// Inform the control point framework that a named control point affects the priorities of some array  
2230 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2231 //   CkGroupID aid = arraybase.ckGetArrayID();
2232 //   int groupIdx = aid.idx;
2233 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2234 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2235 // }
2236
2237
2238 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2239 // void controlPointPriorityEntry(const char *name, int idx){
2240 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2241 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2242 // }
2243
2244
2245
2246
2247
2248 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2249
2250     This function decomposes the algorithm into a state machine that allows it to
2251     evaluate one or more configurations through subsequent clls to this function.
2252     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2253
2254     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2255     so a list of them will be created and they will be evaluated, one per call.
2256
2257     Currently there is no stopping criteria, but the simplex ought to contract down
2258     to a few close configurations, and hence not much change will happen after this 
2259     point.
2260
2261  */
2262 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2263
2264         if(useBestKnown){
2265                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2266                 return;
2267         }
2268
2269
2270         if(firstSimplexPhase< 0){
2271                 firstSimplexPhase = allData.phases.size()-1;
2272                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2273         }
2274
2275         int n = controlPointSpace.size();
2276
2277         CkAssert(n>=2);
2278
2279
2280         if(simplexState == beginning){
2281                 // First we evaluate n+1 random points, then we go to a different state
2282                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2283                         CkPrintf("Simplex Tuning: chose random configuration\n");
2284
2285                         // Choose random values from the middle third of the range in each dimension
2286                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2287                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2288                                 const std::string &name = cpsIter->first;
2289                                 const std::pair<int,int> &bounds = cpsIter->second;
2290                                 const int lb = bounds.first;
2291                                 const int ub = bounds.second;
2292                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2293                         }
2294                 } else {
2295                         // Set initial simplex:
2296                         for(int i=0; i<n+1; i++){
2297                                 simplexIndices.insert(firstSimplexPhase+i);
2298                         }
2299                         CkAssert(simplexIndices.size() == n+1);
2300
2301                         // Transition to reflecting state
2302                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2303
2304                 }
2305
2306         } else if (simplexState == reflecting){
2307                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2308                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2309
2310                 // Find the highest time from other points in the simplex
2311                 double highestTimeForOthersInSimplex = 0.0;
2312                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2313                         double t = allData.phases[*iter]->medianTime();
2314                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2315                                 highestTimeForOthersInSimplex = t;
2316                         }
2317                 }
2318
2319                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2320
2321                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2322                         // if y* < yl,  transition to "expanding"
2323                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2324
2325                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2326                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2327                         CkAssert(simplexIndices.size() == n+1);
2328                         simplexIndices.erase(worstPhase);
2329                         CkAssert(simplexIndices.size() == n);
2330                         simplexIndices.insert(pPhase);
2331                         CkAssert(simplexIndices.size() == n+1);
2332                         // Transition to reflecting state
2333                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2334
2335                 } else {
2336                         // if y* > yh
2337                         if(recentPhaseTime <= worstTime){
2338                                 // replace Ph with P*
2339                                 CkAssert(simplexIndices.size() == n+1);
2340                                 simplexIndices.erase(worstPhase);
2341                                 CkAssert(simplexIndices.size() == n);
2342                                 simplexIndices.insert(pPhase);
2343                                 CkAssert(simplexIndices.size() == n+1);
2344                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2345                                 worstPhase = pPhase;
2346                                 // Just as a sanity check, make sure we don't use the non-updated values.
2347                                 worst.clear();
2348                         }
2349
2350                         // Now, form P** and do contracting phase
2351                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2352
2353                 }
2354
2355         } else if (simplexState == doneExpanding){
2356                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2357                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2358                 // A new configuration has been evaluated
2359
2360                 // Check to see if y** < y1
2361                 if(recentPhaseTime < bestTime){
2362                         // replace Ph by P**
2363                         CkAssert(simplexIndices.size() == n+1);
2364                         simplexIndices.erase(worstPhase);
2365                         CkAssert(simplexIndices.size() == n);
2366                         simplexIndices.insert(p2Phase);
2367                         CkAssert(simplexIndices.size() == n+1);
2368                 } else {
2369                         //      replace Ph by P*
2370                         CkAssert(simplexIndices.size() == n+1);
2371                         simplexIndices.erase(worstPhase);
2372                         CkAssert(simplexIndices.size() == n);
2373                         simplexIndices.insert(pPhase);
2374                         CkAssert(simplexIndices.size() == n+1);
2375                 }
2376
2377                 // Transition to reflecting state
2378                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2379
2380         }  else if (simplexState == contracting){
2381                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2382                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2383                 // A new configuration has been evaluated
2384
2385                 // Check to see if y** > yh
2386                 if(recentPhaseTime <= worstTime){
2387                         // replace Ph by P**
2388                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2389                         CkAssert(simplexIndices.size() == n+1);
2390                         simplexIndices.erase(worstPhase);
2391                         CkAssert(simplexIndices.size() == n);
2392                         simplexIndices.insert(p2Phase);
2393                         CkAssert(simplexIndices.size() == n+1);
2394                         // Transition to reflecting state
2395                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2396
2397                 } else {
2398                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2399                         simplexState = stillContracting;
2400
2401                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2402                         stillMustContractList = simplexIndices;
2403
2404                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2405                 }
2406
2407         } else if (simplexState == stillContracting){
2408                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2409
2410                 if(stillMustContractList.size()>0){
2411                         int c = *stillMustContractList.begin();
2412                         stillMustContractList.erase(c);
2413                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2414
2415                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2416
2417                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2418                         int v = 0;
2419                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2420                                 const std::string &name = cpsIter->first;
2421                                 const std::pair<int,int> &bounds = cpsIter->second;
2422                                 const int lb = bounds.first;
2423                                 const int ub = bounds.second;
2424
2425                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2426
2427                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2428                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2429                                 v++;
2430                         }
2431                 } else {
2432                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2433                         CkAssert(stillMustContractList.size() == 0);
2434                         simplexIndices.clear();
2435                         CkAssert(simplexIndices.size()==0);
2436                         for(int i=0; i<n+1; i++){
2437                                 simplexIndices.insert(allData.phases.size()-2-i);
2438                         }
2439                         CkAssert(simplexIndices.size()==n+1);
2440
2441                         // Transition to reflecting state
2442                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2443
2444                 }
2445
2446
2447         } else if (simplexState == expanding){
2448                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2449                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2450                 // A new configuration has been evaluated
2451
2452                 // determine if y** < yl
2453                 if(recentPhaseTime < bestTime){
2454                         // replace Ph by P**
2455                         CkAssert(simplexIndices.size() == n+1);
2456                         simplexIndices.erase(worstPhase);
2457                         CkAssert(simplexIndices.size() == n);
2458                         simplexIndices.insert(p2Phase);
2459                         CkAssert(simplexIndices.size() == n+1);
2460                         // Transition to reflecting state
2461                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2462
2463                 } else {
2464                         // else, replace ph with p*
2465                         CkAssert(simplexIndices.size() == n+1);
2466                         simplexIndices.erase(worstPhase);
2467                         CkAssert(simplexIndices.size() == n);
2468                         simplexIndices.insert(pPhase);
2469                         CkAssert(simplexIndices.size() == n+1);
2470                         // Transition to reflecting state
2471                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2472                 }
2473
2474
2475         } else {
2476                 CkAbort("Unknown simplexState");
2477         }
2478
2479 }
2480
2481
2482
2483 /** Replace the worst point with its reflection across the centroid. */
2484 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2485
2486         int n = controlPointSpace.size();
2487
2488         printSimplex(allData);
2489
2490         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2491
2492
2493         // Quit if the diameter of our simplex is small
2494         double maxr = 0.0;
2495         for(int i=0; i<n+1; i++){
2496                 //              Compute r^2 of this simplex point from the centroid
2497                 double r2 = 0.0;
2498                 std::vector<double> p = pointCoords(allData, i);
2499                 for(int d=0; d<p.size(); d++){
2500                         double r1 = (p[d] * centroid[d]);
2501                         r2 += r1*r1;
2502                 }
2503                 if(r2 > maxr)
2504                         maxr = r2;
2505         }
2506
2507 #if 0
2508         // At some point we quit this tuning
2509         if(maxr < 10){
2510                 useBestKnown = true;
2511                 instrumentedPhase *best = allData.findBest();
2512                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2513
2514                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2515                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2516                         const std::string &name = cpsIter->first;
2517                         newControlPoints[name] =  best->controlPoints[name];
2518                 }
2519         }
2520 #endif
2521
2522         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2523
2524         pPhase = allData.phases.size()-1;
2525         P.resize(n);
2526         for(int i=0; i<n; i++){
2527                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2528         }
2529
2530         for(int i=0; i<P.size(); i++){
2531                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2532         }
2533
2534
2535         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2536         int v = 0;
2537         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2538                 const std::string &name = cpsIter->first;
2539                 const std::pair<int,int> &bounds = cpsIter->second;
2540                 const int lb = bounds.first;
2541                 const int ub = bounds.second;
2542                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2543                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2544                 v++;
2545         }
2546
2547
2548         // Transition to "reflecting" state
2549         simplexState = reflecting;
2550         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2551
2552 }
2553
2554
2555
2556
2557 /** Replace the newly tested reflection with a further expanded version of itself. */
2558 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2559         int n = controlPointSpace.size();
2560         printSimplex(allData);
2561
2562         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2563         // I believe the equation for P** in the text on page 308 is correct.
2564
2565         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2566
2567
2568         p2Phase = allData.phases.size()-1;
2569         P2.resize(n);
2570         for(int i=0; i<n; i++){
2571                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2572         }
2573
2574         for(int i=0; i<P2.size(); i++){
2575                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2576         }
2577
2578
2579         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2580         int v = 0;
2581         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2582                 const std::string &name = cpsIter->first;
2583                 const std::pair<int,int> &bounds = cpsIter->second;
2584                 const int lb = bounds.first;
2585                 const int ub = bounds.second;
2586                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2587                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2588                 v++;
2589         }
2590
2591
2592         // Transition to "doneExpanding" state
2593         simplexState = doneExpanding;
2594         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2595
2596 }
2597
2598
2599
2600
2601 /** Replace the newly tested reflection with a further expanded version of itself. */
2602 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2603         int n = controlPointSpace.size();
2604         printSimplex(allData);
2605
2606         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2607
2608
2609         p2Phase = allData.phases.size()-1;
2610         P2.resize(n);
2611         for(int i=0; i<n; i++){
2612                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2613         }
2614
2615         for(int i=0; i<P2.size(); i++){
2616                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2617         }
2618
2619
2620         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2621         int v = 0;
2622         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2623                 const std::string &name = cpsIter->first;
2624                 const std::pair<int,int> &bounds = cpsIter->second;
2625                 const int lb = bounds.first;
2626                 const int ub = bounds.second;
2627                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2628                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2629                 v++;
2630         }
2631         
2632         
2633         // Transition to "contracting" state
2634         simplexState = contracting;
2635         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2636
2637 }
2638
2639
2640
2641
2642 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2643         int n = controlPointSpace.size();
2644
2645         // Find worst performing point in the simplex
2646         worstPhase = -1;
2647         worstTime = -1.0;
2648         bestPhase = 10000000;
2649         bestTime = 10000000;
2650         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2651                 double t = allData.phases[*iter]->medianTime();
2652                 if(t > worstTime){
2653                         worstTime = t;
2654                         worstPhase = *iter;
2655                 }
2656                 if(t < bestTime){
2657                         bestTime = t;
2658                         bestPhase = *iter;
2659                 }
2660         }
2661         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2662
2663         best = pointCoords(allData, bestPhase);
2664         CkAssert(best.size() == n);
2665
2666         worst = pointCoords(allData, worstPhase);
2667         CkAssert(worst.size() == n);
2668
2669         // Calculate centroid of the remaining points in the simplex
2670         centroid.resize(n);
2671         for(int i=0; i<n; i++){
2672                 centroid[i] = 0.0;
2673         }
2674
2675         int numPts = 0;
2676
2677         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2678                 if(*iter != worstPhase){
2679                         numPts ++;
2680                         // Accumulate into the result vector
2681                         int c = 0;
2682                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2683                                 centroid[c] += citer->second;
2684                                 c++;
2685                         }
2686
2687                 }
2688         }
2689
2690         // Now divide the sums by the number of points.
2691         for(int v = 0; v<centroid.size(); v++) {
2692                 centroid[v] /= (double)numPts;
2693         }
2694
2695         CkAssert(centroid.size() == n);
2696
2697         for(int i=0; i<centroid.size(); i++){
2698                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2699         }
2700
2701
2702 }
2703
2704
2705
2706 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2707         std::vector<double> result;
2708         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2709                 result.push_back((double)citer->second);
2710         }
2711         return result;
2712 }
2713
2714
2715
2716
2717 void ControlPointWriteOutputToDisk(){
2718         CkAssert(CkMyPe() == 0);
2719         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2720 }
2721
2722
2723
2724 /*! @} */
2725
2726 #ifdef CP_DISABLE_TRACING
2727 #include "ControlPointsNoTrace.def.h"
2728 #else
2729 #include "ControlPoints.def.h"
2730 #endif