Merged arrays into Charj mainline.
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15 #include <math.h>
16 #include <climits>
17
18 #define roundDouble(x)        ((long)(x+0.5))
19
20
21 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
22  
23
24 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
25 /**  @{ */
26
27 using namespace std;
28
29 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
30
31
32 //#undef DEBUGPRINT
33 //#define DEBUGPRINT 4
34
35 static void periodicProcessControlPoints(void* ptr, double currWallTime);
36
37
38 // A pointer to this PE's controlpoint manager Proxy
39 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
40 /* readonly */ int random_seed;
41 /* readonly */ long controlPointSamplePeriod;
42 /* readonly */ int whichTuningScheme;
43 /* readonly */ bool writeDataFileAtShutdown;
44 /* readonly */ bool shouldFilterOutputData;
45 /* readonly */ bool loadDataFileAtStartup;
46 /* readonly */ bool shouldGatherMemoryUsage;
47 /* readonly */ bool shouldGatherUtilization;
48 /* readonly */ bool shouldGatherAll;
49 /* readonly */ char CPDataFilename[512];
50
51
52
53 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
54 /// These probably come from the command line arguments, so are available only on PE 0
55 std::map<std::string, int> defaultControlPointValues;
56
57
58
59 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults, LDBPeriod, LDBPeriodLinear, LDBPeriodQuadratic, LDBPeriodOptimal}  tuningScheme;
60
61
62
63 void printTuningScheme(){
64   switch(whichTuningScheme){
65   case RandomSelection:
66     CkPrintf("Tuning Scheme: RandomSelection\n");
67     break;
68   case AlwaysDefaults:
69     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
70     break;
71   case SimulatedAnnealing:
72     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
73     break;
74   case ExhaustiveSearch:
75     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
76     break;
77   case CriticalPathAutoPrioritization:
78     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
79     break;
80   case UseBestKnownTiming:
81     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
82     break;
83   case UseSteering:
84     CkPrintf("Tuning Scheme: UseSteering\n");
85     break;
86   case MemoryAware:
87     CkPrintf("Tuning Scheme: MemoryAware\n");
88     break;
89   case Simplex:
90     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
91     break;
92   case DivideAndConquer:
93     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
94     break;
95   case LDBPeriod:
96     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Constant Prediction)\n");
97     break;
98   case LDBPeriodLinear:
99     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Linear Prediction)\n");
100     break;
101   case LDBPeriodQuadratic:
102     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Quadratic Prediction)\n");
103     break;
104   case LDBPeriodOptimal:
105     CkPrintf("Tuning Scheme: Load Balancing Period Steering (Optimal Prediction)\n");
106     break;
107   default:
108     CkPrintf("Unknown tuning scheme\n");
109     break;
110   }
111   fflush(stdout);
112 }
113
114
115
116 /// A reduction type that combines idle time measurements (min/sum/max etc.)
117 CkReduction::reducerType idleTimeReductionType;
118 /// A reducer that combines idle time measurements (min/sum/max etc.)
119 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
120   double ret[3];
121   if(nMsg > 0){
122     CkAssert(msgs[0]->getSize()==3*sizeof(double));
123     double *m=(double *)msgs[0]->getData();
124     ret[0]=m[0];
125     ret[1]=m[1];
126     ret[2]=m[2];
127   }
128   for (int i=1;i<nMsg;i++) {
129     CkAssert(msgs[i]->getSize()==3*sizeof(double));
130     double *m=(double *)msgs[i]->getData();
131     ret[0]=min(ret[0],m[0]);
132     ret[1]+=m[1];
133     ret[2]=max(ret[2],m[2]);
134   }  
135   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
136 }
137
138
139
140 /// A reduction type that combines idle, overhead, and memory measurements
141 CkReduction::reducerType allMeasuresReductionType;
142 /// A reducer that combines idle, overhead, and memory measurements
143 #define ALL_REDUCTION_SIZE 12
144 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
145   double ret[ALL_REDUCTION_SIZE];
146   if(nMsg > 0){
147     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
148     double *m=(double *)msgs[0]->getData();
149     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
150   }
151   for (int i=1;i<nMsg;i++) {
152     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
153     double *m=(double *)msgs[i]->getData();
154     // idle time (min/sum/max)
155     ret[0]=min(ret[0],m[0]);
156     ret[1]+=m[1];
157     ret[2]=max(ret[2],m[2]);
158     // overhead time (min/sum/max)
159     ret[3]=min(ret[3],m[3]);
160     ret[4]+=m[4];
161     ret[5]=max(ret[5],m[5]);
162     // mem usage (max)
163     ret[6]=max(ret[6],m[6]);
164     // bytes per invocation for two types of entry methods
165     ret[7]+=m[7];
166     ret[8]+=m[8];
167     ret[9]+=m[9];
168     ret[10]+=m[10];
169     // Grain size (avg)
170     ret[11]+=m[11];
171   }  
172   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
173 }
174
175
176 /// Registers the control point framework's reduction handlers at startup on each PE
177 /*initproc*/ void registerCPReductions(void) {
178   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
179   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
180 }
181
182
183
184
185
186
187 /// Return an integer between 0 and num-1 inclusive
188 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
189 unsigned int randInt(unsigned int num, const char* name, int seed=0){
190   CkAssert(num > 0);
191
192   unsigned long hash = 0;
193   unsigned int c;
194   unsigned char * str = (unsigned char*)name;
195
196   while (c = *str++){
197     unsigned int c2 = (c+64)%128;
198     unsigned int c3 = (c2*5953)%127;
199     hash = c3 + (hash << 6) + (hash << 16) - hash;
200   }
201
202   unsigned long shuffled1 = (hash*2083)%7907;
203   unsigned long shuffled2 = (seed*4297)%2017;
204
205   unsigned long shuffled3 = (random_seed*4799)%7919;
206
207   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
208
209   unsigned int result = ((namehash * 6029) % 1117) % num;
210
211   CkAssert(result >=0 && result < num);
212   return result;
213 }
214
215
216
217 controlPointManager::controlPointManager() {
218   generatedPlanForStep = -1;
219
220     exitWhenReady = false;
221     alreadyRequestedMemoryUsage = false;   
222     alreadyRequestedIdleTime = false;
223     alreadyRequestedAll = false;
224     
225     instrumentedPhase * newPhase = new instrumentedPhase();
226     allData.phases.push_back(newPhase);   
227     
228     frameworkShouldAdvancePhase = false;
229     haveControlPointChangeCallback = false;
230 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
231     
232     ControlPoint::initControlPointEffects();
233
234     phase_id = 0;
235
236     if(loadDataFileAtStartup){    
237       loadDataFile();
238     }
239
240     
241     if(CkMyPe() == 0){
242       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
243     }
244
245     traceRegisterUserEvent("No currently executing message", 5000);
246     traceRegisterUserEvent("Zero time along critical path", 5010);
247     traceRegisterUserEvent("Positive total time along critical path", 5020);
248     traceRegisterUserEvent("env->setPathHistory()", 6000);
249     traceRegisterUserEvent("Critical Path", 5900);
250     traceRegisterUserEvent("Table Entry", 5901);
251
252
253 #if USER_EVENT_FOR_REGISTERTERMINALPATH
254     traceRegisterUserEvent("registerTerminalPath", 100); 
255 #endif
256
257   }
258   
259
260  controlPointManager::~controlPointManager(){
261 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
262   }
263
264
265   /// Loads the previous run data file
266   void controlPointManager::loadDataFile(){
267     ifstream infile(CPDataFilename);
268     vector<std::string> names;
269     std::string line;
270   
271     while(getline(infile,line)){
272       if(line[0] != '#')
273         break;
274     }
275   
276     int numTimings = 0;
277     std::istringstream n(line);
278     n >> numTimings;
279   
280     while(getline(infile,line)){ 
281       if(line[0] != '#') 
282         break; 
283     }
284
285     int numControlPointNames = 0;
286     std::istringstream n2(line);
287     n2 >> numControlPointNames;
288   
289     for(int i=0; i<numControlPointNames; i++){
290       getline(infile,line);
291       names.push_back(line);
292     }
293
294     for(int i=0;i<numTimings;i++){
295       getline(infile,line);
296       while(line[0] == '#')
297         getline(infile,line); 
298
299       instrumentedPhase * ips = new instrumentedPhase();
300
301       std::istringstream iss(line);
302
303       // Read memory usage for phase
304       iss >> ips->memoryUsageMB;
305       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
306
307       // Read idle time
308       iss >> ips->idleTime.min;
309       iss >> ips->idleTime.avg;
310       iss >> ips->idleTime.max;
311
312       // Read overhead time
313       iss >> ips->overheadTime.min;
314       iss >> ips->overheadTime.avg;
315       iss >> ips->overheadTime.max;
316
317       // read bytePerInvoke
318       iss >> ips->bytesPerInvoke;
319
320       // read grain size
321       iss >> ips->grainSize;
322
323       // Read control point values
324       for(int cp=0;cp<numControlPointNames;cp++){
325         int cpvalue;
326         iss >> cpvalue;
327         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
328       }
329
330       // ignore median time
331       double mt;
332       iss >> mt;
333
334       // Read all times
335
336       double time;
337
338       while(iss >> time){
339         ips->times.push_back(time);
340 #if DEBUGPRINT > 5
341         CkPrintf("read time %lf from file\n", time);
342 #endif
343       }
344
345       allData.phases.push_back(ips);
346
347     }
348
349     infile.close();
350   }
351
352
353
354   /// Add the current data to allData and output it to a file
355   void controlPointManager::writeDataFile(){
356     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
357     ofstream outfile(CPDataFilename);
358     allData.cleanupNames();
359
360 //    string s = allData.toString();
361 //    CkPrintf("At end: \n %s\n", s.c_str());
362
363     if(shouldFilterOutputData){
364       allData.verify();
365       allData.filterOutIncompletePhases();
366     }
367
368 //    string s2 = allData.toString();
369 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
370     if(allData.toString().length() > 10){
371       outfile << allData.toString();
372     } else {
373       outfile << " No data available to save to disk " << endl;
374     }
375     outfile.close();
376   }
377
378   /// User can register a callback that is called when application should advance to next phase
379   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
380     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
381     controlPointChangeCallback = cb;
382     haveControlPointChangeCallback = true;
383   }
384
385
386 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
387 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
388   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
389 }
390
391   /// Called periodically by the runtime to handle the control points
392   /// Currently called on each PE
393   void controlPointManager::processControlPoints(){
394
395     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
396
397
398     //==========================================================================================
399     // Print the data for each phase
400
401     const int s = allData.phases.size();
402
403 #if DEBUGPRINT
404     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
405     for(int p=0;p<s;++p){
406       const instrumentedPhase &phase = allData.phases[p];
407       const idleTimeContainer &idle = phase.idleTime;
408       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
409       vector<double> const &times = phase.times;
410
411       CkPrintf("Phase %d:\n", p);
412       idle.print();
413      //   CkPrintf("critical paths: (* affected by control point)\n");
414         //   for(int i=0;i<criticalPaths.size(); i++){
415         // If affected by a control point
416         //      criticalPaths[i].print();
417
418       //        criticalPaths[i].print();
419       //        CkPrintf("\n");
420         
421
422       //   }
423       CkPrintf("Timings:\n");
424       for(int i=0;i<times.size(); i++){
425         CkPrintf("%lf ", times[i]);
426       }
427       CkPrintf("\n");
428
429     }
430     
431     CkPrintf("\n\n");
432
433
434 #endif
435
436
437
438     //==========================================================================================
439     // If this is a phase during which we try to adapt control point values based on critical path
440 #if 0
441     if( s%5 == 4) {
442
443       // Find the most recent phase with valid critical path data and idle time measurements      
444       int whichPhase=-1;
445       for(int p=0; p<s; ++p){
446         const instrumentedPhase &phase = allData.phases[p];
447         const idleTimeContainer &idle = phase.idleTime;
448         if(idle.isValid() && phase.criticalPaths.size()>0 ){
449           whichPhase = p;
450         }
451       }
452       
453       
454       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
455       const instrumentedPhase &phase = allData.phases[whichPhase];
456       const idleTimeContainer &idle = phase.idleTime;
457       
458       if(idle.min > 0.1){
459         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
460         
461         // Determine the median critical path for this phase
462         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
463         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
464         CkAssert(phase.criticalPaths.size() > 0);
465         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
466         
467         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
468         
469         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
470           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
471
472
473           CkPrintf("Finding control points related to the critical path\n");
474           int cpcount = 0;
475           std::set<std::string> controlPointsAffectingCriticalPath;
476
477           
478           for(int e=0;e<path.getNumUsed();e++){
479             if(path.getUsedCount(e)>0){
480               int ep = path.getUsedEp(e);
481
482               std::map<std::string, std::set<int> >::iterator iter;
483               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
484                 if(iter->second.count(ep)>0){
485                   controlPointsAffectingCriticalPath.insert(iter->first);
486                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
487                   cpcount++;
488                 }
489               }
490               
491             }
492           }
493           
494
495           if(cpcount==0){
496             CkPrintf("No control points are known to affect the critical path\n");
497           } else {
498             double beginTime = CmiWallTimer();
499
500             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
501             
502             newControlPoints = phase.controlPoints;
503             
504             if(frameworkShouldAdvancePhase){
505               gotoNextPhase();  
506             }
507             
508             if(haveControlPointChangeCallback){ 
509 #if DEBUGPRINT
510               CkPrintf("Calling control point change callback\n");
511 #endif
512               // Create a high priority message and send it to the callback
513               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
514               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
515               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
516               controlPointChangeCallback.send(msg);
517               
518             }
519             
520             
521             // adjust the control points that can affect the critical path
522
523             char textDescription[4096*2];
524             textDescription[0] = '\0';
525
526             std::map<std::string,int>::iterator newCP;
527             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
528               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
529                 // decrease the value (increase priority) if within range
530                 int lowerbound = controlPointSpace[newCP->first].first;
531                 if(newCP->second > lowerbound){
532                   newControlPointsAvailable = true;
533                   newCP->second --;
534                 }
535               }
536             }
537            
538             // Create a string for a projections user event
539             if(1){
540               std::map<std::string,int>::iterator newCP;
541               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
542                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
543               }
544             }
545
546             char *userEventString = new char[4096*2];
547             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
548             
549             static int userEventCounter = 20000;
550             CkPrintf("USER EVENT: %s\n", userEventString);
551             
552             traceRegisterUserEvent(userEventString, userEventCounter); 
553             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
554             userEventCounter++;
555             
556           }
557           
558         } else {
559           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
560         }
561         
562         
563       }
564       
565     } else {
566       
567       
568     }
569    
570 #endif
571
572
573
574     if(frameworkShouldAdvancePhase){
575       gotoNextPhase();  
576     }
577     
578     if(haveControlPointChangeCallback){ 
579       // Create a high priority message and send it to the callback
580       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
581       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
582       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
583       controlPointChangeCallback.send(msg);
584     }
585     
586   }
587   
588   /// Determine if any control point is known to affect an entry method
589   bool controlPointManager::controlPointAffectsThisEP(int ep){
590     std::map<std::string, std::set<int> >::iterator iter;
591     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
592       if(iter->second.count(ep)>0){
593         return true;
594       }
595     }
596     return false;    
597   }
598   
599   /// Determine if any control point is known to affect a chare array  
600   bool controlPointManager::controlPointAffectsThisArray(int array){
601     std::map<std::string, std::set<int> >::iterator iter;
602     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
603       if(iter->second.count(array)>0){
604         return true;
605       }
606     }
607     return false;   
608   }
609   
610
611   /// The data from the current phase
612   instrumentedPhase * controlPointManager::currentPhaseData(){
613     int s = allData.phases.size();
614     CkAssert(s>=1);
615     return allData.phases[s-1];
616   }
617  
618
619   /// The data from the previous phase
620   instrumentedPhase * controlPointManager::previousPhaseData(){
621     int s = allData.phases.size();
622     if(s >= 2 && phase_id > 0) {
623       return allData.phases[s-2];
624     } else {
625       return NULL;
626     }
627   }
628  
629   /// The data from two phases back
630   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
631     int s = allData.phases.size();
632     if(s >= 3 && phase_id > 0) {
633       return allData.phases[s-3];
634     } else {
635       return NULL;
636     }
637   }
638   
639
640   /// Called by either the application or the Control Point Framework to advance to the next phase  
641   void controlPointManager::gotoNextPhase(){
642     
643 #ifndef CP_DISABLE_TRACING
644     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
645     fflush(stdout);
646
647     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
648       alreadyRequestedAll = true;
649       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
650       CkPrintf("Requesting all measurements\n");
651       thisProxy.requestAll(*cb);
652       delete cb;
653     
654     } else {
655       
656       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
657         alreadyRequestedMemoryUsage = true;
658         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
659         thisProxy.requestMemoryUsage(*cb);
660         delete cb;
661       }
662       
663       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
664         alreadyRequestedIdleTime = true;
665         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
666         thisProxy.requestIdleTime(*cb);
667         delete cb;
668       }
669     }
670     
671
672 #endif
673
674
675
676 #if CMK_LBDB_ON && 0
677     LBDatabase * myLBdatabase = LBDatabaseObj();
678     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
679     const CkVec<LBObj*> objs = myLBDB->getObjs();
680     const int objCount = myLBDB->getObjCount();
681     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
682     
683     floatType maxObjWallTime = -1.0;
684     
685     for(int i=0;i<objs.length();i++){
686       LBObj* o = objs[i];
687       const LDObjData d = o->ObjData();
688       floatType cpuTime = d.cpuTime;
689       floatType wallTime = d.wallTime;
690       // can also get object handles from the LDObjData struct
691       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
692       if(wallTime > maxObjWallTime){
693
694       }
695       
696     }
697
698     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
699     
700 #endif    
701
702
703     
704     // increment phase id
705     phase_id++;
706     
707
708     // Create new entry for the phase we are starting now
709     instrumentedPhase * newPhase = new instrumentedPhase();
710     allData.phases.push_back(newPhase);
711     
712     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
713
714   }
715
716   /// An application uses this to register an instrumented timing for this phase
717   void controlPointManager::setTiming(double time){
718     currentPhaseData()->times.push_back(time);
719
720 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
721        
722     // First we should register this currently executing message as a path, because it is likely an important one to consider.
723     //    registerTerminalEntryMethod();
724     
725     // save the critical path for this phase
726     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
727     //    maxTerminalPathHistory.reset();
728     
729     
730     // Reset the counts for the currently executing message
731     //resetThisEntryPath();
732         
733 #endif
734     
735   }
736   
737   /// Entry method called on all PEs to request CPU utilization statistics
738   void controlPointManager::requestIdleTime(CkCallback cb){
739 #ifndef CP_DISABLE_TRACING
740     double i = localControlPointTracingInstance()->idleRatio();
741     double idle[3];
742     idle[0] = i;
743     idle[1] = i;
744     idle[2] = i;
745     
746     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
747     
748     localControlPointTracingInstance()->resetTimings();
749
750     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
751 #else
752     CkAbort("Should not get here\n");
753 #endif
754   }
755   
756   /// All processors reduce their memory usages in requestIdleTime() to this method
757   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
758 #ifndef CP_DISABLE_TRACING
759     int size=msg->getSize() / sizeof(double);
760     CkAssert(size==3);
761     double *r=(double *) msg->getData();
762         
763     instrumentedPhase* prevPhase = previousPhaseData();
764     if(prevPhase != NULL){
765       prevPhase->idleTime.min = r[0];
766       prevPhase->idleTime.avg = r[1]/CkNumPes();
767       prevPhase->idleTime.max = r[2];
768       prevPhase->idleTime.print();
769       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
770     } else {
771       CkPrintf("There is no previous phase to store the idle time measurements\n");
772     }
773     
774     alreadyRequestedIdleTime = false;
775     checkForShutdown();
776     delete msg;
777 #else
778     CkAbort("Should not get here\n");
779 #endif
780   }
781
782
783
784
785
786
787   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
788   void controlPointManager::requestAll(CkCallback cb){
789 #ifndef CP_DISABLE_TRACING
790     TraceControlPoints *t = localControlPointTracingInstance();
791
792     double data[ALL_REDUCTION_SIZE];
793
794     double *idle = data;
795     double *over = data+3;
796     double *mem = data+6;
797     double *msgBytes = data+7;  
798     double *grainsize = data+11;  
799
800     const double i = t->idleRatio();
801     idle[0] = i;
802     idle[1] = i;
803     idle[2] = i;
804
805     const double o = t->overheadRatio();
806     over[0] = o;
807     over[1] = o;
808     over[2] = o;
809
810     const double m = t->memoryUsageMB();
811     mem[0] = m;
812
813     msgBytes[0] = t->b2;
814     msgBytes[1] = t->b3;
815     msgBytes[2] = t->b2mlen;
816     msgBytes[3] = t->b3mlen;
817
818     grainsize[0] = t->grainSize();
819     
820     localControlPointTracingInstance()->resetAll();
821
822     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
823 #else
824     CkAbort("Should not get here\n");
825 #endif
826   }
827   
828   /// All processors reduce their memory usages in requestIdleTime() to this method
829   void controlPointManager::gatherAll(CkReductionMsg *msg){
830 #ifndef CP_DISABLE_TRACING
831     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
832     int size=msg->getSize() / sizeof(double);
833     double *data=(double *) msg->getData();
834         
835     double *idle = data;
836     double *over = data+3;
837     double *mem = data+6;
838     double *msgBytes = data+7;
839     double *granularity = data+11;
840
841
842     //    std::string b = allData.toString();
843
844     instrumentedPhase* prevPhase = previousPhaseData();
845     if(prevPhase != NULL){
846       prevPhase->idleTime.min = idle[0];
847       prevPhase->idleTime.avg = idle[1]/CkNumPes();
848       prevPhase->idleTime.max = idle[2];
849       
850       prevPhase->overheadTime.min = over[0];
851       prevPhase->overheadTime.avg = over[1]/CkNumPes();
852       prevPhase->overheadTime.max = over[2];
853       
854       prevPhase->memoryUsageMB = mem[0];
855       
856       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
857
858       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
859       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
860
861       /** The average of the grain sizes on all PEs in us */
862       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
863
864       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
865       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
866
867       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
868       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
869
870       if(bytesPerInvoke2 > bytesPerInvoke3)
871         prevPhase->bytesPerInvoke = bytesPerInvoke2;
872       else
873         prevPhase->bytesPerInvoke = bytesPerInvoke3;
874
875       //      prevPhase->print();
876       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
877
878       //      std::string a = allData.toString();
879
880       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
881       
882     } else {
883       CkPrintf("There is no previous phase to store measurements\n");
884     }
885     
886     alreadyRequestedAll = false;
887     checkForShutdown();
888     delete msg;
889 #else
890     CkAbort("Should not get here\n");
891 #endif
892   }
893
894
895
896
897   void controlPointManager::checkForShutdown(){
898     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
899       doExitNow();
900     }
901   }
902
903
904   void controlPointManager::exitIfReady(){
905      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
906        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
907        doExitNow();
908      } else {
909        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
910        exitWhenReady = true;
911      }
912   }
913
914
915
916   void controlPointManager::doExitNow(){
917           writeOutputToDisk();
918           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
919           CkExit();
920   }
921
922   void controlPointManager::writeOutputToDisk(){
923           if(writeDataFileAtShutdown){
924                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
925           }
926   }
927
928
929   /// Entry method called on all PEs to request memory usage
930   void controlPointManager::requestMemoryUsage(CkCallback cb){
931     int m = CmiMaxMemoryUsage() / 1024 / 1024;
932     CmiResetMaxMemory();
933     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
934     contribute(sizeof(int),&m,CkReduction::max_int, cb);
935   }
936
937   /// All processors reduce their memory usages to this method
938   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
939     int size=msg->getSize() / sizeof(int);
940     CkAssert(size==1);
941     int *m=(int *) msg->getData();
942
943     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
944     
945     instrumentedPhase* prevPhase = previousPhaseData();
946     if(prevPhase != NULL){
947       prevPhase->memoryUsageMB = m[0];
948     } else {
949       CkPrintf("No place to store memory usage");
950     }
951
952     alreadyRequestedMemoryUsage = false;
953     checkForShutdown();
954     delete msg;
955   }
956
957
958 //   /// Inform the control point framework that a named control point affects the priorities of some array  
959 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
960 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
961     
962 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
963 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
964 //     } else {
965 //       std::set<int> s;
966 //       s.insert(groupIdx);
967 //       affectsPrioritiesArray[std::string(name)] = s;
968 //     }
969     
970 // #if DEBUGPRINT   
971 //     std::map<std::string, std::set<int> >::iterator f;
972 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
973 //       std::string name = f->first;
974 //       std::set<int> &vals = f->second;
975 //       cout << "Control point " << name << " affects arrays: ";
976 //       std::set<int>::iterator i;
977 //       for(i=vals.begin(); i!=vals.end();++i){
978 //      cout << *i << " ";
979 //       }
980 //       cout << endl;
981 //     }
982 // #endif
983     
984 //   }
985   
986 //   /// Inform the control point framework that a named control point affects the priority of some entry method
987 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
988 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
989
990 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
991 //       affectsPrioritiesEP[std::string(name)].insert(idx);
992 //     } else {
993 //       std::set<int> s;
994 //       s.insert(idx);
995 //       affectsPrioritiesEP[std::string(name)] = s;
996 //     }
997     
998 // #if DEBUGPRINT
999 //     std::map<std::string, std::set<int> >::iterator f;
1000 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
1001 //       std::string name = f->first;
1002 //       std::set<int> &vals = f->second;
1003 //       cout << "Control point " << name << " affects EP: ";
1004 //       std::set<int>::iterator i;
1005 //       for(i=vals.begin(); i!=vals.end();++i){
1006 //      cout << *i << " ";
1007 //       }
1008 //       cout << endl;
1009 //     }
1010 // #endif
1011
1012
1013 //   }
1014   
1015
1016
1017 /// An interface callable by the application.
1018 void gotoNextPhase(){
1019   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1020 }
1021
1022 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1023 {
1024   gotoNextPhase();
1025 }
1026
1027
1028 /// A mainchare that is used just to create our controlPointManager group at startup
1029 class controlPointMain : public CBase_controlPointMain {
1030 public:
1031   controlPointMain(CkArgMsg* args){
1032 #if OLDRANDSEED
1033     struct timeval tp;
1034     gettimeofday(& tp, NULL);
1035     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1036 #else
1037     double time = CmiWallTimer();
1038     int sec = (int)time;
1039     int usec = (int)((time - (double)sec)*1000000.0);
1040     random_seed =  sec ^ usec;
1041 #endif
1042     
1043     
1044     double period;
1045     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1046     if(haveSamplePeriod){
1047       CkPrintf("controlPointSamplePeriod = %lf sec\n", period);
1048       controlPointSamplePeriod =  (int)(period * 1000); /**< A readonly */
1049     } else {
1050       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1051     }
1052     
1053     
1054     
1055     whichTuningScheme = RandomSelection;
1056
1057
1058     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1059       whichTuningScheme = RandomSelection;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1061       whichTuningScheme = ExhaustiveSearch;
1062     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1063       whichTuningScheme = AlwaysDefaults;
1064     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1065       whichTuningScheme = SimulatedAnnealing;
1066     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1067       whichTuningScheme = CriticalPathAutoPrioritization;
1068     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1069       whichTuningScheme = UseBestKnownTiming;
1070     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1071       whichTuningScheme = UseSteering;
1072     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1073       whichTuningScheme = MemoryAware;
1074     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1075       whichTuningScheme = Simplex;
1076     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1077       whichTuningScheme = DivideAndConquer;
1078     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriod", "Adjust the load balancing period (Constant Predictor)") ){
1079       whichTuningScheme = LDBPeriod;
1080     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodLinear", "Adjust the load balancing period (Linear Predictor)") ){
1081       whichTuningScheme = LDBPeriodLinear;
1082     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodQuadratic", "Adjust the load balancing period (Quadratic Predictor)") ){
1083       whichTuningScheme = LDBPeriodQuadratic;
1084     } else if ( CmiGetArgFlagDesc(args->argv,"+CPLDBPeriodOptimal", "Adjust the load balancing period (Optimal Predictor)") ){
1085       whichTuningScheme = LDBPeriodOptimal;
1086     }
1087
1088     char *defValStr = NULL;
1089     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1090       CkPrintf("You specified default value string: %s\n", defValStr);
1091       
1092       // Parse the string, looking for commas
1093      
1094
1095       char *tok = strtok(defValStr, ",");
1096       while (tok) {
1097         // split on the equal sign
1098         int len = strlen(tok);
1099         char *eqsign = strchr(tok, '=');
1100         if(eqsign != NULL && eqsign != tok){
1101           *eqsign = '\0';
1102           char *cpname = tok;
1103           std::string cpName(tok);
1104           char *cpDefVal = eqsign+1;      
1105           int v=-1;
1106           if(sscanf(cpDefVal, "%d", &v) == 1){
1107             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1108             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1109             defaultControlPointValues[cpName] = v;
1110           }
1111         }
1112         tok = strtok(NULL, ",");
1113       }
1114
1115     }
1116
1117     shouldGatherAll = false;
1118     shouldGatherMemoryUsage = false;
1119     shouldGatherUtilization = false;
1120     
1121     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1122       shouldGatherAll = true;
1123     } else {
1124       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1125         shouldGatherMemoryUsage = true;
1126       }
1127       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1128         shouldGatherUtilization = true;
1129       }
1130     }
1131     
1132     writeDataFileAtShutdown = false;   
1133     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1134       writeDataFileAtShutdown = true;
1135     }
1136
1137     shouldFilterOutputData = true;
1138     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1139       shouldFilterOutputData = false;
1140     }
1141
1142
1143    loadDataFileAtStartup = false;   
1144     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1145       loadDataFileAtStartup = true;
1146     }
1147
1148     char *cpdatafile;
1149     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1150       sprintf(CPDataFilename, "%s", cpdatafile);
1151     } else {
1152       sprintf(CPDataFilename, "controlPointData.txt");
1153     }
1154
1155
1156     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1157   }
1158   ~controlPointMain(){}
1159 };
1160
1161 /// An interface callable by the application.
1162 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1163   CkAssert(CkMyPe() == 0);
1164   CkPrintf("Application has registered a control point change callback\n");
1165   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1166 }
1167
1168 /// An interface callable by the application.
1169 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1170   if(CkMyPe() == 0){
1171     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1172     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1173   }
1174 }
1175
1176 /// An interface callable by the application.
1177 void registerControlPointTiming(double time){
1178   CkAssert(CkMyPe() == 0);
1179 #if DEBUGPRINT>0
1180   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1181 #endif
1182   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1183 }
1184
1185 /// An interface callable by the application.
1186 void controlPointTimingStamp() {
1187   CkAssert(CkMyPe() == 0);
1188 #if DEBUGPRINT>0
1189   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1190 #endif
1191   
1192   static double prev_time = 0.0;
1193   double t = CmiWallTimer();
1194   double duration = t - prev_time;
1195   prev_time = t;
1196     
1197   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1198 }
1199
1200 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1201 {
1202   controlPointTimingStamp();
1203 }
1204
1205
1206 FDECL void FTN_NAME(SETFRAMEWORKADVANCEPHASEF,setframeworkadvancephasef)(CMK_TYPEDEF_INT4 *value) 
1207 {
1208   setFrameworkAdvancePhase(*value);
1209 }
1210
1211
1212
1213
1214 /// Shutdown the control point framework, writing data to disk if necessary
1215 extern "C" void controlPointShutdown(){
1216   if(CkMyPe() == 0){
1217
1218     // wait for gathering of idle time & memory usage to complete
1219     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1220
1221   }
1222 }
1223
1224 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1225 void controlPointInitNode(){
1226 //  CkPrintf("controlPointInitNode()\n");
1227   registerExitFn(controlPointShutdown);
1228 }
1229
1230 /// Called periodically to allow control point framework to do things periodically
1231 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1232 #ifdef DEBUGPRINT
1233   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1234 #endif
1235   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1236   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1237 }
1238
1239
1240
1241
1242
1243 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1244 /// user observed characteristic to adapt specific control point values.
1245 /// This function must return valid values for newControlPoints.
1246 void controlPointManager::generatePlan() {
1247   const double startGenerateTime = CmiWallTimer();
1248   const int phase_id = this->phase_id;
1249   const int effective_phase = allData.phases.size();
1250
1251   // Only generate a plan once per phase
1252   if(generatedPlanForStep == phase_id) 
1253     return;
1254   generatedPlanForStep = phase_id;
1255  
1256   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1257   printTuningScheme();
1258
1259   // By default lets put the previous phase data into newControlPoints
1260   instrumentedPhase *prevPhase = previousPhaseData();
1261   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1262           newControlPoints[cpsIter->first] = cpsIter->second;
1263   }
1264
1265
1266   if( whichTuningScheme == RandomSelection ){
1267     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1268     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1269       const std::string &name = cpsIter->first;
1270       const std::pair<int,int> &bounds = cpsIter->second;
1271       const int lb = bounds.first;
1272       const int ub = bounds.second;
1273       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1274     }
1275   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1276     // -----------------------------------------------------------
1277     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1278     //   
1279     // This scheme will return the median value for the range 
1280     // early on, and then will transition over to the new control points
1281     // determined by the critical path adapting code
1282
1283     // This won't work until the periodic function is fixed up a bit
1284
1285     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1286     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1287       const std::string &name = cpsIter->first;
1288       const std::pair<int,int> &bounds = cpsIter->second;
1289       const int lb = bounds.first;
1290       const int ub = bounds.second;
1291       newControlPoints[name] =  (lb+ub)/2;
1292     }
1293
1294   } else if ( whichTuningScheme == MemoryAware ) {
1295
1296     // -----------------------------------------------------------
1297     //  STEERING BASED ON MEMORY USAGE
1298
1299     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1300     instrumentedPhase *prevPhase = previousPhaseData();
1301  
1302     if(phase_id%2 == 0){
1303       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1304       twoAgoPhase->print();
1305       CkPrintf("\n");
1306       fflush(stdout);
1307       
1308       // See if memory usage is low:
1309       double memUsage = twoAgoPhase->memoryUsageMB;
1310       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1311       fflush(stdout);
1312       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1313         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1314         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1315         
1316         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1317         newControlPoints = twoAgoPhase->controlPoints;
1318
1319
1320         CkPrintf("Steering (memory based) initialized plan\n");
1321         fflush(stdout);
1322
1323         // look for a possible control point knob to turn
1324         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1325         
1326         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1327         bool found = false;
1328         std::string cpName;
1329         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1330         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1331         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1332           cpName = iter->first;
1333           info = &iter->second;
1334           found = true;
1335           break;
1336         }
1337
1338         // Adapt the control point value
1339         if(found){
1340           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1341           fflush(stdout);
1342           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1343           const int maxValue = controlPointSpace[cpName].second;
1344           
1345           if(twoAgoValue+1 <= maxValue){
1346             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1347           }
1348         }
1349         
1350       }
1351     }
1352
1353     CkPrintf("Steering (memory based) done for this phase\n");
1354     fflush(stdout);
1355
1356   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1357
1358     // -----------------------------------------------------------
1359     //  USE BEST KNOWN TIME  
1360     static bool firstTime = true;
1361     if(firstTime){
1362       firstTime = false;
1363       instrumentedPhase *best = allData.findBest(); 
1364       CkPrintf("Best known phase is:\n");
1365       best->print();
1366       CkPrintf("\n");
1367       
1368       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1369       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1370         const std::string &name = cpsIter->first;
1371         newControlPoints[name] =  best->controlPoints[name];
1372       }
1373     }
1374   } else if( whichTuningScheme == LDBPeriod) {
1375     // Assume this is used in this manner:
1376     //  1) go to next phase
1377     //  2) request control point
1378     //  3) load balancing
1379     //  4) computation
1380     
1381     
1382     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1383     instrumentedPhase *prevPhase = previousPhaseData();
1384     
1385     
1386     const std::vector<double> &times = twoAgoPhase->times;
1387     const int oldNumTimings = times.size();
1388
1389
1390     const std::vector<double> &timesNew = prevPhase->times;
1391     const int newNumTimings = timesNew.size();
1392
1393
1394     if(oldNumTimings > 4 && newNumTimings > 4){
1395       
1396       // Build model of execution time based on two phases ago
1397       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1398       
1399       double oldSum = 0;
1400       
1401       for(int i=2; i<oldNumTimings; i++){
1402         oldSum += times[i];
1403       }
1404       
1405       const double oldAvg = oldSum / (oldNumTimings-2);
1406       
1407       
1408       
1409       
1410       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1411       const double expectedTotalTime = oldAvg * newNumTimings;
1412       
1413       
1414       // Measure actual time
1415       double newSum = 0.0;
1416       for(int i=2; i<newNumTimings; ++i){
1417         newSum += timesNew[i];
1418       }
1419       
1420       const double newAvg = newSum / (newNumTimings-2);
1421       const double newTotalTimeExcludingLBSteps = newAvg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1422       
1423       const double benefit = expectedTotalTime - newTotalTimeExcludingLBSteps;
1424       
1425       // Determine load balance cost
1426       const double lbcost = timesNew[0] + timesNew[1] - 2.0*newAvg;
1427       
1428       const double benefitAfterLB = benefit - lbcost;
1429     
1430     
1431       // Determine whether LB cost outweights the estimated benefit
1432       CkPrintf("Constant Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, newTotalTimeExcludingLBSteps);
1433     
1434     
1435       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1436       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1437         const std::string &name = cpsIter->first;
1438         const std::pair<int,int> &bounds = cpsIter->second;
1439         const int lb = bounds.first;
1440         const int ub = bounds.second;
1441       
1442         if(benefitAfterLB > 0){
1443           CkPrintf("Constant Model: Beneficial LB\n");
1444           int newval = newControlPoints[name] / 2;
1445           if(newval > lb)
1446             newControlPoints[name] = newval;
1447           else 
1448             newControlPoints[name] = lb;
1449         } else {
1450           CkPrintf("Constant Model: Detrimental LB\n");
1451           int newval = newControlPoints[name] * 2;
1452           if(newval < ub)
1453             newControlPoints[name] = newval;
1454           else
1455             newControlPoints[name] = ub;
1456         }
1457       }
1458     }
1459     
1460     
1461   }  else if( whichTuningScheme == LDBPeriodLinear) {
1462     // Assume this is used in this manner:
1463     //  1) go to next phase
1464     //  2) request control point
1465     //  3) load balancing
1466     //  4) computation
1467
1468
1469     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1470     instrumentedPhase *prevPhase = previousPhaseData();
1471     
1472     const std::vector<double> &times = twoAgoPhase->times;
1473     const int oldNumTimings = times.size();
1474
1475     const std::vector<double> &timesNew = prevPhase->times;
1476     const int newNumTimings = timesNew.size();
1477     
1478
1479     if(oldNumTimings > 4 && newNumTimings > 4){
1480
1481       // Build model of execution time based on two phases ago
1482       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1483       const int b1 = 2 + (oldNumTimings-2)/2;
1484       double s1 = 0;
1485       double s2 = 0;
1486     
1487       const double ldbStepsTime = times[0] + times[1];
1488     
1489       for(int i=2; i<b1; i++){
1490         s1 += times[i];
1491       }
1492       for(int i=b1; i<oldNumTimings; i++){
1493         s2 += times[i];
1494       }
1495       
1496       
1497       // Compute the estimated time for the last phase's data
1498     
1499       const double a1 = s1 / (double)(b1-2);
1500       const double a2 = s2 / (double)(oldNumTimings-b1);
1501       const double aold = (a1+a2)/2.0;
1502
1503       const double expectedTotalTime = newNumTimings*(aold+(oldNumTimings+newNumTimings)*(a2-a1)/oldNumTimings);
1504         
1505     
1506       // Measure actual time
1507       double sum = 0.0;
1508       for(int i=2; i<newNumTimings; ++i){
1509         sum += timesNew[i];
1510       }
1511
1512       const double avg = sum / ((double)(newNumTimings-2));
1513       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1514
1515       const double benefit = expectedTotalTime - actualTotalTime;
1516
1517       // Determine load balance cost
1518       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1519
1520       const double benefitAfterLB = benefit - lbcost;
1521
1522     
1523       // Determine whether LB cost outweights the estimated benefit
1524       CkPrintf("Linear Model: lbcost = %f, expected = %f, actual = %f\n", lbcost, expectedTotalTime, actualTotalTime);
1525     
1526     
1527     
1528       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1529       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1530         const std::string &name = cpsIter->first;
1531         const std::pair<int,int> &bounds = cpsIter->second;
1532         const int lb = bounds.first;
1533         const int ub = bounds.second;
1534       
1535         if(benefitAfterLB > 0){
1536           CkPrintf("Linear Model: Beneficial LB\n");
1537           int newval = newControlPoints[name] / 2;
1538           if(newval > lb)
1539             newControlPoints[name] = newval;
1540           else 
1541             newControlPoints[name] = lb;
1542         } else {
1543           CkPrintf("Linear Model: Detrimental LB\n");
1544           int newval = newControlPoints[name] * 2;
1545           if(newval < ub)
1546             newControlPoints[name] = newval;
1547           else 
1548             newControlPoints[name] = ub;
1549         }
1550       }
1551     }
1552
1553   }
1554
1555   else if( whichTuningScheme == LDBPeriodQuadratic) {
1556     // Assume this is used in this manner:
1557     //  1) go to next phase
1558     //  2) request control point
1559     //  3) load balancing
1560     //  4) computation
1561
1562
1563     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1564     instrumentedPhase *prevPhase = previousPhaseData();
1565         
1566     const std::vector<double> &times = twoAgoPhase->times;
1567     const int oldNumTimings = times.size();
1568
1569     const std::vector<double> &timesNew = prevPhase->times;
1570     const int newNumTimings = timesNew.size();
1571
1572     
1573     if(oldNumTimings > 4 && newNumTimings > 4){
1574
1575
1576       // Build model of execution time based on two phases ago
1577       // Compute the average times for each 1/3 of the steps, except for the 2 first steps where load balancing occurs
1578       const int b1 = 2 + (oldNumTimings-2)/3;
1579       const int b2 = 2 + (2*(oldNumTimings-2))/3;
1580       double s1 = 0;
1581       double s2 = 0;
1582       double s3 = 0;
1583
1584       const double ldbStepsTime = times[0] + times[1];
1585     
1586       for(int i=2; i<b1; i++){
1587         s1 += times[i];
1588       }
1589       for(int i=b1; i<b2; i++){
1590         s2 += times[i];
1591       }
1592       for(int i=b2; i<oldNumTimings; i++){
1593         s3 += times[i];
1594       }
1595
1596     
1597       // Compute the estimated time for the last phase's data
1598     
1599       const double a1 = s1 / (double)(b1-2);
1600       const double a2 = s2 / (double)(b2-b1);
1601       const double a3 = s3 / (double)(oldNumTimings-b2);
1602     
1603       const double a = (a1-2.0*a2+a3)/2.0;
1604       const double b = (a1-4.0*a2+3.0*a3)/2.0;
1605       const double c = a3;
1606     
1607       // Computed as an integral from 0.5 to the number of bins of the same size as two ago phase + 0.5
1608       const double x1 = (double)newNumTimings / (double)oldNumTimings * 3.0 + 0.5;  // should be 3.5 if ratio is same
1609       const double x2 = 0.5;
1610       const double expectedTotalTime = a*x1*x1*x1/3.0 + b*x1*x1/2.0 + c*x1 - (a*x2*x2*x2/3.0 + b*x2*x2/2.0 + c*x2);
1611    
1612     
1613       // Measure actual time
1614       double sum = 0.0;
1615       for(int i=2; i<newNumTimings; ++i){
1616         sum += timesNew[i];
1617       }
1618
1619       const double avg = sum / ((double)(newNumTimings-2));
1620       const double actualTotalTime = avg * ((double)newNumTimings); // excluding the load balancing abnormal steps
1621
1622       const double benefit = expectedTotalTime - actualTotalTime;
1623
1624       // Determine load balance cost
1625       const double lbcost = timesNew[0] + timesNew[1] - 2.0*avg;
1626
1627       const double benefitAfterLB = benefit - lbcost;
1628
1629     
1630       // Determine whether LB cost outweights the estimated benefit
1631       CkPrintf("Quadratic Model: lbcost = %f, expected = %f, actual = %f, x1=%f, a1=%f, a2=%f, a3=%f, b1=%d, b2=%d, a=%f, b=%f, c=%f\n", lbcost, expectedTotalTime, actualTotalTime, x1, a1, a2, a3, b1, b2, a, b, c);
1632     
1633     
1634     
1635       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1636       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1637         const std::string &name = cpsIter->first;
1638         const std::pair<int,int> &bounds = cpsIter->second;
1639         const int lb = bounds.first;
1640         const int ub = bounds.second;
1641       
1642         if(benefitAfterLB > 0){
1643           CkPrintf("QuadraticModel: Beneficial LB\n");
1644           int newval = newControlPoints[name] / 2;
1645           if(newval > lb)
1646             newControlPoints[name] = newval;
1647           else 
1648             newControlPoints[name] = lb;
1649         } else {
1650           CkPrintf("QuadraticModel: Detrimental LB\n");
1651           int newval = newControlPoints[name] * 2;
1652           if(newval < ub)
1653             newControlPoints[name] = newval;
1654           else 
1655             newControlPoints[name] = ub;
1656         }
1657       
1658       }
1659     }
1660     
1661
1662   }  else if( whichTuningScheme == LDBPeriodOptimal) {
1663     // Assume this is used in this manner:
1664     //  1) go to next phase
1665     //  2) request control point
1666     //  3) load balancing
1667     //  4) computation
1668
1669
1670
1671     instrumentedPhase *prevPhase = previousPhaseData();
1672     
1673     const std::vector<double> &times = prevPhase->times;
1674     const int numTimings = times.size();
1675     
1676     if( numTimings > 4){
1677
1678       const int b1 = 2 + (numTimings-2)/2;
1679       double s1 = 0;
1680       double s2 = 0;
1681     
1682     
1683       for(int i=2; i<b1; i++){
1684         s1 += times[i];
1685       }
1686       for(int i=b1; i<numTimings; i++){
1687         s2 += times[i];
1688       }
1689       
1690     
1691       const double a1 = s1 / (double)(b1-2);
1692       const double a2 = s2 / (double)(numTimings-b1);
1693       const double avg = (a1+a1) / 2.0;
1694
1695       const double m = (a2-a1)/((double)(numTimings-2)/2.0); // An approximation of the slope of the execution times    
1696
1697       const double ldbStepsTime = times[0] + times[1];
1698       const double lbcost = ldbStepsTime - 2.0*avg; // An approximation of the 
1699       
1700
1701       int newval = roundDouble(sqrt(2.0*lbcost/m));
1702       
1703       // We don't really know what to do if m<=0, so we'll just double the period
1704       if(m<=0)
1705         newval = 2*numTimings;     
1706       
1707       CkPrintf("Optimal Model (double when negative): lbcost = %f, m = %f, new ldbperiod should be %d\n", lbcost, m, newval);    
1708     
1709     
1710       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1711       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1712         // TODO: lookup only control points that are relevant instead of all of them
1713         const std::string &name = cpsIter->first;
1714         const std::pair<int,int> &bounds = cpsIter->second;
1715         const int lb = bounds.first;
1716         const int ub = bounds.second;
1717         
1718         if(newval < lb){
1719           newControlPoints[name] = lb;
1720         } else if(newval > ub){
1721           newControlPoints[name] = ub;
1722         } else {
1723           newControlPoints[name] = newval;
1724         } 
1725         
1726       }
1727       
1728       
1729     }
1730     
1731  
1732
1733   } else if ( whichTuningScheme == UseSteering ) {
1734           // -----------------------------------------------------------
1735           //  STEERING BASED ON KNOWLEDGE
1736
1737           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1738           // plans are only generated after 3 phases
1739
1740           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1741           instrumentedPhase *prevPhase = previousPhaseData();
1742
1743           if(phase_id%4 == 0){
1744                   CkPrintf("Steering based on 2 phases ago:\n");
1745                   twoAgoPhase->print();
1746                   CkPrintf("\n");
1747                   fflush(stdout);
1748
1749                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1750
1751
1752                   // ========================================= Concurrency =============================================
1753                   // See if idle time is high:
1754                   {
1755                           double idleTime = twoAgoPhase->idleTime.avg;
1756                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1757                           fflush(stdout);
1758                           if(idleTime > 0.10){
1759                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1760                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1761
1762                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1763
1764                                   bool found = false;
1765                                   std::string cpName;
1766                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1767                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1768                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1769                                           cpName = iter->first;
1770                                           info = &iter->second;
1771
1772                                           // Initialize a new plan based on two phases ago
1773                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1774
1775                                           CkPrintf("Steering found knob to turn\n");
1776                                           fflush(stdout);
1777
1778                                           if(info->first == ControlPoint::EFF_INC){
1779                                                   const int maxValue = controlPointSpace[cpName].second;
1780                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1781                                                   if(twoAgoValue+1 <= maxValue){
1782                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1783                                                   }
1784                                           } else {
1785                                                   const int minValue = controlPointSpace[cpName].second;
1786                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1787                                                   if(twoAgoValue-1 >= minValue){
1788                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1789                                                   }
1790                                           }
1791
1792                                           possibleNextStepPlans.push_back(aNewPlan);
1793
1794                                   }
1795                           }
1796                   }
1797
1798                   // ========================================= Grain Size =============================================
1799                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1800                   {
1801                           double overheadTime = twoAgoPhase->overheadTime.avg;
1802                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1803                           fflush(stdout);
1804                           if(overheadTime > 0.10){
1805                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1806                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1807
1808                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1809
1810                                   bool found = false;
1811                                   std::string cpName;
1812                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1813                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1814                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1815                                           cpName = iter->first;
1816                                           info = &iter->second;
1817
1818                                           // Initialize a new plan based on two phases ago
1819                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1820
1821
1822
1823                                           CkPrintf("Steering found knob to turn\n");
1824                                           fflush(stdout);
1825
1826                                           if(info->first == ControlPoint::EFF_INC){
1827                                                   const int maxValue = controlPointSpace[cpName].second;
1828                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1829                                                   if(twoAgoValue+1 <= maxValue){
1830                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1831                                                   }
1832                                           } else {
1833                                                   const int minValue = controlPointSpace[cpName].second;
1834                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1835                                                   if(twoAgoValue-1 >= minValue){
1836                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1837                                                   }
1838                                           }
1839
1840                                           possibleNextStepPlans.push_back(aNewPlan);
1841
1842                                   }
1843
1844                           }
1845                   }
1846                   // ========================================= GPU Offload =============================================
1847                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1848                   {
1849                           double idleTime = twoAgoPhase->idleTime.avg;
1850                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1851                           fflush(stdout);
1852                           if(idleTime > 0.10){
1853                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1854                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1855
1856                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1857
1858                                   bool found = false;
1859                                   std::string cpName;
1860                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1861                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1862                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1863                                           cpName = iter->first;
1864                                           info = &iter->second;
1865
1866                                           // Initialize a new plan based on two phases ago
1867                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1868
1869
1870                                           CkPrintf("Steering found knob to turn\n");
1871                                           fflush(stdout);
1872
1873                                           if(info->first == ControlPoint::EFF_DEC){
1874                                                   const int maxValue = controlPointSpace[cpName].second;
1875                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1876                                                   if(twoAgoValue+1 <= maxValue){
1877                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1878                                                   }
1879                                           } else {
1880                                                   const int minValue = controlPointSpace[cpName].second;
1881                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1882                                                   if(twoAgoValue-1 >= minValue){
1883                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1884                                                   }
1885                                           }
1886
1887                                           possibleNextStepPlans.push_back(aNewPlan);
1888
1889                                   }
1890
1891                           }
1892                   }
1893
1894                   // ========================================= Done =============================================
1895
1896
1897                   if(possibleNextStepPlans.size() > 0){
1898                           newControlPoints = possibleNextStepPlans[0];
1899                   }
1900
1901
1902                   CkPrintf("Steering done for this phase\n");
1903                   fflush(stdout);
1904
1905           }  else {
1906                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1907                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1908                   fflush(stdout);
1909           }
1910
1911
1912
1913   } else if( whichTuningScheme == SimulatedAnnealing ) {
1914
1915     // -----------------------------------------------------------
1916     //  SIMULATED ANNEALING
1917     //  Simulated Annealing style hill climbing method
1918     //
1919     //  Find the best search space configuration, and try something
1920     //  nearby it, with a radius decreasing as phases increase
1921
1922     CkPrintf("Finding best phase\n");
1923     instrumentedPhase *bestPhase = allData.findBest();  
1924     CkPrintf("best found:\n"); 
1925     bestPhase->print(); 
1926     CkPrintf("\n"); 
1927     
1928
1929     const int convergeByPhase = 100;
1930     // Determine from 0.0 to 1.0 how far along we are
1931     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1932
1933     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1934     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1935       const std::string &name = cpsIter->first;
1936       const std::pair<int,int> &bounds = cpsIter->second;
1937       const int minValue = bounds.first;
1938       const int maxValue = bounds.second;
1939       
1940       const int before = bestPhase->controlPoints[name];   
1941   
1942       const int range = (int)((maxValue-minValue+1)*(1.0-progress));
1943
1944       int high = min(before+range, maxValue);
1945       int low = max(before-range, minValue);
1946       
1947       newControlPoints[name] = low;
1948       if(high-low > 0){
1949         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1950       } 
1951       
1952     }
1953
1954   } else if ( whichTuningScheme == DivideAndConquer ) {
1955
1956           // -----------------------------------------------------------
1957           //  STEERING FOR Divide & Conquer Programs
1958           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1959           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1960           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1961
1962           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1963           // plans are only generated after 3 phases
1964
1965           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1966           instrumentedPhase *prevPhase = previousPhaseData();
1967
1968           if(phase_id%4 == 0){
1969                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1970                   twoAgoPhase->print();
1971                   CkPrintf("\n");
1972                   fflush(stdout);
1973
1974                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1975
1976
1977                   // ========================================= Concurrency =============================================
1978                   // See if idle time is high:
1979                   {
1980                           double idleTime = twoAgoPhase->idleTime.avg;
1981                           double overheadTime = twoAgoPhase->overheadTime.avg;
1982
1983
1984                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1985                           fflush(stdout);
1986                           if(idleTime+overheadTime > 0.10){
1987                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1988                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1989
1990                                   int direction = -1;
1991                                   if (idleTime>overheadTime){
1992                                           // We need to decrease the grain size, or increase the available concurrency
1993                                           direction = 1;
1994                                   }
1995
1996                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1997
1998                                   bool found = false;
1999                                   std::string cpName;
2000                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
2001                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
2002                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
2003                                           cpName = iter->first;
2004                                           info = &iter->second;
2005                                           
2006                                           // Initialize a new plan based on two phases ago
2007                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
2008                                           bool newPlanExists = false;
2009
2010                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
2011                                           fflush(stdout);
2012
2013
2014
2015                                           if(info->first == ControlPoint::EFF_INC){
2016                                                   const int minValue = controlPointSpace[cpName].first;
2017                                                   const int maxValue = controlPointSpace[cpName].second;
2018                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2019                                                   const int newVal = twoAgoValue+1*direction;
2020                                                   if(newVal <= maxValue && newVal >= minValue){
2021                                                     aNewPlan[cpName] = newVal;
2022                                                     newPlanExists = true;
2023                                                   } else {
2024                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2025                                                   }
2026                                           } else {
2027                                                   const int minValue = controlPointSpace[cpName].first;
2028                                                   const int maxValue = controlPointSpace[cpName].second;
2029                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
2030                                                   const int newVal = twoAgoValue-1*direction;
2031                                                   if(newVal <= maxValue && newVal >= minValue){
2032                                                           aNewPlan[cpName] = newVal;
2033                                                           newPlanExists = true;
2034                                                   } else {
2035                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
2036                                                   }
2037                                           }
2038
2039                                           if(newPlanExists){
2040                                             possibleNextStepPlans.push_back(aNewPlan);
2041                                           }
2042                                   }
2043                           }
2044                   }
2045
2046                   if(possibleNextStepPlans.size() > 0){
2047                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
2048                     newControlPoints = possibleNextStepPlans[0];
2049                   } else {
2050                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
2051                   }
2052           }
2053
2054   } else if( whichTuningScheme == Simplex ) {
2055
2056           // -----------------------------------------------------------
2057           //  Nelder Mead Simplex Algorithm
2058           //
2059           //  A scheme that takes a simplex (n+1 points) and moves it
2060           //  toward the minimum, eventually converging there.
2061
2062           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
2063
2064   } else if( whichTuningScheme == ExhaustiveSearch ) {
2065     
2066     // -----------------------------------------------------------
2067     // EXHAUSTIVE SEARCH
2068    
2069     int numDimensions = controlPointSpace.size();
2070     CkAssert(numDimensions > 0);
2071   
2072     vector<int> lowerBounds(numDimensions);
2073     vector<int> upperBounds(numDimensions); 
2074   
2075     int d=0;
2076     std::map<std::string, pair<int,int> >::iterator iter;
2077     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
2078       //    CkPrintf("Examining dimension %d\n", d);
2079       lowerBounds[d] = iter->second.first;
2080       upperBounds[d] = iter->second.second;
2081       d++;
2082     }
2083    
2084     // get names for each dimension (control point)
2085     vector<std::string> names(numDimensions);
2086     d=0;
2087     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
2088       names[d] = niter->first;
2089       d++;
2090     }
2091   
2092   
2093     // Create the first possible configuration
2094     vector<int> config = lowerBounds;
2095     config.push_back(0);
2096   
2097     // Increment until finding an unused configuration
2098     allData.cleanupNames(); // put -1 values in for any control points missing
2099     std::vector<instrumentedPhase*> &phases = allData.phases;     
2100
2101     while(true){
2102     
2103       std::vector<instrumentedPhase*>::iterator piter; 
2104       bool testedConfiguration = false; 
2105       for(piter = phases.begin(); piter != phases.end(); piter++){ 
2106       
2107         // Test if the configuration matches this phase
2108         bool match = true;
2109         for(int j=0;j<numDimensions;j++){
2110           match &= (*piter)->controlPoints[names[j]] == config[j];
2111         }
2112       
2113         if(match){
2114           testedConfiguration = true; 
2115           break;
2116         } 
2117       } 
2118     
2119       if(testedConfiguration == false){ 
2120         break;
2121       } 
2122     
2123       // go to next configuration
2124       config[0] ++;
2125       // Propagate "carrys"
2126       for(int i=0;i<numDimensions;i++){
2127         if(config[i] > upperBounds[i]){
2128           config[i+1]++;
2129           config[i] = lowerBounds[i];
2130         }
2131       }
2132       // check if we have exhausted all possible values
2133       if(config[numDimensions] > 0){
2134         break;
2135       }
2136        
2137     }
2138   
2139     if(config[numDimensions] > 0){
2140       for(int i=0;i<numDimensions;i++){ 
2141         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
2142       }
2143     }
2144
2145     // results are now in config[i]
2146     
2147     for(int i=0; i<numDimensions; i++){
2148       newControlPoints[names[i]] = config[i];
2149       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
2150     }
2151
2152
2153   } else {
2154     CkAbort("Some Control Point tuning strategy must be enabled.\n");
2155   }
2156
2157
2158   const double endGenerateTime = CmiWallTimer();
2159   
2160   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
2161   
2162 }
2163
2164
2165
2166
2167 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
2168
2169
2170 /// Get control point value from range of integers [lb,ub]
2171 int controlPoint(const char *name, int lb, int ub){
2172   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
2173   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
2174   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
2175   int result;
2176
2177   // if we already have control point values for phase, return them
2178   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
2179     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
2180     return thisPhaseData->controlPoints[std::string(name)];
2181   }
2182   
2183
2184   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
2185     // For the first few phases, just use the lower bound, or the default if one was provided 
2186     // This ensures that the ranges for all the control points are known before we do anything fancy
2187     result = lb;
2188
2189     if(defaultControlPointValues.count(std::string(name)) > 0){
2190       int v = defaultControlPointValues[std::string(name)];
2191       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
2192       result = v;
2193     }
2194
2195   } else if(controlPointSpace.count(std::string(name)) == 0){
2196     // if this is the first time we've seen the CP, then return the lower bound
2197     result = lb;
2198     
2199   }  else {
2200     // Generate a plan one time for each phase
2201     controlPointManagerProxy.ckLocalBranch()->generatePlan();
2202     
2203     // Use the planned value:
2204     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
2205     
2206   }
2207
2208   if(!isInRange(result,ub,lb)){
2209     std::cerr << "control point = " << result << " is out of range: " << lb << " " << ub << std::endl;
2210     fflush(stdout);
2211     fflush(stderr);
2212   }
2213   CkAssert(isInRange(result,ub,lb));
2214   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
2215
2216   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
2217
2218   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
2219   //  thisPhaseData->print();
2220   
2221   return result;
2222 }
2223
2224
2225 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
2226   CkAssert(CkMyPe() == 0);
2227   return controlPoint("FortranCP", *lb, *ub);
2228 }
2229
2230
2231
2232
2233 /// Inform the control point framework that a named control point affects the priorities of some array  
2234 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
2235 //   CkGroupID aid = arraybase.ckGetArrayID();
2236 //   int groupIdx = aid.idx;
2237 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
2238 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
2239 // }
2240
2241
2242 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
2243 // void controlPointPriorityEntry(const char *name, int idx){
2244 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
2245 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
2246 // }
2247
2248
2249
2250
2251
2252 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
2253
2254     This function decomposes the algorithm into a state machine that allows it to
2255     evaluate one or more configurations through subsequent clls to this function.
2256     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
2257
2258     At one point in the algorithm, n+1 parameter configurations must be evaluated,
2259     so a list of them will be created and they will be evaluated, one per call.
2260
2261     Currently there is no stopping criteria, but the simplex ought to contract down
2262     to a few close configurations, and hence not much change will happen after this 
2263     point.
2264
2265  */
2266 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2267
2268         if(useBestKnown){
2269                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
2270                 return;
2271         }
2272
2273
2274         if(firstSimplexPhase< 0){
2275                 firstSimplexPhase = allData.phases.size()-1;
2276                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
2277         }
2278
2279         int n = controlPointSpace.size();
2280
2281         CkAssert(n>=2);
2282
2283
2284         if(simplexState == beginning){
2285                 // First we evaluate n+1 random points, then we go to a different state
2286                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
2287                         CkPrintf("Simplex Tuning: chose random configuration\n");
2288
2289                         // Choose random values from the middle third of the range in each dimension
2290                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2291                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2292                                 const std::string &name = cpsIter->first;
2293                                 const std::pair<int,int> &bounds = cpsIter->second;
2294                                 const int lb = bounds.first;
2295                                 const int ub = bounds.second;
2296                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
2297                         }
2298                 } else {
2299                         // Set initial simplex:
2300                         for(int i=0; i<n+1; i++){
2301                                 simplexIndices.insert(firstSimplexPhase+i);
2302                         }
2303                         CkAssert(simplexIndices.size() == n+1);
2304
2305                         // Transition to reflecting state
2306                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2307
2308                 }
2309
2310         } else if (simplexState == reflecting){
2311                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2312                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2313
2314                 // Find the highest time from other points in the simplex
2315                 double highestTimeForOthersInSimplex = 0.0;
2316                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2317                         double t = allData.phases[*iter]->medianTime();
2318                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
2319                                 highestTimeForOthersInSimplex = t;
2320                         }
2321                 }
2322
2323                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
2324
2325                 if(recentPhaseTime < highestTimeForOthersInSimplex){
2326                         // if y* < yl,  transition to "expanding"
2327                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
2328
2329                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
2330                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
2331                         CkAssert(simplexIndices.size() == n+1);
2332                         simplexIndices.erase(worstPhase);
2333                         CkAssert(simplexIndices.size() == n);
2334                         simplexIndices.insert(pPhase);
2335                         CkAssert(simplexIndices.size() == n+1);
2336                         // Transition to reflecting state
2337                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2338
2339                 } else {
2340                         // if y* > yh
2341                         if(recentPhaseTime <= worstTime){
2342                                 // replace Ph with P*
2343                                 CkAssert(simplexIndices.size() == n+1);
2344                                 simplexIndices.erase(worstPhase);
2345                                 CkAssert(simplexIndices.size() == n);
2346                                 simplexIndices.insert(pPhase);
2347                                 CkAssert(simplexIndices.size() == n+1);
2348                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
2349                                 worstPhase = pPhase;
2350                                 // Just as a sanity check, make sure we don't use the non-updated values.
2351                                 worst.clear();
2352                         }
2353
2354                         // Now, form P** and do contracting phase
2355                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
2356
2357                 }
2358
2359         } else if (simplexState == doneExpanding){
2360                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2361                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2362                 // A new configuration has been evaluated
2363
2364                 // Check to see if y** < y1
2365                 if(recentPhaseTime < bestTime){
2366                         // replace Ph by P**
2367                         CkAssert(simplexIndices.size() == n+1);
2368                         simplexIndices.erase(worstPhase);
2369                         CkAssert(simplexIndices.size() == n);
2370                         simplexIndices.insert(p2Phase);
2371                         CkAssert(simplexIndices.size() == n+1);
2372                 } else {
2373                         //      replace Ph by P*
2374                         CkAssert(simplexIndices.size() == n+1);
2375                         simplexIndices.erase(worstPhase);
2376                         CkAssert(simplexIndices.size() == n);
2377                         simplexIndices.insert(pPhase);
2378                         CkAssert(simplexIndices.size() == n+1);
2379                 }
2380
2381                 // Transition to reflecting state
2382                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2383
2384         }  else if (simplexState == contracting){
2385                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2386                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2387                 // A new configuration has been evaluated
2388
2389                 // Check to see if y** > yh
2390                 if(recentPhaseTime <= worstTime){
2391                         // replace Ph by P**
2392                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2393                         CkAssert(simplexIndices.size() == n+1);
2394                         simplexIndices.erase(worstPhase);
2395                         CkAssert(simplexIndices.size() == n);
2396                         simplexIndices.insert(p2Phase);
2397                         CkAssert(simplexIndices.size() == n+1);
2398                         // Transition to reflecting state
2399                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2400
2401                 } else {
2402                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2403                         simplexState = stillContracting;
2404
2405                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2406                         stillMustContractList = simplexIndices;
2407
2408                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2409                 }
2410
2411         } else if (simplexState == stillContracting){
2412                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2413
2414                 if(stillMustContractList.size()>0){
2415                         int c = *stillMustContractList.begin();
2416                         stillMustContractList.erase(c);
2417                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2418
2419                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2420
2421                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2422                         int v = 0;
2423                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2424                                 const std::string &name = cpsIter->first;
2425                                 const std::pair<int,int> &bounds = cpsIter->second;
2426                                 const int lb = bounds.first;
2427                                 const int ub = bounds.second;
2428
2429                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2430
2431                                 newControlPoints[name] = keepInRange((int)val,lb,ub);
2432                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2433                                 v++;
2434                         }
2435                 } else {
2436                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2437                         CkAssert(stillMustContractList.size() == 0);
2438                         simplexIndices.clear();
2439                         CkAssert(simplexIndices.size()==0);
2440                         for(int i=0; i<n+1; i++){
2441                                 simplexIndices.insert(allData.phases.size()-2-i);
2442                         }
2443                         CkAssert(simplexIndices.size()==n+1);
2444
2445                         // Transition to reflecting state
2446                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2447
2448                 }
2449
2450
2451         } else if (simplexState == expanding){
2452                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2453                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2454                 // A new configuration has been evaluated
2455
2456                 // determine if y** < yl
2457                 if(recentPhaseTime < bestTime){
2458                         // replace Ph by P**
2459                         CkAssert(simplexIndices.size() == n+1);
2460                         simplexIndices.erase(worstPhase);
2461                         CkAssert(simplexIndices.size() == n);
2462                         simplexIndices.insert(p2Phase);
2463                         CkAssert(simplexIndices.size() == n+1);
2464                         // Transition to reflecting state
2465                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2466
2467                 } else {
2468                         // else, replace ph with p*
2469                         CkAssert(simplexIndices.size() == n+1);
2470                         simplexIndices.erase(worstPhase);
2471                         CkAssert(simplexIndices.size() == n);
2472                         simplexIndices.insert(pPhase);
2473                         CkAssert(simplexIndices.size() == n+1);
2474                         // Transition to reflecting state
2475                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2476                 }
2477
2478
2479         } else {
2480                 CkAbort("Unknown simplexState");
2481         }
2482
2483 }
2484
2485
2486
2487 /** Replace the worst point with its reflection across the centroid. */
2488 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2489
2490         int n = controlPointSpace.size();
2491
2492         printSimplex(allData);
2493
2494         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2495
2496
2497         // Quit if the diameter of our simplex is small
2498         double maxr = 0.0;
2499         for(int i=0; i<n+1; i++){
2500                 //              Compute r^2 of this simplex point from the centroid
2501                 double r2 = 0.0;
2502                 std::vector<double> p = pointCoords(allData, i);
2503                 for(int d=0; d<p.size(); d++){
2504                         double r1 = (p[d] * centroid[d]);
2505                         r2 += r1*r1;
2506                 }
2507                 if(r2 > maxr)
2508                         maxr = r2;
2509         }
2510
2511 #if 0
2512         // At some point we quit this tuning
2513         if(maxr < 10){
2514                 useBestKnown = true;
2515                 instrumentedPhase *best = allData.findBest();
2516                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2517
2518                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2519                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2520                         const std::string &name = cpsIter->first;
2521                         newControlPoints[name] =  best->controlPoints[name];
2522                 }
2523         }
2524 #endif
2525
2526         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2527
2528         pPhase = allData.phases.size()-1;
2529         P.resize(n);
2530         for(int i=0; i<n; i++){
2531                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2532         }
2533
2534         for(int i=0; i<P.size(); i++){
2535                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2536         }
2537
2538
2539         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2540         int v = 0;
2541         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2542                 const std::string &name = cpsIter->first;
2543                 const std::pair<int,int> &bounds = cpsIter->second;
2544                 const int lb = bounds.first;
2545                 const int ub = bounds.second;
2546                 newControlPoints[name] = keepInRange((int)P[v],lb,ub);
2547                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2548                 v++;
2549         }
2550
2551
2552         // Transition to "reflecting" state
2553         simplexState = reflecting;
2554         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2555
2556 }
2557
2558
2559
2560
2561 /** Replace the newly tested reflection with a further expanded version of itself. */
2562 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2563         int n = controlPointSpace.size();
2564         printSimplex(allData);
2565
2566         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2567         // I believe the equation for P** in the text on page 308 is correct.
2568
2569         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2570
2571
2572         p2Phase = allData.phases.size()-1;
2573         P2.resize(n);
2574         for(int i=0; i<n; i++){
2575                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2576         }
2577
2578         for(int i=0; i<P2.size(); i++){
2579                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2580         }
2581
2582
2583         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2584         int v = 0;
2585         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2586                 const std::string &name = cpsIter->first;
2587                 const std::pair<int,int> &bounds = cpsIter->second;
2588                 const int lb = bounds.first;
2589                 const int ub = bounds.second;
2590                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2591                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2592                 v++;
2593         }
2594
2595
2596         // Transition to "doneExpanding" state
2597         simplexState = doneExpanding;
2598         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2599
2600 }
2601
2602
2603
2604
2605 /** Replace the newly tested reflection with a further expanded version of itself. */
2606 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2607         int n = controlPointSpace.size();
2608         printSimplex(allData);
2609
2610         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2611
2612
2613         p2Phase = allData.phases.size()-1;
2614         P2.resize(n);
2615         for(int i=0; i<n; i++){
2616                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2617         }
2618
2619         for(int i=0; i<P2.size(); i++){
2620                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2621         }
2622
2623
2624         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2625         int v = 0;
2626         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2627                 const std::string &name = cpsIter->first;
2628                 const std::pair<int,int> &bounds = cpsIter->second;
2629                 const int lb = bounds.first;
2630                 const int ub = bounds.second;
2631                 newControlPoints[name] = keepInRange((int)P2[v],lb,ub);
2632                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2633                 v++;
2634         }
2635         
2636         
2637         // Transition to "contracting" state
2638         simplexState = contracting;
2639         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2640
2641 }
2642
2643
2644
2645
2646 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2647         int n = controlPointSpace.size();
2648
2649         // Find worst performing point in the simplex
2650         worstPhase = -1;
2651         worstTime = -1.0;
2652         bestPhase = 10000000;
2653         bestTime = 10000000;
2654         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2655                 double t = allData.phases[*iter]->medianTime();
2656                 if(t > worstTime){
2657                         worstTime = t;
2658                         worstPhase = *iter;
2659                 }
2660                 if(t < bestTime){
2661                         bestTime = t;
2662                         bestPhase = *iter;
2663                 }
2664         }
2665         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2666
2667         best = pointCoords(allData, bestPhase);
2668         CkAssert(best.size() == n);
2669
2670         worst = pointCoords(allData, worstPhase);
2671         CkAssert(worst.size() == n);
2672
2673         // Calculate centroid of the remaining points in the simplex
2674         centroid.resize(n);
2675         for(int i=0; i<n; i++){
2676                 centroid[i] = 0.0;
2677         }
2678
2679         int numPts = 0;
2680
2681         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2682                 if(*iter != worstPhase){
2683                         numPts ++;
2684                         // Accumulate into the result vector
2685                         int c = 0;
2686                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2687                                 centroid[c] += citer->second;
2688                                 c++;
2689                         }
2690
2691                 }
2692         }
2693
2694         // Now divide the sums by the number of points.
2695         for(int v = 0; v<centroid.size(); v++) {
2696                 centroid[v] /= (double)numPts;
2697         }
2698
2699         CkAssert(centroid.size() == n);
2700
2701         for(int i=0; i<centroid.size(); i++){
2702                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2703         }
2704
2705
2706 }
2707
2708
2709
2710 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2711         std::vector<double> result;
2712         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2713                 result.push_back((double)citer->second);
2714         }
2715         return result;
2716 }
2717
2718
2719
2720
2721 void ControlPointWriteOutputToDisk(){
2722         CkAssert(CkMyPe() == 0);
2723         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2724 }
2725
2726
2727
2728 /*! @} */
2729
2730 #ifdef CP_DISABLE_TRACING
2731 #include "ControlPointsNoTrace.def.h"
2732 #else
2733 #include "ControlPoints.def.h"
2734 #endif