Adding a ControlPointsNoTrace module that provides the control point interface but...
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14
15 #include <climits>
16 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
17  
18
19 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
20 /**  @{ */
21
22 using namespace std;
23
24 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
25
26
27 //#undef DEBUGPRINT
28 //#define DEBUGPRINT 4
29
30 static void periodicProcessControlPoints(void* ptr, double currWallTime);
31
32
33 // A pointer to this PE's controlpoint manager Proxy
34 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
35 /* readonly */ int random_seed;
36 /* readonly */ long controlPointSamplePeriod;
37 /* readonly */ int whichTuningScheme;
38 /* readonly */ bool writeDataFileAtShutdown;
39 /* readonly */ bool shouldFilterOutputData;
40 /* readonly */ bool loadDataFileAtStartup;
41 /* readonly */ bool shouldGatherMemoryUsage;
42 /* readonly */ bool shouldGatherUtilization;
43 /* readonly */ bool shouldGatherAll;
44 /* readonly */ char CPDataFilename[512];
45
46
47
48 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
49 /// These probably come from the command line arguments, so are available only on PE 0
50 std::map<std::string, int> defaultControlPointValues;
51
52
53
54 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults}  tuningScheme;
55
56
57
58 void printTuningScheme(){
59   switch(whichTuningScheme){
60   case RandomSelection:
61     CkPrintf("Tuning Scheme: RandomSelection\n");
62     break;
63   case AlwaysDefaults:
64     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
65     break;
66   case SimulatedAnnealing:
67     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
68     break;
69   case ExhaustiveSearch:
70     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
71     break;
72   case CriticalPathAutoPrioritization:
73     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
74     break;
75   case UseBestKnownTiming:
76     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
77     break;
78   case UseSteering:
79     CkPrintf("Tuning Scheme: UseSteering\n");
80     break;
81   case MemoryAware:
82     CkPrintf("Tuning Scheme: MemoryAware\n");
83     break;
84   case Simplex:
85     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
86     break;
87   case DivideAndConquer:
88     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
89     break;
90   default:
91     CkPrintf("Unknown tuning scheme\n");
92     break;
93   }
94   fflush(stdout);
95 }
96
97
98
99 /// A reduction type that combines idle time measurements (min/sum/max etc.)
100 CkReduction::reducerType idleTimeReductionType;
101 /// A reducer that combines idle time measurements (min/sum/max etc.)
102 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
103   double ret[3];
104   if(nMsg > 0){
105     CkAssert(msgs[0]->getSize()==3*sizeof(double));
106     double *m=(double *)msgs[0]->getData();
107     ret[0]=m[0];
108     ret[1]=m[1];
109     ret[2]=m[2];
110   }
111   for (int i=1;i<nMsg;i++) {
112     CkAssert(msgs[i]->getSize()==3*sizeof(double));
113     double *m=(double *)msgs[i]->getData();
114     ret[0]=min(ret[0],m[0]);
115     ret[1]+=m[1];
116     ret[2]=max(ret[2],m[2]);
117   }  
118   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
119 }
120
121
122
123 /// A reduction type that combines idle, overhead, and memory measurements
124 CkReduction::reducerType allMeasuresReductionType;
125 /// A reducer that combines idle, overhead, and memory measurements
126 #define ALL_REDUCTION_SIZE 12
127 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
128   double ret[ALL_REDUCTION_SIZE];
129   if(nMsg > 0){
130     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
131     double *m=(double *)msgs[0]->getData();
132     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
133   }
134   for (int i=1;i<nMsg;i++) {
135     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
136     double *m=(double *)msgs[i]->getData();
137     // idle time (min/sum/max)
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141     // overhead time (min/sum/max)
142     ret[3]=min(ret[3],m[3]);
143     ret[4]+=m[4];
144     ret[5]=max(ret[5],m[5]);
145     // mem usage (max)
146     ret[6]=max(ret[6],m[6]);
147     // bytes per invocation for two types of entry methods
148     ret[7]+=m[7];
149     ret[8]+=m[8];
150     ret[9]+=m[9];
151     ret[10]+=m[10];
152     // Grain size (avg)
153     ret[11]+=m[11];
154   }  
155   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
156 }
157
158
159 /// Registers the control point framework's reduction handlers at startup on each PE
160 /*initproc*/ void registerCPReductions(void) {
161   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
162   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
163 }
164
165
166
167
168
169
170 /// Return an integer between 0 and num-1 inclusive
171 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
172 unsigned int randInt(unsigned int num, const char* name, int seed=0){
173   CkAssert(num > 0);
174   CkAssert(num < 1000);
175
176   unsigned long hash = 0;
177   unsigned int c;
178   unsigned char * str = (unsigned char*)name;
179
180   while (c = *str++){
181     unsigned int c2 = (c+64)%128;
182     unsigned int c3 = (c2*5953)%127;
183     hash = c3 + (hash << 6) + (hash << 16) - hash;
184   }
185
186   unsigned long shuffled1 = (hash*2083)%7907;
187   unsigned long shuffled2 = (seed*4297)%2017;
188
189   unsigned long shuffled3 = (random_seed*4799)%7919;
190
191   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
192
193   unsigned int result = ((namehash * 6029) % 1117) % num;
194
195   CkAssert(result >=0 && result < num);
196   return result;
197 }
198
199
200
201 controlPointManager::controlPointManager() {
202   generatedPlanForStep = -1;
203
204     exitWhenReady = false;
205     alreadyRequestedMemoryUsage = false;   
206     alreadyRequestedIdleTime = false;
207     alreadyRequestedAll = false;
208     
209     instrumentedPhase * newPhase = new instrumentedPhase();
210     allData.phases.push_back(newPhase);   
211     
212     frameworkShouldAdvancePhase = false;
213     haveControlPointChangeCallback = false;
214 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
215     
216     ControlPoint::initControlPointEffects();
217
218     phase_id = 0;
219
220     if(loadDataFileAtStartup){    
221       loadDataFile();
222     }
223
224     
225     if(CkMyPe() == 0){
226       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
227     }
228
229     traceRegisterUserEvent("No currently executing message", 5000);
230     traceRegisterUserEvent("Zero time along critical path", 5010);
231     traceRegisterUserEvent("Positive total time along critical path", 5020);
232     traceRegisterUserEvent("env->setPathHistory()", 6000);
233     traceRegisterUserEvent("Critical Path", 5900);
234     traceRegisterUserEvent("Table Entry", 5901);
235
236
237 #if USER_EVENT_FOR_REGISTERTERMINALPATH
238     traceRegisterUserEvent("registerTerminalPath", 100); 
239 #endif
240
241   }
242   
243
244  controlPointManager::~controlPointManager(){
245 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
246   }
247
248
249   /// Loads the previous run data file
250   void controlPointManager::loadDataFile(){
251     ifstream infile(CPDataFilename);
252     vector<std::string> names;
253     std::string line;
254   
255     while(getline(infile,line)){
256       if(line[0] != '#')
257         break;
258     }
259   
260     int numTimings = 0;
261     std::istringstream n(line);
262     n >> numTimings;
263   
264     while(getline(infile,line)){ 
265       if(line[0] != '#') 
266         break; 
267     }
268
269     int numControlPointNames = 0;
270     std::istringstream n2(line);
271     n2 >> numControlPointNames;
272   
273     for(int i=0; i<numControlPointNames; i++){
274       getline(infile,line);
275       names.push_back(line);
276     }
277
278     for(int i=0;i<numTimings;i++){
279       getline(infile,line);
280       while(line[0] == '#')
281         getline(infile,line); 
282
283       instrumentedPhase * ips = new instrumentedPhase();
284
285       std::istringstream iss(line);
286
287       // Read memory usage for phase
288       iss >> ips->memoryUsageMB;
289       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
290
291       // Read idle time
292       iss >> ips->idleTime.min;
293       iss >> ips->idleTime.avg;
294       iss >> ips->idleTime.max;
295
296       // Read overhead time
297       iss >> ips->overheadTime.min;
298       iss >> ips->overheadTime.avg;
299       iss >> ips->overheadTime.max;
300
301       // read bytePerInvoke
302       iss >> ips->bytesPerInvoke;
303
304       // read grain size
305       iss >> ips->grainSize;
306
307       // Read control point values
308       for(int cp=0;cp<numControlPointNames;cp++){
309         int cpvalue;
310         iss >> cpvalue;
311         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
312       }
313
314       // ignore median time
315       double mt;
316       iss >> mt;
317
318       // Read all times
319
320       double time;
321
322       while(iss >> time){
323         ips->times.push_back(time);
324 #if DEBUGPRINT > 5
325         CkPrintf("read time %lf from file\n", time);
326 #endif
327       }
328
329       allData.phases.push_back(ips);
330
331     }
332
333     infile.close();
334   }
335
336
337
338   /// Add the current data to allData and output it to a file
339   void controlPointManager::writeDataFile(){
340     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
341     ofstream outfile(CPDataFilename);
342     allData.cleanupNames();
343
344 //    string s = allData.toString();
345 //    CkPrintf("At end: \n %s\n", s.c_str());
346
347     if(shouldFilterOutputData){
348       allData.verify();
349       allData.filterOutIncompletePhases();
350     }
351
352 //    string s2 = allData.toString();
353 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
354     if(allData.toString().length() > 10){
355       outfile << allData.toString();
356     } else {
357       outfile << " No data available to save to disk " << endl;
358     }
359     outfile.close();
360   }
361
362   /// User can register a callback that is called when application should advance to next phase
363   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
364     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
365     controlPointChangeCallback = cb;
366     haveControlPointChangeCallback = true;
367   }
368
369   /// Called periodically by the runtime to handle the control points
370   /// Currently called on each PE
371   void controlPointManager::processControlPoints(){
372
373     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
374
375
376     //==========================================================================================
377     // Print the data for each phase
378
379     const int s = allData.phases.size();
380
381 #if DEBUGPRINT
382     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
383     for(int p=0;p<s;++p){
384       const instrumentedPhase &phase = allData.phases[p];
385       const idleTimeContainer &idle = phase.idleTime;
386       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
387       vector<double> const &times = phase.times;
388
389       CkPrintf("Phase %d:\n", p);
390       idle.print();
391      //   CkPrintf("critical paths: (* affected by control point)\n");
392         //   for(int i=0;i<criticalPaths.size(); i++){
393         // If affected by a control point
394         //      criticalPaths[i].print();
395
396       //        criticalPaths[i].print();
397       //        CkPrintf("\n");
398         
399
400       //   }
401       CkPrintf("Timings:\n");
402       for(int i=0;i<times.size(); i++){
403         CkPrintf("%lf ", times[i]);
404       }
405       CkPrintf("\n");
406
407     }
408     
409     CkPrintf("\n\n");
410
411
412 #endif
413
414
415
416     //==========================================================================================
417     // If this is a phase during which we try to adapt control point values based on critical path
418 #if 0
419     if( s%5 == 4) {
420
421       // Find the most recent phase with valid critical path data and idle time measurements      
422       int whichPhase=-1;
423       for(int p=0; p<s; ++p){
424         const instrumentedPhase &phase = allData.phases[p];
425         const idleTimeContainer &idle = phase.idleTime;
426         if(idle.isValid() && phase.criticalPaths.size()>0 ){
427           whichPhase = p;
428         }
429       }
430       
431       
432       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
433       const instrumentedPhase &phase = allData.phases[whichPhase];
434       const idleTimeContainer &idle = phase.idleTime;
435       
436       if(idle.min > 0.1){
437         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
438         
439         // Determine the median critical path for this phase
440         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
441         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
442         CkAssert(phase.criticalPaths.size() > 0);
443         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
444         
445         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
446         
447         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
448           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
449
450
451           CkPrintf("Finding control points related to the critical path\n");
452           int cpcount = 0;
453           std::set<std::string> controlPointsAffectingCriticalPath;
454
455           
456           for(int e=0;e<path.getNumUsed();e++){
457             if(path.getUsedCount(e)>0){
458               int ep = path.getUsedEp(e);
459
460               std::map<std::string, std::set<int> >::iterator iter;
461               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
462                 if(iter->second.count(ep)>0){
463                   controlPointsAffectingCriticalPath.insert(iter->first);
464                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
465                   cpcount++;
466                 }
467               }
468               
469             }
470           }
471           
472
473           if(cpcount==0){
474             CkPrintf("No control points are known to affect the critical path\n");
475           } else {
476             double beginTime = CmiWallTimer();
477
478             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
479             
480             newControlPoints = phase.controlPoints;
481             
482             if(frameworkShouldAdvancePhase){
483               gotoNextPhase();  
484             }
485             
486             if(haveControlPointChangeCallback){ 
487 #if DEBUGPRINT
488               CkPrintf("Calling control point change callback\n");
489 #endif
490               // Create a high priority message and send it to the callback
491               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
492               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
493               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
494               controlPointChangeCallback.send(msg);
495               
496             }
497             
498             
499             // adjust the control points that can affect the critical path
500
501             char textDescription[4096*2];
502             textDescription[0] = '\0';
503
504             std::map<std::string,int>::iterator newCP;
505             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
506               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
507                 // decrease the value (increase priority) if within range
508                 int lowerbound = controlPointSpace[newCP->first].first;
509                 if(newCP->second > lowerbound){
510                   newControlPointsAvailable = true;
511                   newCP->second --;
512                 }
513               }
514             }
515            
516             // Create a string for a projections user event
517             if(1){
518               std::map<std::string,int>::iterator newCP;
519               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
520                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
521               }
522             }
523
524             char *userEventString = new char[4096*2];
525             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
526             
527             static int userEventCounter = 20000;
528             CkPrintf("USER EVENT: %s\n", userEventString);
529             
530             traceRegisterUserEvent(userEventString, userEventCounter); 
531             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
532             userEventCounter++;
533             
534           }
535           
536         } else {
537           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
538         }
539         
540         
541       }
542       
543     } else {
544       
545       
546     }
547    
548 #endif
549
550
551
552     if(frameworkShouldAdvancePhase){
553       gotoNextPhase();  
554     }
555     
556     if(haveControlPointChangeCallback){ 
557       // Create a high priority message and send it to the callback
558       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
559       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
560       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
561       controlPointChangeCallback.send(msg);
562     }
563     
564   }
565   
566   /// Determine if any control point is known to affect an entry method
567   bool controlPointManager::controlPointAffectsThisEP(int ep){
568     std::map<std::string, std::set<int> >::iterator iter;
569     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
570       if(iter->second.count(ep)>0){
571         return true;
572       }
573     }
574     return false;    
575   }
576   
577   /// Determine if any control point is known to affect a chare array  
578   bool controlPointManager::controlPointAffectsThisArray(int array){
579     std::map<std::string, std::set<int> >::iterator iter;
580     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
581       if(iter->second.count(array)>0){
582         return true;
583       }
584     }
585     return false;   
586   }
587   
588
589   /// The data from the current phase
590   instrumentedPhase * controlPointManager::currentPhaseData(){
591     int s = allData.phases.size();
592     CkAssert(s>=1);
593     return allData.phases[s-1];
594   }
595  
596
597   /// The data from the previous phase
598   instrumentedPhase * controlPointManager::previousPhaseData(){
599     int s = allData.phases.size();
600     if(s >= 2 && phase_id > 0) {
601       return allData.phases[s-2];
602     } else {
603       return NULL;
604     }
605   }
606  
607   /// The data from two phases back
608   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
609     int s = allData.phases.size();
610     if(s >= 3 && phase_id > 0) {
611       return allData.phases[s-3];
612     } else {
613       return NULL;
614     }
615   }
616   
617
618   /// Called by either the application or the Control Point Framework to advance to the next phase  
619   void controlPointManager::gotoNextPhase(){
620     
621 #ifndef CP_DISABLE_TRACING
622     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
623     fflush(stdout);
624
625     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
626       alreadyRequestedAll = true;
627       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
628       CkPrintf("Requesting all measurements\n");
629       thisProxy.requestAll(*cb);
630       delete cb;
631     
632     } else {
633       
634       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
635         alreadyRequestedMemoryUsage = true;
636         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
637         thisProxy.requestMemoryUsage(*cb);
638         delete cb;
639       }
640       
641       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
642         alreadyRequestedIdleTime = true;
643         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
644         thisProxy.requestIdleTime(*cb);
645         delete cb;
646       }
647     }
648     
649
650 #endif
651
652
653
654 #if CMK_LBDB_ON && 0
655     LBDatabase * myLBdatabase = LBDatabaseObj();
656     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
657     const CkVec<LBObj*> objs = myLBDB->getObjs();
658     const int objCount = myLBDB->getObjCount();
659     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
660     
661     floatType maxObjWallTime = -1.0;
662     
663     for(int i=0;i<objs.length();i++){
664       LBObj* o = objs[i];
665       const LDObjData d = o->ObjData();
666       floatType cpuTime = d.cpuTime;
667       floatType wallTime = d.wallTime;
668       // can also get object handles from the LDObjData struct
669       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
670       if(wallTime > maxObjWallTime){
671
672       }
673       
674     }
675
676     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
677     
678 #endif    
679
680
681     
682     // increment phase id
683     phase_id++;
684     
685
686     // Create new entry for the phase we are starting now
687     instrumentedPhase * newPhase = new instrumentedPhase();
688     allData.phases.push_back(newPhase);
689     
690     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
691
692   }
693
694   /// An application uses this to register an instrumented timing for this phase
695   void controlPointManager::setTiming(double time){
696     currentPhaseData()->times.push_back(time);
697
698 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
699        
700     // First we should register this currently executing message as a path, because it is likely an important one to consider.
701     //    registerTerminalEntryMethod();
702     
703     // save the critical path for this phase
704     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
705     //    maxTerminalPathHistory.reset();
706     
707     
708     // Reset the counts for the currently executing message
709     //resetThisEntryPath();
710         
711 #endif
712     
713   }
714   
715   /// Entry method called on all PEs to request CPU utilization statistics
716   void controlPointManager::requestIdleTime(CkCallback cb){
717 #ifndef CP_DISABLE_TRACING
718     double i = localControlPointTracingInstance()->idleRatio();
719     double idle[3];
720     idle[0] = i;
721     idle[1] = i;
722     idle[2] = i;
723     
724     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
725     
726     localControlPointTracingInstance()->resetTimings();
727
728     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
729 #else
730     CkAbort("Should not get here\n");
731 #endif
732   }
733   
734   /// All processors reduce their memory usages in requestIdleTime() to this method
735   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
736 #ifndef CP_DISABLE_TRACING
737     int size=msg->getSize() / sizeof(double);
738     CkAssert(size==3);
739     double *r=(double *) msg->getData();
740         
741     instrumentedPhase* prevPhase = previousPhaseData();
742     if(prevPhase != NULL){
743       prevPhase->idleTime.min = r[0];
744       prevPhase->idleTime.avg = r[1]/CkNumPes();
745       prevPhase->idleTime.max = r[2];
746       prevPhase->idleTime.print();
747       CkPrintf("Stored idle time min=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase);
748     } else {
749       CkPrintf("There is no previous phase to store the idle time measurements\n");
750     }
751     
752     alreadyRequestedIdleTime = false;
753     checkForShutdown();
754     delete msg;
755 #else
756     CkAbort("Should not get here\n");
757 #endif
758   }
759
760
761
762
763
764
765   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
766   void controlPointManager::requestAll(CkCallback cb){
767 #ifndef CP_DISABLE_TRACING
768     TraceControlPoints *t = localControlPointTracingInstance();
769
770     double data[ALL_REDUCTION_SIZE];
771
772     double *idle = data;
773     double *over = data+3;
774     double *mem = data+6;
775     double *msgBytes = data+7;  
776     double *grainsize = data+11;  
777
778     const double i = t->idleRatio();
779     idle[0] = i;
780     idle[1] = i;
781     idle[2] = i;
782
783     const double o = t->overheadRatio();
784     over[0] = o;
785     over[1] = o;
786     over[2] = o;
787
788     const double m = t->memoryUsageMB();
789     mem[0] = m;
790
791     msgBytes[0] = t->b2;
792     msgBytes[1] = t->b3;
793     msgBytes[2] = t->b2mlen;
794     msgBytes[3] = t->b3mlen;
795
796     grainsize[0] = t->grainSize();
797     
798     localControlPointTracingInstance()->resetAll();
799
800     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
801 #else
802     CkAbort("Should not get here\n");
803 #endif
804   }
805   
806   /// All processors reduce their memory usages in requestIdleTime() to this method
807   void controlPointManager::gatherAll(CkReductionMsg *msg){
808 #ifndef CP_DISABLE_TRACING
809     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
810     int size=msg->getSize() / sizeof(double);
811     double *data=(double *) msg->getData();
812         
813     double *idle = data;
814     double *over = data+3;
815     double *mem = data+6;
816     double *msgBytes = data+7;
817     double *granularity = data+11;
818
819
820     //    std::string b = allData.toString();
821
822     instrumentedPhase* prevPhase = previousPhaseData();
823     if(prevPhase != NULL){
824       prevPhase->idleTime.min = idle[0];
825       prevPhase->idleTime.avg = idle[1]/CkNumPes();
826       prevPhase->idleTime.max = idle[2];
827       
828       prevPhase->overheadTime.min = over[0];
829       prevPhase->overheadTime.avg = over[1]/CkNumPes();
830       prevPhase->overheadTime.max = over[2];
831       
832       prevPhase->memoryUsageMB = mem[0];
833       
834       CkPrintf("Stored idle time min=%lf, mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, (double)prevPhase->memoryUsageMB, prevPhase);
835
836       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
837       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
838
839       /** The average of the grain sizes on all PEs in us */
840       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
841
842       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
843       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
844
845       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
846       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
847
848       if(bytesPerInvoke2 > bytesPerInvoke3)
849         prevPhase->bytesPerInvoke = bytesPerInvoke2;
850       else
851         prevPhase->bytesPerInvoke = bytesPerInvoke3;
852
853       //      prevPhase->print();
854       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
855
856       //      std::string a = allData.toString();
857
858       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
859       
860     } else {
861       CkPrintf("There is no previous phase to store measurements\n");
862     }
863     
864     alreadyRequestedAll = false;
865     checkForShutdown();
866     delete msg;
867 #else
868     CkAbort("Should not get here\n");
869 #endif
870   }
871
872
873
874
875   void controlPointManager::checkForShutdown(){
876     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
877       doExitNow();
878     }
879   }
880
881
882   void controlPointManager::exitIfReady(){
883      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
884        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
885        doExitNow();
886      } else {
887        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
888        exitWhenReady = true;
889      }
890   }
891
892
893
894   void controlPointManager::doExitNow(){
895           writeOutputToDisk();
896           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
897           CkExit();
898   }
899
900   void controlPointManager::writeOutputToDisk(){
901           if(writeDataFileAtShutdown){
902                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
903           }
904   }
905
906
907   /// Entry method called on all PEs to request memory usage
908   void controlPointManager::requestMemoryUsage(CkCallback cb){
909     int m = CmiMaxMemoryUsage() / 1024 / 1024;
910     CmiResetMaxMemory();
911     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
912     contribute(sizeof(int),&m,CkReduction::max_int, cb);
913   }
914
915   /// All processors reduce their memory usages to this method
916   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
917     int size=msg->getSize() / sizeof(int);
918     CkAssert(size==1);
919     int *m=(int *) msg->getData();
920
921     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
922     
923     instrumentedPhase* prevPhase = previousPhaseData();
924     if(prevPhase != NULL){
925       prevPhase->memoryUsageMB = m[0];
926     } else {
927       CkPrintf("No place to store memory usage");
928     }
929
930     alreadyRequestedMemoryUsage = false;
931     checkForShutdown();
932     delete msg;
933   }
934
935
936 //   /// Inform the control point framework that a named control point affects the priorities of some array  
937 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
938 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
939     
940 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
941 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
942 //     } else {
943 //       std::set<int> s;
944 //       s.insert(groupIdx);
945 //       affectsPrioritiesArray[std::string(name)] = s;
946 //     }
947     
948 // #if DEBUGPRINT   
949 //     std::map<std::string, std::set<int> >::iterator f;
950 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
951 //       std::string name = f->first;
952 //       std::set<int> &vals = f->second;
953 //       cout << "Control point " << name << " affects arrays: ";
954 //       std::set<int>::iterator i;
955 //       for(i=vals.begin(); i!=vals.end();++i){
956 //      cout << *i << " ";
957 //       }
958 //       cout << endl;
959 //     }
960 // #endif
961     
962 //   }
963   
964 //   /// Inform the control point framework that a named control point affects the priority of some entry method
965 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
966 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
967
968 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
969 //       affectsPrioritiesEP[std::string(name)].insert(idx);
970 //     } else {
971 //       std::set<int> s;
972 //       s.insert(idx);
973 //       affectsPrioritiesEP[std::string(name)] = s;
974 //     }
975     
976 // #if DEBUGPRINT
977 //     std::map<std::string, std::set<int> >::iterator f;
978 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
979 //       std::string name = f->first;
980 //       std::set<int> &vals = f->second;
981 //       cout << "Control point " << name << " affects EP: ";
982 //       std::set<int>::iterator i;
983 //       for(i=vals.begin(); i!=vals.end();++i){
984 //      cout << *i << " ";
985 //       }
986 //       cout << endl;
987 //     }
988 // #endif
989
990
991 //   }
992   
993
994
995 /// An interface callable by the application.
996 void gotoNextPhase(){
997   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
998 }
999
1000
1001 /// A mainchare that is used just to create our controlPointManager group at startup
1002 class controlPointMain : public CBase_controlPointMain {
1003 public:
1004   controlPointMain(CkArgMsg* args){
1005 #if OLDRANDSEED
1006     struct timeval tp;
1007     gettimeofday(& tp, NULL);
1008     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1009 #else
1010     double time = CmiWallTimer();
1011     int sec = (int)time;
1012     int usec = (int)((time - (double)sec)*1000000.0);
1013     random_seed =  sec ^ usec;
1014 #endif
1015     
1016     
1017     double period;
1018     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1019     if(haveSamplePeriod){
1020       CkPrintf("controlPointSamplePeriod = %ld sec\n", period);
1021       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1022     } else {
1023       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1024     }
1025     
1026     
1027     
1028     whichTuningScheme = RandomSelection;
1029
1030
1031     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1032       whichTuningScheme = RandomSelection;
1033     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1034       whichTuningScheme = ExhaustiveSearch;
1035     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1036       whichTuningScheme = AlwaysDefaults;
1037     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1038       whichTuningScheme = SimulatedAnnealing;
1039     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1040       whichTuningScheme = CriticalPathAutoPrioritization;
1041     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1042       whichTuningScheme = UseBestKnownTiming;
1043     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1044       whichTuningScheme = UseSteering;
1045     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1046       whichTuningScheme = MemoryAware;
1047     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1048       whichTuningScheme = Simplex;
1049     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1050       whichTuningScheme = DivideAndConquer;
1051     }
1052
1053     char *defValStr = NULL;
1054     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1055       CkPrintf("You specified default value string: %s\n", defValStr);
1056       
1057       // Parse the string, looking for commas
1058      
1059
1060       char *tok = strtok(defValStr, ",");
1061       while (tok) {
1062         // split on the equal sign
1063         int len = strlen(tok);
1064         char *eqsign = strchr(tok, '=');
1065         if(eqsign != NULL && eqsign != tok){
1066           *eqsign = '\0';
1067           char *cpname = tok;
1068           std::string cpName(tok);
1069           char *cpDefVal = eqsign+1;      
1070           int v=-1;
1071           if(sscanf(cpDefVal, "%d", &v) == 1){
1072             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1073             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1074             defaultControlPointValues[cpName] = v;
1075           }
1076         }
1077         tok = strtok(NULL, ",");
1078       }
1079
1080     }
1081
1082     shouldGatherAll = false;
1083     shouldGatherMemoryUsage = false;
1084     shouldGatherUtilization = false;
1085     
1086     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1087       shouldGatherAll = true;
1088     } else {
1089       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1090         shouldGatherMemoryUsage = true;
1091       }
1092       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1093         shouldGatherUtilization = true;
1094       }
1095     }
1096     
1097     writeDataFileAtShutdown = false;   
1098     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1099       writeDataFileAtShutdown = true;
1100     }
1101
1102     shouldFilterOutputData = true;
1103     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1104       shouldFilterOutputData = false;
1105     }
1106
1107
1108    loadDataFileAtStartup = false;   
1109     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1110       loadDataFileAtStartup = true;
1111     }
1112
1113     char *cpdatafile;
1114     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1115       sprintf(CPDataFilename, "%s", cpdatafile);
1116     } else {
1117       sprintf(CPDataFilename, "controlPointData.txt");
1118     }
1119
1120
1121     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1122   }
1123   ~controlPointMain(){}
1124 };
1125
1126 /// An interface callable by the application.
1127 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1128   CkAssert(CkMyPe() == 0);
1129   CkPrintf("Application has registered a control point change callback\n");
1130   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1131 }
1132
1133 /// An interface callable by the application.
1134 void registerControlPointTiming(double time){
1135   CkAssert(CkMyPe() == 0);
1136 #if DEBUGPRINT>0
1137   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1138 #endif
1139   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1140 }
1141
1142 /// An interface callable by the application.
1143 void controlPointTimingStamp() {
1144   CkAssert(CkMyPe() == 0);
1145 #if DEBUGPRINT>0
1146   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1147 #endif
1148   
1149   static double prev_time = 0.0;
1150   double t = CmiWallTimer();
1151   double duration = t - prev_time;
1152   prev_time = t;
1153     
1154   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1155 }
1156
1157 /// Shutdown the control point framework, writing data to disk if necessary
1158 extern "C" void controlPointShutdown(){
1159   if(CkMyPe() == 0){
1160
1161     // wait for gathering of idle time & memory usage to complete
1162     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1163
1164   }
1165 }
1166
1167 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1168 void controlPointInitNode(){
1169 //  CkPrintf("controlPointInitNode()\n");
1170   registerExitFn(controlPointShutdown);
1171 }
1172
1173 /// Called periodically to allow control point framework to do things periodically
1174 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1175 #ifdef DEBUGPRINT
1176   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1177 #endif
1178   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1179   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1180 }
1181
1182
1183
1184
1185
1186 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1187 /// user observed characteristic to adapt specific control point values.
1188 /// This function must return valid values for newControlPoints.
1189 void controlPointManager::generatePlan() {
1190   const double startGenerateTime = CmiWallTimer();
1191   const int phase_id = this->phase_id;
1192   const int effective_phase = allData.phases.size();
1193
1194   // Only generate a plan once per phase
1195   if(generatedPlanForStep == phase_id) 
1196     return;
1197   generatedPlanForStep = phase_id;
1198  
1199   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1200   printTuningScheme();
1201
1202   // By default lets put the previous phase data into newControlPoints
1203   instrumentedPhase *prevPhase = previousPhaseData();
1204   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1205           newControlPoints[cpsIter->first] = cpsIter->second;
1206   }
1207
1208
1209   if( whichTuningScheme == RandomSelection ){
1210     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1211     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1212       const std::string &name = cpsIter->first;
1213       const std::pair<int,int> &bounds = cpsIter->second;
1214       const int lb = bounds.first;
1215       const int ub = bounds.second;
1216       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1217     }
1218   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1219     // -----------------------------------------------------------
1220     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1221     //   
1222     // This scheme will return the median value for the range 
1223     // early on, and then will transition over to the new control points
1224     // determined by the critical path adapting code
1225
1226     // This won't work until the periodic function is fixed up a bit
1227
1228     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1229     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1230       const std::string &name = cpsIter->first;
1231       const std::pair<int,int> &bounds = cpsIter->second;
1232       const int lb = bounds.first;
1233       const int ub = bounds.second;
1234       newControlPoints[name] =  (lb+ub)/2;
1235     }
1236
1237   } else if ( whichTuningScheme == MemoryAware ) {
1238
1239     // -----------------------------------------------------------
1240     //  STEERING BASED ON MEMORY USAGE
1241
1242     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1243     instrumentedPhase *prevPhase = previousPhaseData();
1244  
1245     if(phase_id%2 == 0){
1246       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1247       twoAgoPhase->print();
1248       CkPrintf("\n");
1249       fflush(stdout);
1250       
1251       // See if memory usage is low:
1252       double memUsage = twoAgoPhase->memoryUsageMB;
1253       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1254       fflush(stdout);
1255       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1256         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1257         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1258         
1259         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1260         newControlPoints = twoAgoPhase->controlPoints;
1261
1262
1263         CkPrintf("Steering (memory based) initialized plan\n");
1264         fflush(stdout);
1265
1266         // look for a possible control point knob to turn
1267         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1268         
1269         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1270         bool found = false;
1271         std::string cpName;
1272         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1273         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1274         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1275           cpName = iter->first;
1276           info = &iter->second;
1277           found = true;
1278           break;
1279         }
1280
1281         // Adapt the control point value
1282         if(found){
1283           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1284           fflush(stdout);
1285           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1286           const int maxValue = controlPointSpace[cpName].second;
1287           
1288           if(twoAgoValue+1 <= maxValue){
1289             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1290           }
1291         }
1292         
1293       }
1294     }
1295
1296     CkPrintf("Steering (memory based) done for this phase\n");
1297     fflush(stdout);
1298
1299   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1300
1301     // -----------------------------------------------------------
1302     //  USE BEST KNOWN TIME  
1303     static bool firstTime = true;
1304     if(firstTime){
1305       firstTime = false;
1306       instrumentedPhase *best = allData.findBest(); 
1307       CkPrintf("Best known phase is:\n");
1308       best->print();
1309       CkPrintf("\n");
1310       
1311       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1312       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1313         const std::string &name = cpsIter->first;
1314         newControlPoints[name] =  best->controlPoints[name];
1315       }
1316     }
1317
1318   } else if ( whichTuningScheme == UseSteering ) {
1319           // -----------------------------------------------------------
1320           //  STEERING BASED ON KNOWLEDGE
1321
1322           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1323           // plans are only generated after 3 phases
1324
1325           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1326           instrumentedPhase *prevPhase = previousPhaseData();
1327
1328           if(phase_id%4 == 0){
1329                   CkPrintf("Steering based on 2 phases ago:\n");
1330                   twoAgoPhase->print();
1331                   CkPrintf("\n");
1332                   fflush(stdout);
1333
1334                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1335
1336
1337                   // ========================================= Concurrency =============================================
1338                   // See if idle time is high:
1339                   {
1340                           double idleTime = twoAgoPhase->idleTime.avg;
1341                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1342                           fflush(stdout);
1343                           if(idleTime > 0.10){
1344                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1345                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1346
1347                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1348
1349                                   bool found = false;
1350                                   std::string cpName;
1351                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1352                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1353                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1354                                           cpName = iter->first;
1355                                           info = &iter->second;
1356
1357                                           // Initialize a new plan based on two phases ago
1358                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1359
1360                                           CkPrintf("Steering found knob to turn\n");
1361                                           fflush(stdout);
1362
1363                                           if(info->first == ControlPoint::EFF_INC){
1364                                                   const int maxValue = controlPointSpace[cpName].second;
1365                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1366                                                   if(twoAgoValue+1 <= maxValue){
1367                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1368                                                   }
1369                                           } else {
1370                                                   const int minValue = controlPointSpace[cpName].second;
1371                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1372                                                   if(twoAgoValue-1 >= minValue){
1373                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1374                                                   }
1375                                           }
1376
1377                                           possibleNextStepPlans.push_back(aNewPlan);
1378
1379                                   }
1380                           }
1381                   }
1382
1383                   // ========================================= Grain Size =============================================
1384                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1385                   {
1386                           double overheadTime = twoAgoPhase->overheadTime.avg;
1387                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1388                           fflush(stdout);
1389                           if(overheadTime > 0.10){
1390                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1391                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1392
1393                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1394
1395                                   bool found = false;
1396                                   std::string cpName;
1397                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1398                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1399                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1400                                           cpName = iter->first;
1401                                           info = &iter->second;
1402
1403                                           // Initialize a new plan based on two phases ago
1404                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1405
1406
1407
1408                                           CkPrintf("Steering found knob to turn\n");
1409                                           fflush(stdout);
1410
1411                                           if(info->first == ControlPoint::EFF_INC){
1412                                                   const int maxValue = controlPointSpace[cpName].second;
1413                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1414                                                   if(twoAgoValue+1 <= maxValue){
1415                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1416                                                   }
1417                                           } else {
1418                                                   const int minValue = controlPointSpace[cpName].second;
1419                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1420                                                   if(twoAgoValue-1 >= minValue){
1421                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1422                                                   }
1423                                           }
1424
1425                                           possibleNextStepPlans.push_back(aNewPlan);
1426
1427                                   }
1428
1429                           }
1430                   }
1431                   // ========================================= GPU Offload =============================================
1432                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1433                   {
1434                           double idleTime = twoAgoPhase->idleTime.avg;
1435                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1436                           fflush(stdout);
1437                           if(idleTime > 0.10){
1438                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1439                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1440
1441                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1442
1443                                   bool found = false;
1444                                   std::string cpName;
1445                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1446                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1447                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1448                                           cpName = iter->first;
1449                                           info = &iter->second;
1450
1451                                           // Initialize a new plan based on two phases ago
1452                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1453
1454
1455                                           CkPrintf("Steering found knob to turn\n");
1456                                           fflush(stdout);
1457
1458                                           if(info->first == ControlPoint::EFF_DEC){
1459                                                   const int maxValue = controlPointSpace[cpName].second;
1460                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1461                                                   if(twoAgoValue+1 <= maxValue){
1462                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1463                                                   }
1464                                           } else {
1465                                                   const int minValue = controlPointSpace[cpName].second;
1466                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1467                                                   if(twoAgoValue-1 >= minValue){
1468                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1469                                                   }
1470                                           }
1471
1472                                           possibleNextStepPlans.push_back(aNewPlan);
1473
1474                                   }
1475
1476                           }
1477                   }
1478
1479                   // ========================================= Done =============================================
1480
1481
1482                   if(possibleNextStepPlans.size() > 0){
1483                           newControlPoints = possibleNextStepPlans[0];
1484                   }
1485
1486
1487                   CkPrintf("Steering done for this phase\n");
1488                   fflush(stdout);
1489
1490           }  else {
1491                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1492                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1493                   fflush(stdout);
1494           }
1495
1496
1497
1498   } else if( whichTuningScheme == SimulatedAnnealing ) {
1499
1500     // -----------------------------------------------------------
1501     //  SIMULATED ANNEALING
1502     //  Simulated Annealing style hill climbing method
1503     //
1504     //  Find the best search space configuration, and try something
1505     //  nearby it, with a radius decreasing as phases increase
1506
1507     CkPrintf("Finding best phase\n");
1508     instrumentedPhase *bestPhase = allData.findBest();  
1509     CkPrintf("best found:\n"); 
1510     bestPhase->print(); 
1511     CkPrintf("\n"); 
1512     
1513
1514     const int convergeByPhase = 100;
1515     // Determine from 0.0 to 1.0 how far along we are
1516     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1517
1518     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1519     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1520       const std::string &name = cpsIter->first;
1521       const std::pair<int,int> &bounds = cpsIter->second;
1522       const int minValue = bounds.first;
1523       const int maxValue = bounds.second;
1524       
1525       const int before = bestPhase->controlPoints[name];   
1526   
1527       const int range = (maxValue-minValue+1)*(1.0-progress);
1528
1529       int high = min(before+range, maxValue);
1530       int low = max(before-range, minValue);
1531       
1532       newControlPoints[name] = low;
1533       if(high-low > 0){
1534         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1535       } 
1536       
1537     }
1538
1539   } else if ( whichTuningScheme == DivideAndConquer ) {
1540
1541           // -----------------------------------------------------------
1542           //  STEERING FOR Divide & Conquer Programs
1543           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1544           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1545           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1546
1547           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1548           // plans are only generated after 3 phases
1549
1550           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1551           instrumentedPhase *prevPhase = previousPhaseData();
1552
1553           if(phase_id%4 == 0){
1554                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1555                   twoAgoPhase->print();
1556                   CkPrintf("\n");
1557                   fflush(stdout);
1558
1559                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1560
1561
1562                   // ========================================= Concurrency =============================================
1563                   // See if idle time is high:
1564                   {
1565                           double idleTime = twoAgoPhase->idleTime.avg;
1566                           double overheadTime = twoAgoPhase->overheadTime.avg;
1567
1568
1569                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1570                           fflush(stdout);
1571                           if(idleTime+overheadTime > 0.10){
1572                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1573                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1574
1575                                   int direction = -1;
1576                                   if (idleTime>overheadTime){
1577                                           // We need to decrease the grain size, or increase the available concurrency
1578                                           direction = 1;
1579                                   }
1580
1581                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1582
1583                                   bool found = false;
1584                                   std::string cpName;
1585                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1586                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1587                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1588                                           cpName = iter->first;
1589                                           info = &iter->second;
1590                                           
1591                                           // Initialize a new plan based on two phases ago
1592                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1593                                           bool newPlanExists = false;
1594
1595                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1596                                           fflush(stdout);
1597
1598
1599
1600                                           if(info->first == ControlPoint::EFF_INC){
1601                                                   const int minValue = controlPointSpace[cpName].first;
1602                                                   const int maxValue = controlPointSpace[cpName].second;
1603                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1604                                                   const int newVal = twoAgoValue+1*direction;
1605                                                   if(newVal <= maxValue && newVal >= minValue){
1606                                                     aNewPlan[cpName] = newVal;
1607                                                     newPlanExists = true;
1608                                                   } else {
1609                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1610                                                   }
1611                                           } else {
1612                                                   const int minValue = controlPointSpace[cpName].first;
1613                                                   const int maxValue = controlPointSpace[cpName].second;
1614                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1615                                                   const int newVal = twoAgoValue-1*direction;
1616                                                   if(newVal <= maxValue && newVal >= minValue){
1617                                                           aNewPlan[cpName] = newVal;
1618                                                           newPlanExists = true;
1619                                                   } else {
1620                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1621                                                   }
1622                                           }
1623
1624                                           if(newPlanExists){
1625                                             possibleNextStepPlans.push_back(aNewPlan);
1626                                           }
1627                                   }
1628                           }
1629                   }
1630
1631                   if(possibleNextStepPlans.size() > 0){
1632                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1633                     newControlPoints = possibleNextStepPlans[0];
1634                   } else {
1635                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1636                   }
1637           }
1638
1639   } else if( whichTuningScheme == Simplex ) {
1640
1641           // -----------------------------------------------------------
1642           //  Nelder Mead Simplex Algorithm
1643           //
1644           //  A scheme that takes a simplex (n+1 points) and moves it
1645           //  toward the minimum, eventually converging there.
1646
1647           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1648
1649   } else if( whichTuningScheme == ExhaustiveSearch ) {
1650     
1651     // -----------------------------------------------------------
1652     // EXHAUSTIVE SEARCH
1653    
1654     int numDimensions = controlPointSpace.size();
1655     CkAssert(numDimensions > 0);
1656   
1657     vector<int> lowerBounds(numDimensions);
1658     vector<int> upperBounds(numDimensions); 
1659   
1660     int d=0;
1661     std::map<std::string, pair<int,int> >::iterator iter;
1662     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1663       //    CkPrintf("Examining dimension %d\n", d);
1664       lowerBounds[d] = iter->second.first;
1665       upperBounds[d] = iter->second.second;
1666       d++;
1667     }
1668    
1669     // get names for each dimension (control point)
1670     vector<std::string> names(numDimensions);
1671     d=0;
1672     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
1673       names[d] = niter->first;
1674       d++;
1675     }
1676   
1677   
1678     // Create the first possible configuration
1679     vector<int> config = lowerBounds;
1680     config.push_back(0);
1681   
1682     // Increment until finding an unused configuration
1683     allData.cleanupNames(); // put -1 values in for any control points missing
1684     std::vector<instrumentedPhase*> &phases = allData.phases;     
1685
1686     while(true){
1687     
1688       std::vector<instrumentedPhase*>::iterator piter; 
1689       bool testedConfiguration = false; 
1690       for(piter = phases.begin(); piter != phases.end(); piter++){ 
1691       
1692         // Test if the configuration matches this phase
1693         bool match = true;
1694         for(int j=0;j<numDimensions;j++){
1695           match &= (*piter)->controlPoints[names[j]] == config[j];
1696         }
1697       
1698         if(match){
1699           testedConfiguration = true; 
1700           break;
1701         } 
1702       } 
1703     
1704       if(testedConfiguration == false){ 
1705         break;
1706       } 
1707     
1708       // go to next configuration
1709       config[0] ++;
1710       // Propagate "carrys"
1711       for(int i=0;i<numDimensions;i++){
1712         if(config[i] > upperBounds[i]){
1713           config[i+1]++;
1714           config[i] = lowerBounds[i];
1715         }
1716       }
1717       // check if we have exhausted all possible values
1718       if(config[numDimensions] > 0){
1719         break;
1720       }
1721        
1722     }
1723   
1724     if(config[numDimensions] > 0){
1725       for(int i=0;i<numDimensions;i++){ 
1726         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
1727       }
1728     }
1729
1730     // results are now in config[i]
1731     
1732     for(int i=0; i<numDimensions; i++){
1733       newControlPoints[names[i]] = config[i];
1734       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
1735     }
1736
1737
1738   } else {
1739     CkAbort("Some Control Point tuning strategy must be enabled.\n");
1740   }
1741
1742
1743   const double endGenerateTime = CmiWallTimer();
1744   
1745   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
1746   
1747 }
1748
1749
1750
1751
1752 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
1753
1754
1755 /// Get control point value from range of integers [lb,ub]
1756 int controlPoint(const char *name, int lb, int ub){
1757   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
1758   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
1759   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
1760   int result;
1761
1762   // if we already have control point values for phase, return them
1763   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
1764     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
1765     return thisPhaseData->controlPoints[std::string(name)];
1766   }
1767   
1768
1769   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
1770     // For the first few phases, just use the lower bound, or the default if one was provided 
1771     // This ensures that the ranges for all the control points are known before we do anything fancy
1772     result = lb;
1773
1774     if(defaultControlPointValues.count(std::string(name)) > 0){
1775       int v = defaultControlPointValues[std::string(name)];
1776       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
1777       result = v;
1778     }
1779
1780   } else if(controlPointSpace.count(std::string(name)) == 0){
1781     // if this is the first time we've seen the CP, then return the lower bound
1782     result = lb;
1783     
1784   }  else {
1785     // Generate a plan one time for each phase
1786     controlPointManagerProxy.ckLocalBranch()->generatePlan();
1787     
1788     // Use the planned value:
1789     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
1790     
1791   }
1792
1793   CkAssert(isInRange(result,ub,lb));
1794   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
1795
1796   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
1797
1798   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
1799   //  thisPhaseData->print();
1800   
1801   return result;
1802 }
1803
1804
1805
1806
1807 /// Inform the control point framework that a named control point affects the priorities of some array  
1808 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
1809 //   CkGroupID aid = arraybase.ckGetArrayID();
1810 //   int groupIdx = aid.idx;
1811 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
1812 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
1813 // }
1814
1815
1816 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
1817 // void controlPointPriorityEntry(const char *name, int idx){
1818 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
1819 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
1820 // }
1821
1822
1823
1824
1825
1826 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
1827
1828     This function decomposes the algorithm into a state machine that allows it to
1829     evaluate one or more configurations through subsequent clls to this function.
1830     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
1831
1832     At one point in the algorithm, n+1 parameter configurations must be evaluated,
1833     so a list of them will be created and they will be evaluated, one per call.
1834
1835     Currently there is no stopping criteria, but the simplex ought to contract down
1836     to a few close configurations, and hence not much change will happen after this 
1837     point.
1838
1839  */
1840 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
1841
1842         if(useBestKnown){
1843                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
1844                 return;
1845         }
1846
1847
1848         if(firstSimplexPhase< 0){
1849                 firstSimplexPhase = allData.phases.size()-1;
1850                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
1851         }
1852
1853         int n = controlPointSpace.size();
1854
1855         CkAssert(n>=2);
1856
1857
1858         if(simplexState == beginning){
1859                 // First we evaluate n+1 random points, then we go to a different state
1860                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
1861                         CkPrintf("Simplex Tuning: chose random configuration\n");
1862
1863                         // Choose random values from the middle third of the range in each dimension
1864                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1865                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1866                                 const std::string &name = cpsIter->first;
1867                                 const std::pair<int,int> &bounds = cpsIter->second;
1868                                 const int lb = bounds.first;
1869                                 const int ub = bounds.second;
1870                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1871                         }
1872                 } else {
1873                         // Set initial simplex:
1874                         for(int i=0; i<n+1; i++){
1875                                 simplexIndices.insert(firstSimplexPhase+i);
1876                         }
1877                         CkAssert(simplexIndices.size() == n+1);
1878
1879                         // Transition to reflecting state
1880                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1881
1882                 }
1883
1884         } else if (simplexState == reflecting){
1885                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1886                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1887
1888                 // Find the highest time from other points in the simplex
1889                 double highestTimeForOthersInSimplex = 0.0;
1890                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
1891                         double t = allData.phases[*iter]->medianTime();
1892                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
1893                                 highestTimeForOthersInSimplex = t;
1894                         }
1895                 }
1896
1897                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
1898
1899                 if(recentPhaseTime < highestTimeForOthersInSimplex){
1900                         // if y* < yl,  transition to "expanding"
1901                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
1902
1903                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
1904                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
1905                         CkAssert(simplexIndices.size() == n+1);
1906                         simplexIndices.erase(worstPhase);
1907                         CkAssert(simplexIndices.size() == n);
1908                         simplexIndices.insert(pPhase);
1909                         CkAssert(simplexIndices.size() == n+1);
1910                         // Transition to reflecting state
1911                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1912
1913                 } else {
1914                         // if y* > yh
1915                         if(recentPhaseTime <= worstTime){
1916                                 // replace Ph with P*
1917                                 CkAssert(simplexIndices.size() == n+1);
1918                                 simplexIndices.erase(worstPhase);
1919                                 CkAssert(simplexIndices.size() == n);
1920                                 simplexIndices.insert(pPhase);
1921                                 CkAssert(simplexIndices.size() == n+1);
1922                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
1923                                 worstPhase = pPhase;
1924                                 // Just as a sanity check, make sure we don't use the non-updated values.
1925                                 worst.clear();
1926                         }
1927
1928                         // Now, form P** and do contracting phase
1929                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
1930
1931                 }
1932
1933         } else if (simplexState == doneExpanding){
1934                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1935                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1936                 // A new configuration has been evaluated
1937
1938                 // Check to see if y** < y1
1939                 if(recentPhaseTime < bestTime){
1940                         // replace Ph by P**
1941                         CkAssert(simplexIndices.size() == n+1);
1942                         simplexIndices.erase(worstPhase);
1943                         CkAssert(simplexIndices.size() == n);
1944                         simplexIndices.insert(p2Phase);
1945                         CkAssert(simplexIndices.size() == n+1);
1946                 } else {
1947                         //      replace Ph by P*
1948                         CkAssert(simplexIndices.size() == n+1);
1949                         simplexIndices.erase(worstPhase);
1950                         CkAssert(simplexIndices.size() == n);
1951                         simplexIndices.insert(pPhase);
1952                         CkAssert(simplexIndices.size() == n+1);
1953                 }
1954
1955                 // Transition to reflecting state
1956                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1957
1958         }  else if (simplexState == contracting){
1959                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1960                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1961                 // A new configuration has been evaluated
1962
1963                 // Check to see if y** > yh
1964                 if(recentPhaseTime <= worstTime){
1965                         // replace Ph by P**
1966                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
1967                         CkAssert(simplexIndices.size() == n+1);
1968                         simplexIndices.erase(worstPhase);
1969                         CkAssert(simplexIndices.size() == n);
1970                         simplexIndices.insert(p2Phase);
1971                         CkAssert(simplexIndices.size() == n+1);
1972                         // Transition to reflecting state
1973                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1974
1975                 } else {
1976                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
1977                         simplexState = stillContracting;
1978
1979                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
1980                         stillMustContractList = simplexIndices;
1981
1982                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
1983                 }
1984
1985         } else if (simplexState == stillContracting){
1986                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
1987
1988                 if(stillMustContractList.size()>0){
1989                         int c = *stillMustContractList.begin();
1990                         stillMustContractList.erase(c);
1991                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
1992
1993                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
1994
1995                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
1996                         int v = 0;
1997                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1998                                 const std::string &name = cpsIter->first;
1999                                 const std::pair<int,int> &bounds = cpsIter->second;
2000                                 const int lb = bounds.first;
2001                                 const int ub = bounds.second;
2002
2003                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2004
2005                                 newControlPoints[name] = keepInRange(val,lb,ub);
2006                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2007                                 v++;
2008                         }
2009                 } else {
2010                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2011                         CkAssert(stillMustContractList.size() == 0);
2012                         simplexIndices.clear();
2013                         CkAssert(simplexIndices.size()==0);
2014                         for(int i=0; i<n+1; i++){
2015                                 simplexIndices.insert(allData.phases.size()-2-i);
2016                         }
2017                         CkAssert(simplexIndices.size()==n+1);
2018
2019                         // Transition to reflecting state
2020                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2021
2022                 }
2023
2024
2025         } else if (simplexState == expanding){
2026                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2027                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2028                 // A new configuration has been evaluated
2029
2030                 // determine if y** < yl
2031                 if(recentPhaseTime < bestTime){
2032                         // replace Ph by P**
2033                         CkAssert(simplexIndices.size() == n+1);
2034                         simplexIndices.erase(worstPhase);
2035                         CkAssert(simplexIndices.size() == n);
2036                         simplexIndices.insert(p2Phase);
2037                         CkAssert(simplexIndices.size() == n+1);
2038                         // Transition to reflecting state
2039                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2040
2041                 } else {
2042                         // else, replace ph with p*
2043                         CkAssert(simplexIndices.size() == n+1);
2044                         simplexIndices.erase(worstPhase);
2045                         CkAssert(simplexIndices.size() == n);
2046                         simplexIndices.insert(pPhase);
2047                         CkAssert(simplexIndices.size() == n+1);
2048                         // Transition to reflecting state
2049                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2050                 }
2051
2052
2053         } else {
2054                 CkAbort("Unknown simplexState");
2055         }
2056
2057 }
2058
2059
2060
2061 /** Replace the worst point with its reflection across the centroid. */
2062 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2063
2064         int n = controlPointSpace.size();
2065
2066         printSimplex(allData);
2067
2068         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2069
2070
2071         // Quit if the diameter of our simplex is small
2072         double maxr = 0.0;
2073         for(int i=0; i<n+1; i++){
2074                 //              Compute r^2 of this simplex point from the centroid
2075                 double r2 = 0.0;
2076                 std::vector<double> p = pointCoords(allData, i);
2077                 for(int d=0; d<p.size(); d++){
2078                         double r1 = (p[d] * centroid[d]);
2079                         r2 += r1*r1;
2080                 }
2081                 if(r2 > maxr)
2082                         maxr = r2;
2083         }
2084
2085 #if 0
2086         // At some point we quit this tuning
2087         if(maxr < 10){
2088                 useBestKnown = true;
2089                 instrumentedPhase *best = allData.findBest();
2090                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2091
2092                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2093                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2094                         const std::string &name = cpsIter->first;
2095                         newControlPoints[name] =  best->controlPoints[name];
2096                 }
2097         }
2098 #endif
2099
2100         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2101
2102         pPhase = allData.phases.size()-1;
2103         P.resize(n);
2104         for(int i=0; i<n; i++){
2105                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2106         }
2107
2108         for(int i=0; i<P.size(); i++){
2109                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2110         }
2111
2112
2113         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2114         int v = 0;
2115         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2116                 const std::string &name = cpsIter->first;
2117                 const std::pair<int,int> &bounds = cpsIter->second;
2118                 const int lb = bounds.first;
2119                 const int ub = bounds.second;
2120                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2121                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2122                 v++;
2123         }
2124
2125
2126         // Transition to "reflecting" state
2127         simplexState = reflecting;
2128         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2129
2130 }
2131
2132
2133
2134
2135 /** Replace the newly tested reflection with a further expanded version of itself. */
2136 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2137         int n = controlPointSpace.size();
2138         printSimplex(allData);
2139
2140         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2141         // I believe the equation for P** in the text on page 308 is correct.
2142
2143         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2144
2145
2146         p2Phase = allData.phases.size()-1;
2147         P2.resize(n);
2148         for(int i=0; i<n; i++){
2149                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2150         }
2151
2152         for(int i=0; i<P2.size(); i++){
2153                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2154         }
2155
2156
2157         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2158         int v = 0;
2159         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2160                 const std::string &name = cpsIter->first;
2161                 const std::pair<int,int> &bounds = cpsIter->second;
2162                 const int lb = bounds.first;
2163                 const int ub = bounds.second;
2164                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2165                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2166                 v++;
2167         }
2168
2169
2170         // Transition to "doneExpanding" state
2171         simplexState = doneExpanding;
2172         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2173
2174 }
2175
2176
2177
2178
2179 /** Replace the newly tested reflection with a further expanded version of itself. */
2180 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2181         int n = controlPointSpace.size();
2182         printSimplex(allData);
2183
2184         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2185
2186
2187         p2Phase = allData.phases.size()-1;
2188         P2.resize(n);
2189         for(int i=0; i<n; i++){
2190                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2191         }
2192
2193         for(int i=0; i<P2.size(); i++){
2194                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2195         }
2196
2197
2198         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2199         int v = 0;
2200         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2201                 const std::string &name = cpsIter->first;
2202                 const std::pair<int,int> &bounds = cpsIter->second;
2203                 const int lb = bounds.first;
2204                 const int ub = bounds.second;
2205                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2206                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2207                 v++;
2208         }
2209
2210
2211         // Transition to "contracting" state
2212         simplexState = contracting;
2213         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2214
2215 }
2216
2217
2218
2219
2220 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2221         int n = controlPointSpace.size();
2222
2223         // Find worst performing point in the simplex
2224         worstPhase = -1;
2225         worstTime = -1.0;
2226         bestPhase = 10000000;
2227         bestTime = 10000000;
2228         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2229                 double t = allData.phases[*iter]->medianTime();
2230                 if(t > worstTime){
2231                         worstTime = t;
2232                         worstPhase = *iter;
2233                 }
2234                 if(t < bestTime){
2235                         bestTime = t;
2236                         bestPhase = *iter;
2237                 }
2238         }
2239         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2240
2241         best = pointCoords(allData, bestPhase);
2242         CkAssert(best.size() == n);
2243
2244         worst = pointCoords(allData, worstPhase);
2245         CkAssert(worst.size() == n);
2246
2247         // Calculate centroid of the remaining points in the simplex
2248         centroid.resize(n);
2249         for(int i=0; i<n; i++){
2250                 centroid[i] = 0.0;
2251         }
2252
2253         int numPts = 0;
2254
2255         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2256                 if(*iter != worstPhase){
2257                         numPts ++;
2258                         // Accumulate into the result vector
2259                         int c = 0;
2260                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2261                                 centroid[c] += citer->second;
2262                                 c++;
2263                         }
2264
2265                 }
2266         }
2267
2268         // Now divide the sums by the number of points.
2269         for(int v = 0; v<centroid.size(); v++) {
2270                 centroid[v] /= (double)numPts;
2271         }
2272
2273         CkAssert(centroid.size() == n);
2274
2275         for(int i=0; i<centroid.size(); i++){
2276                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2277         }
2278
2279
2280 }
2281
2282
2283
2284 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2285         std::vector<double> result;
2286         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2287                 result.push_back((double)citer->second);
2288         }
2289         return result;
2290 }
2291
2292
2293
2294
2295 void ControlPointWriteOutputToDisk(){
2296         CkAssert(CkMyPe() == 0);
2297         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2298 }
2299
2300
2301
2302 /*! @} */
2303
2304 #ifdef CP_DISABLE_TRACING
2305 #include "ControlPointsNoTrace.def.h"
2306 #else
2307 #include "ControlPoints.def.h"
2308 #endif