Merge branch 'charm' of charmgit:charm into charm
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14
15 #include <climits>
16 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
17  
18
19 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
20 /**  @{ */
21
22 using namespace std;
23
24 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
25
26
27 //#undef DEBUGPRINT
28 //#define DEBUGPRINT 4
29
30 static void periodicProcessControlPoints(void* ptr, double currWallTime);
31
32
33 // A pointer to this PE's controlpoint manager Proxy
34 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
35 /* readonly */ int random_seed;
36 /* readonly */ long controlPointSamplePeriod;
37 /* readonly */ int whichTuningScheme;
38 /* readonly */ bool writeDataFileAtShutdown;
39 /* readonly */ bool shouldFilterOutputData;
40 /* readonly */ bool loadDataFileAtStartup;
41 /* readonly */ bool shouldGatherMemoryUsage;
42 /* readonly */ bool shouldGatherUtilization;
43 /* readonly */ bool shouldGatherAll;
44 /* readonly */ char CPDataFilename[512];
45
46
47
48 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
49 /// These probably come from the command line arguments, so are available only on PE 0
50 std::map<std::string, int> defaultControlPointValues;
51
52
53
54 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults}  tuningScheme;
55
56
57
58 void printTuningScheme(){
59   switch(whichTuningScheme){
60   case RandomSelection:
61     CkPrintf("Tuning Scheme: RandomSelection\n");
62     break;
63   case AlwaysDefaults:
64     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
65     break;
66   case SimulatedAnnealing:
67     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
68     break;
69   case ExhaustiveSearch:
70     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
71     break;
72   case CriticalPathAutoPrioritization:
73     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
74     break;
75   case UseBestKnownTiming:
76     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
77     break;
78   case UseSteering:
79     CkPrintf("Tuning Scheme: UseSteering\n");
80     break;
81   case MemoryAware:
82     CkPrintf("Tuning Scheme: MemoryAware\n");
83     break;
84   case Simplex:
85     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
86     break;
87   case DivideAndConquer:
88     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
89     break;
90   default:
91     CkPrintf("Unknown tuning scheme\n");
92     break;
93   }
94   fflush(stdout);
95 }
96
97
98
99 /// A reduction type that combines idle time measurements (min/sum/max etc.)
100 CkReduction::reducerType idleTimeReductionType;
101 /// A reducer that combines idle time measurements (min/sum/max etc.)
102 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
103   double ret[3];
104   if(nMsg > 0){
105     CkAssert(msgs[0]->getSize()==3*sizeof(double));
106     double *m=(double *)msgs[0]->getData();
107     ret[0]=m[0];
108     ret[1]=m[1];
109     ret[2]=m[2];
110   }
111   for (int i=1;i<nMsg;i++) {
112     CkAssert(msgs[i]->getSize()==3*sizeof(double));
113     double *m=(double *)msgs[i]->getData();
114     ret[0]=min(ret[0],m[0]);
115     ret[1]+=m[1];
116     ret[2]=max(ret[2],m[2]);
117   }  
118   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
119 }
120
121
122
123 /// A reduction type that combines idle, overhead, and memory measurements
124 CkReduction::reducerType allMeasuresReductionType;
125 /// A reducer that combines idle, overhead, and memory measurements
126 #define ALL_REDUCTION_SIZE 12
127 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
128   double ret[ALL_REDUCTION_SIZE];
129   if(nMsg > 0){
130     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
131     double *m=(double *)msgs[0]->getData();
132     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
133   }
134   for (int i=1;i<nMsg;i++) {
135     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
136     double *m=(double *)msgs[i]->getData();
137     // idle time (min/sum/max)
138     ret[0]=min(ret[0],m[0]);
139     ret[1]+=m[1];
140     ret[2]=max(ret[2],m[2]);
141     // overhead time (min/sum/max)
142     ret[3]=min(ret[3],m[3]);
143     ret[4]+=m[4];
144     ret[5]=max(ret[5],m[5]);
145     // mem usage (max)
146     ret[6]=max(ret[6],m[6]);
147     // bytes per invocation for two types of entry methods
148     ret[7]+=m[7];
149     ret[8]+=m[8];
150     ret[9]+=m[9];
151     ret[10]+=m[10];
152     // Grain size (avg)
153     ret[11]+=m[11];
154   }  
155   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
156 }
157
158
159 /// Registers the control point framework's reduction handlers at startup on each PE
160 /*initproc*/ void registerCPReductions(void) {
161   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
162   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
163 }
164
165
166
167
168
169
170 /// Return an integer between 0 and num-1 inclusive
171 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
172 unsigned int randInt(unsigned int num, const char* name, int seed=0){
173   CkAssert(num > 0);
174   CkAssert(num < 1000);
175
176   unsigned long hash = 0;
177   unsigned int c;
178   unsigned char * str = (unsigned char*)name;
179
180   while (c = *str++){
181     unsigned int c2 = (c+64)%128;
182     unsigned int c3 = (c2*5953)%127;
183     hash = c3 + (hash << 6) + (hash << 16) - hash;
184   }
185
186   unsigned long shuffled1 = (hash*2083)%7907;
187   unsigned long shuffled2 = (seed*4297)%2017;
188
189   unsigned long shuffled3 = (random_seed*4799)%7919;
190
191   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
192
193   unsigned int result = ((namehash * 6029) % 1117) % num;
194
195   CkAssert(result >=0 && result < num);
196   return result;
197 }
198
199
200
201 controlPointManager::controlPointManager() {
202   generatedPlanForStep = -1;
203
204     exitWhenReady = false;
205     alreadyRequestedMemoryUsage = false;   
206     alreadyRequestedIdleTime = false;
207     alreadyRequestedAll = false;
208     
209     instrumentedPhase * newPhase = new instrumentedPhase();
210     allData.phases.push_back(newPhase);   
211     
212     frameworkShouldAdvancePhase = false;
213     haveControlPointChangeCallback = false;
214 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
215     
216     ControlPoint::initControlPointEffects();
217
218     phase_id = 0;
219
220     if(loadDataFileAtStartup){    
221       loadDataFile();
222     }
223
224     
225     if(CkMyPe() == 0){
226       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
227     }
228
229     traceRegisterUserEvent("No currently executing message", 5000);
230     traceRegisterUserEvent("Zero time along critical path", 5010);
231     traceRegisterUserEvent("Positive total time along critical path", 5020);
232     traceRegisterUserEvent("env->setPathHistory()", 6000);
233     traceRegisterUserEvent("Critical Path", 5900);
234     traceRegisterUserEvent("Table Entry", 5901);
235
236
237 #if USER_EVENT_FOR_REGISTERTERMINALPATH
238     traceRegisterUserEvent("registerTerminalPath", 100); 
239 #endif
240
241   }
242   
243
244  controlPointManager::~controlPointManager(){
245 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
246   }
247
248
249   /// Loads the previous run data file
250   void controlPointManager::loadDataFile(){
251     ifstream infile(CPDataFilename);
252     vector<std::string> names;
253     std::string line;
254   
255     while(getline(infile,line)){
256       if(line[0] != '#')
257         break;
258     }
259   
260     int numTimings = 0;
261     std::istringstream n(line);
262     n >> numTimings;
263   
264     while(getline(infile,line)){ 
265       if(line[0] != '#') 
266         break; 
267     }
268
269     int numControlPointNames = 0;
270     std::istringstream n2(line);
271     n2 >> numControlPointNames;
272   
273     for(int i=0; i<numControlPointNames; i++){
274       getline(infile,line);
275       names.push_back(line);
276     }
277
278     for(int i=0;i<numTimings;i++){
279       getline(infile,line);
280       while(line[0] == '#')
281         getline(infile,line); 
282
283       instrumentedPhase * ips = new instrumentedPhase();
284
285       std::istringstream iss(line);
286
287       // Read memory usage for phase
288       iss >> ips->memoryUsageMB;
289       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
290
291       // Read idle time
292       iss >> ips->idleTime.min;
293       iss >> ips->idleTime.avg;
294       iss >> ips->idleTime.max;
295
296       // Read overhead time
297       iss >> ips->overheadTime.min;
298       iss >> ips->overheadTime.avg;
299       iss >> ips->overheadTime.max;
300
301       // read bytePerInvoke
302       iss >> ips->bytesPerInvoke;
303
304       // read grain size
305       iss >> ips->grainSize;
306
307       // Read control point values
308       for(int cp=0;cp<numControlPointNames;cp++){
309         int cpvalue;
310         iss >> cpvalue;
311         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
312       }
313
314       // ignore median time
315       double mt;
316       iss >> mt;
317
318       // Read all times
319
320       double time;
321
322       while(iss >> time){
323         ips->times.push_back(time);
324 #if DEBUGPRINT > 5
325         CkPrintf("read time %lf from file\n", time);
326 #endif
327       }
328
329       allData.phases.push_back(ips);
330
331     }
332
333     infile.close();
334   }
335
336
337
338   /// Add the current data to allData and output it to a file
339   void controlPointManager::writeDataFile(){
340     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
341     ofstream outfile(CPDataFilename);
342     allData.cleanupNames();
343
344 //    string s = allData.toString();
345 //    CkPrintf("At end: \n %s\n", s.c_str());
346
347     if(shouldFilterOutputData){
348       allData.verify();
349       allData.filterOutIncompletePhases();
350     }
351
352 //    string s2 = allData.toString();
353 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
354     if(allData.toString().length() > 10){
355       outfile << allData.toString();
356     } else {
357       outfile << " No data available to save to disk " << endl;
358     }
359     outfile.close();
360   }
361
362   /// User can register a callback that is called when application should advance to next phase
363   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
364     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
365     controlPointChangeCallback = cb;
366     haveControlPointChangeCallback = true;
367   }
368
369
370 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
371 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
372   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
373 }
374
375   /// Called periodically by the runtime to handle the control points
376   /// Currently called on each PE
377   void controlPointManager::processControlPoints(){
378
379     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
380
381
382     //==========================================================================================
383     // Print the data for each phase
384
385     const int s = allData.phases.size();
386
387 #if DEBUGPRINT
388     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
389     for(int p=0;p<s;++p){
390       const instrumentedPhase &phase = allData.phases[p];
391       const idleTimeContainer &idle = phase.idleTime;
392       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
393       vector<double> const &times = phase.times;
394
395       CkPrintf("Phase %d:\n", p);
396       idle.print();
397      //   CkPrintf("critical paths: (* affected by control point)\n");
398         //   for(int i=0;i<criticalPaths.size(); i++){
399         // If affected by a control point
400         //      criticalPaths[i].print();
401
402       //        criticalPaths[i].print();
403       //        CkPrintf("\n");
404         
405
406       //   }
407       CkPrintf("Timings:\n");
408       for(int i=0;i<times.size(); i++){
409         CkPrintf("%lf ", times[i]);
410       }
411       CkPrintf("\n");
412
413     }
414     
415     CkPrintf("\n\n");
416
417
418 #endif
419
420
421
422     //==========================================================================================
423     // If this is a phase during which we try to adapt control point values based on critical path
424 #if 0
425     if( s%5 == 4) {
426
427       // Find the most recent phase with valid critical path data and idle time measurements      
428       int whichPhase=-1;
429       for(int p=0; p<s; ++p){
430         const instrumentedPhase &phase = allData.phases[p];
431         const idleTimeContainer &idle = phase.idleTime;
432         if(idle.isValid() && phase.criticalPaths.size()>0 ){
433           whichPhase = p;
434         }
435       }
436       
437       
438       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
439       const instrumentedPhase &phase = allData.phases[whichPhase];
440       const idleTimeContainer &idle = phase.idleTime;
441       
442       if(idle.min > 0.1){
443         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
444         
445         // Determine the median critical path for this phase
446         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
447         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
448         CkAssert(phase.criticalPaths.size() > 0);
449         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
450         
451         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
452         
453         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
454           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
455
456
457           CkPrintf("Finding control points related to the critical path\n");
458           int cpcount = 0;
459           std::set<std::string> controlPointsAffectingCriticalPath;
460
461           
462           for(int e=0;e<path.getNumUsed();e++){
463             if(path.getUsedCount(e)>0){
464               int ep = path.getUsedEp(e);
465
466               std::map<std::string, std::set<int> >::iterator iter;
467               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
468                 if(iter->second.count(ep)>0){
469                   controlPointsAffectingCriticalPath.insert(iter->first);
470                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
471                   cpcount++;
472                 }
473               }
474               
475             }
476           }
477           
478
479           if(cpcount==0){
480             CkPrintf("No control points are known to affect the critical path\n");
481           } else {
482             double beginTime = CmiWallTimer();
483
484             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
485             
486             newControlPoints = phase.controlPoints;
487             
488             if(frameworkShouldAdvancePhase){
489               gotoNextPhase();  
490             }
491             
492             if(haveControlPointChangeCallback){ 
493 #if DEBUGPRINT
494               CkPrintf("Calling control point change callback\n");
495 #endif
496               // Create a high priority message and send it to the callback
497               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
498               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
499               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
500               controlPointChangeCallback.send(msg);
501               
502             }
503             
504             
505             // adjust the control points that can affect the critical path
506
507             char textDescription[4096*2];
508             textDescription[0] = '\0';
509
510             std::map<std::string,int>::iterator newCP;
511             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
512               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
513                 // decrease the value (increase priority) if within range
514                 int lowerbound = controlPointSpace[newCP->first].first;
515                 if(newCP->second > lowerbound){
516                   newControlPointsAvailable = true;
517                   newCP->second --;
518                 }
519               }
520             }
521            
522             // Create a string for a projections user event
523             if(1){
524               std::map<std::string,int>::iterator newCP;
525               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
526                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
527               }
528             }
529
530             char *userEventString = new char[4096*2];
531             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
532             
533             static int userEventCounter = 20000;
534             CkPrintf("USER EVENT: %s\n", userEventString);
535             
536             traceRegisterUserEvent(userEventString, userEventCounter); 
537             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
538             userEventCounter++;
539             
540           }
541           
542         } else {
543           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
544         }
545         
546         
547       }
548       
549     } else {
550       
551       
552     }
553    
554 #endif
555
556
557
558     if(frameworkShouldAdvancePhase){
559       gotoNextPhase();  
560     }
561     
562     if(haveControlPointChangeCallback){ 
563       // Create a high priority message and send it to the callback
564       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
565       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
566       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
567       controlPointChangeCallback.send(msg);
568     }
569     
570   }
571   
572   /// Determine if any control point is known to affect an entry method
573   bool controlPointManager::controlPointAffectsThisEP(int ep){
574     std::map<std::string, std::set<int> >::iterator iter;
575     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
576       if(iter->second.count(ep)>0){
577         return true;
578       }
579     }
580     return false;    
581   }
582   
583   /// Determine if any control point is known to affect a chare array  
584   bool controlPointManager::controlPointAffectsThisArray(int array){
585     std::map<std::string, std::set<int> >::iterator iter;
586     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
587       if(iter->second.count(array)>0){
588         return true;
589       }
590     }
591     return false;   
592   }
593   
594
595   /// The data from the current phase
596   instrumentedPhase * controlPointManager::currentPhaseData(){
597     int s = allData.phases.size();
598     CkAssert(s>=1);
599     return allData.phases[s-1];
600   }
601  
602
603   /// The data from the previous phase
604   instrumentedPhase * controlPointManager::previousPhaseData(){
605     int s = allData.phases.size();
606     if(s >= 2 && phase_id > 0) {
607       return allData.phases[s-2];
608     } else {
609       return NULL;
610     }
611   }
612  
613   /// The data from two phases back
614   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
615     int s = allData.phases.size();
616     if(s >= 3 && phase_id > 0) {
617       return allData.phases[s-3];
618     } else {
619       return NULL;
620     }
621   }
622   
623
624   /// Called by either the application or the Control Point Framework to advance to the next phase  
625   void controlPointManager::gotoNextPhase(){
626     
627 #ifndef CP_DISABLE_TRACING
628     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
629     fflush(stdout);
630
631     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
632       alreadyRequestedAll = true;
633       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
634       CkPrintf("Requesting all measurements\n");
635       thisProxy.requestAll(*cb);
636       delete cb;
637     
638     } else {
639       
640       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
641         alreadyRequestedMemoryUsage = true;
642         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
643         thisProxy.requestMemoryUsage(*cb);
644         delete cb;
645       }
646       
647       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
648         alreadyRequestedIdleTime = true;
649         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
650         thisProxy.requestIdleTime(*cb);
651         delete cb;
652       }
653     }
654     
655
656 #endif
657
658
659
660 #if CMK_LBDB_ON && 0
661     LBDatabase * myLBdatabase = LBDatabaseObj();
662     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
663     const CkVec<LBObj*> objs = myLBDB->getObjs();
664     const int objCount = myLBDB->getObjCount();
665     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
666     
667     floatType maxObjWallTime = -1.0;
668     
669     for(int i=0;i<objs.length();i++){
670       LBObj* o = objs[i];
671       const LDObjData d = o->ObjData();
672       floatType cpuTime = d.cpuTime;
673       floatType wallTime = d.wallTime;
674       // can also get object handles from the LDObjData struct
675       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
676       if(wallTime > maxObjWallTime){
677
678       }
679       
680     }
681
682     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
683     
684 #endif    
685
686
687     
688     // increment phase id
689     phase_id++;
690     
691
692     // Create new entry for the phase we are starting now
693     instrumentedPhase * newPhase = new instrumentedPhase();
694     allData.phases.push_back(newPhase);
695     
696     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
697
698   }
699
700   /// An application uses this to register an instrumented timing for this phase
701   void controlPointManager::setTiming(double time){
702     currentPhaseData()->times.push_back(time);
703
704 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
705        
706     // First we should register this currently executing message as a path, because it is likely an important one to consider.
707     //    registerTerminalEntryMethod();
708     
709     // save the critical path for this phase
710     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
711     //    maxTerminalPathHistory.reset();
712     
713     
714     // Reset the counts for the currently executing message
715     //resetThisEntryPath();
716         
717 #endif
718     
719   }
720   
721   /// Entry method called on all PEs to request CPU utilization statistics
722   void controlPointManager::requestIdleTime(CkCallback cb){
723 #ifndef CP_DISABLE_TRACING
724     double i = localControlPointTracingInstance()->idleRatio();
725     double idle[3];
726     idle[0] = i;
727     idle[1] = i;
728     idle[2] = i;
729     
730     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
731     
732     localControlPointTracingInstance()->resetTimings();
733
734     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
735 #else
736     CkAbort("Should not get here\n");
737 #endif
738   }
739   
740   /// All processors reduce their memory usages in requestIdleTime() to this method
741   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
742 #ifndef CP_DISABLE_TRACING
743     int size=msg->getSize() / sizeof(double);
744     CkAssert(size==3);
745     double *r=(double *) msg->getData();
746         
747     instrumentedPhase* prevPhase = previousPhaseData();
748     if(prevPhase != NULL){
749       prevPhase->idleTime.min = r[0];
750       prevPhase->idleTime.avg = r[1]/CkNumPes();
751       prevPhase->idleTime.max = r[2];
752       prevPhase->idleTime.print();
753       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
754     } else {
755       CkPrintf("There is no previous phase to store the idle time measurements\n");
756     }
757     
758     alreadyRequestedIdleTime = false;
759     checkForShutdown();
760     delete msg;
761 #else
762     CkAbort("Should not get here\n");
763 #endif
764   }
765
766
767
768
769
770
771   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
772   void controlPointManager::requestAll(CkCallback cb){
773 #ifndef CP_DISABLE_TRACING
774     TraceControlPoints *t = localControlPointTracingInstance();
775
776     double data[ALL_REDUCTION_SIZE];
777
778     double *idle = data;
779     double *over = data+3;
780     double *mem = data+6;
781     double *msgBytes = data+7;  
782     double *grainsize = data+11;  
783
784     const double i = t->idleRatio();
785     idle[0] = i;
786     idle[1] = i;
787     idle[2] = i;
788
789     const double o = t->overheadRatio();
790     over[0] = o;
791     over[1] = o;
792     over[2] = o;
793
794     const double m = t->memoryUsageMB();
795     mem[0] = m;
796
797     msgBytes[0] = t->b2;
798     msgBytes[1] = t->b3;
799     msgBytes[2] = t->b2mlen;
800     msgBytes[3] = t->b3mlen;
801
802     grainsize[0] = t->grainSize();
803     
804     localControlPointTracingInstance()->resetAll();
805
806     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
807 #else
808     CkAbort("Should not get here\n");
809 #endif
810   }
811   
812   /// All processors reduce their memory usages in requestIdleTime() to this method
813   void controlPointManager::gatherAll(CkReductionMsg *msg){
814 #ifndef CP_DISABLE_TRACING
815     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
816     int size=msg->getSize() / sizeof(double);
817     double *data=(double *) msg->getData();
818         
819     double *idle = data;
820     double *over = data+3;
821     double *mem = data+6;
822     double *msgBytes = data+7;
823     double *granularity = data+11;
824
825
826     //    std::string b = allData.toString();
827
828     instrumentedPhase* prevPhase = previousPhaseData();
829     if(prevPhase != NULL){
830       prevPhase->idleTime.min = idle[0];
831       prevPhase->idleTime.avg = idle[1]/CkNumPes();
832       prevPhase->idleTime.max = idle[2];
833       
834       prevPhase->overheadTime.min = over[0];
835       prevPhase->overheadTime.avg = over[1]/CkNumPes();
836       prevPhase->overheadTime.max = over[2];
837       
838       prevPhase->memoryUsageMB = mem[0];
839       
840       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
841
842       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
843       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
844
845       /** The average of the grain sizes on all PEs in us */
846       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
847
848       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
849       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
850
851       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
852       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
853
854       if(bytesPerInvoke2 > bytesPerInvoke3)
855         prevPhase->bytesPerInvoke = bytesPerInvoke2;
856       else
857         prevPhase->bytesPerInvoke = bytesPerInvoke3;
858
859       //      prevPhase->print();
860       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
861
862       //      std::string a = allData.toString();
863
864       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
865       
866     } else {
867       CkPrintf("There is no previous phase to store measurements\n");
868     }
869     
870     alreadyRequestedAll = false;
871     checkForShutdown();
872     delete msg;
873 #else
874     CkAbort("Should not get here\n");
875 #endif
876   }
877
878
879
880
881   void controlPointManager::checkForShutdown(){
882     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
883       doExitNow();
884     }
885   }
886
887
888   void controlPointManager::exitIfReady(){
889      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
890        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
891        doExitNow();
892      } else {
893        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
894        exitWhenReady = true;
895      }
896   }
897
898
899
900   void controlPointManager::doExitNow(){
901           writeOutputToDisk();
902           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
903           CkExit();
904   }
905
906   void controlPointManager::writeOutputToDisk(){
907           if(writeDataFileAtShutdown){
908                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
909           }
910   }
911
912
913   /// Entry method called on all PEs to request memory usage
914   void controlPointManager::requestMemoryUsage(CkCallback cb){
915     int m = CmiMaxMemoryUsage() / 1024 / 1024;
916     CmiResetMaxMemory();
917     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
918     contribute(sizeof(int),&m,CkReduction::max_int, cb);
919   }
920
921   /// All processors reduce their memory usages to this method
922   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
923     int size=msg->getSize() / sizeof(int);
924     CkAssert(size==1);
925     int *m=(int *) msg->getData();
926
927     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
928     
929     instrumentedPhase* prevPhase = previousPhaseData();
930     if(prevPhase != NULL){
931       prevPhase->memoryUsageMB = m[0];
932     } else {
933       CkPrintf("No place to store memory usage");
934     }
935
936     alreadyRequestedMemoryUsage = false;
937     checkForShutdown();
938     delete msg;
939   }
940
941
942 //   /// Inform the control point framework that a named control point affects the priorities of some array  
943 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
944 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
945     
946 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
947 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
948 //     } else {
949 //       std::set<int> s;
950 //       s.insert(groupIdx);
951 //       affectsPrioritiesArray[std::string(name)] = s;
952 //     }
953     
954 // #if DEBUGPRINT   
955 //     std::map<std::string, std::set<int> >::iterator f;
956 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
957 //       std::string name = f->first;
958 //       std::set<int> &vals = f->second;
959 //       cout << "Control point " << name << " affects arrays: ";
960 //       std::set<int>::iterator i;
961 //       for(i=vals.begin(); i!=vals.end();++i){
962 //      cout << *i << " ";
963 //       }
964 //       cout << endl;
965 //     }
966 // #endif
967     
968 //   }
969   
970 //   /// Inform the control point framework that a named control point affects the priority of some entry method
971 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
972 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
973
974 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
975 //       affectsPrioritiesEP[std::string(name)].insert(idx);
976 //     } else {
977 //       std::set<int> s;
978 //       s.insert(idx);
979 //       affectsPrioritiesEP[std::string(name)] = s;
980 //     }
981     
982 // #if DEBUGPRINT
983 //     std::map<std::string, std::set<int> >::iterator f;
984 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
985 //       std::string name = f->first;
986 //       std::set<int> &vals = f->second;
987 //       cout << "Control point " << name << " affects EP: ";
988 //       std::set<int>::iterator i;
989 //       for(i=vals.begin(); i!=vals.end();++i){
990 //      cout << *i << " ";
991 //       }
992 //       cout << endl;
993 //     }
994 // #endif
995
996
997 //   }
998   
999
1000
1001 /// An interface callable by the application.
1002 void gotoNextPhase(){
1003   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1004 }
1005
1006 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1007 {
1008   gotoNextPhase();
1009 }
1010
1011
1012 /// A mainchare that is used just to create our controlPointManager group at startup
1013 class controlPointMain : public CBase_controlPointMain {
1014 public:
1015   controlPointMain(CkArgMsg* args){
1016 #if OLDRANDSEED
1017     struct timeval tp;
1018     gettimeofday(& tp, NULL);
1019     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1020 #else
1021     double time = CmiWallTimer();
1022     int sec = (int)time;
1023     int usec = (int)((time - (double)sec)*1000000.0);
1024     random_seed =  sec ^ usec;
1025 #endif
1026     
1027     
1028     double period;
1029     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1030     if(haveSamplePeriod){
1031       CkPrintf("controlPointSamplePeriod = %ld sec\n", period);
1032       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1033     } else {
1034       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1035     }
1036     
1037     
1038     
1039     whichTuningScheme = RandomSelection;
1040
1041
1042     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1043       whichTuningScheme = RandomSelection;
1044     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1045       whichTuningScheme = ExhaustiveSearch;
1046     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1047       whichTuningScheme = AlwaysDefaults;
1048     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1049       whichTuningScheme = SimulatedAnnealing;
1050     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1051       whichTuningScheme = CriticalPathAutoPrioritization;
1052     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1053       whichTuningScheme = UseBestKnownTiming;
1054     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1055       whichTuningScheme = UseSteering;
1056     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1057       whichTuningScheme = MemoryAware;
1058     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1059       whichTuningScheme = Simplex;
1060     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1061       whichTuningScheme = DivideAndConquer;
1062     }
1063
1064     char *defValStr = NULL;
1065     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1066       CkPrintf("You specified default value string: %s\n", defValStr);
1067       
1068       // Parse the string, looking for commas
1069      
1070
1071       char *tok = strtok(defValStr, ",");
1072       while (tok) {
1073         // split on the equal sign
1074         int len = strlen(tok);
1075         char *eqsign = strchr(tok, '=');
1076         if(eqsign != NULL && eqsign != tok){
1077           *eqsign = '\0';
1078           char *cpname = tok;
1079           std::string cpName(tok);
1080           char *cpDefVal = eqsign+1;      
1081           int v=-1;
1082           if(sscanf(cpDefVal, "%d", &v) == 1){
1083             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1084             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1085             defaultControlPointValues[cpName] = v;
1086           }
1087         }
1088         tok = strtok(NULL, ",");
1089       }
1090
1091     }
1092
1093     shouldGatherAll = false;
1094     shouldGatherMemoryUsage = false;
1095     shouldGatherUtilization = false;
1096     
1097     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1098       shouldGatherAll = true;
1099     } else {
1100       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1101         shouldGatherMemoryUsage = true;
1102       }
1103       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1104         shouldGatherUtilization = true;
1105       }
1106     }
1107     
1108     writeDataFileAtShutdown = false;   
1109     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1110       writeDataFileAtShutdown = true;
1111     }
1112
1113     shouldFilterOutputData = true;
1114     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1115       shouldFilterOutputData = false;
1116     }
1117
1118
1119    loadDataFileAtStartup = false;   
1120     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1121       loadDataFileAtStartup = true;
1122     }
1123
1124     char *cpdatafile;
1125     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1126       sprintf(CPDataFilename, "%s", cpdatafile);
1127     } else {
1128       sprintf(CPDataFilename, "controlPointData.txt");
1129     }
1130
1131
1132     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1133   }
1134   ~controlPointMain(){}
1135 };
1136
1137 /// An interface callable by the application.
1138 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1139   CkAssert(CkMyPe() == 0);
1140   CkPrintf("Application has registered a control point change callback\n");
1141   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1142 }
1143
1144 /// An interface callable by the application.
1145 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1146   if(CkMyPe() == 0){
1147     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1148     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1149   }
1150 }
1151
1152 /// An interface callable by the application.
1153 void registerControlPointTiming(double time){
1154   CkAssert(CkMyPe() == 0);
1155 #if DEBUGPRINT>0
1156   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1157 #endif
1158   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1159 }
1160
1161 /// An interface callable by the application.
1162 void controlPointTimingStamp() {
1163   CkAssert(CkMyPe() == 0);
1164 #if DEBUGPRINT>0
1165   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1166 #endif
1167   
1168   static double prev_time = 0.0;
1169   double t = CmiWallTimer();
1170   double duration = t - prev_time;
1171   prev_time = t;
1172     
1173   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1174 }
1175
1176 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1177 {
1178   controlPointTimingStamp();
1179 }
1180
1181
1182
1183
1184
1185 /// Shutdown the control point framework, writing data to disk if necessary
1186 extern "C" void controlPointShutdown(){
1187   if(CkMyPe() == 0){
1188
1189     // wait for gathering of idle time & memory usage to complete
1190     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1191
1192   }
1193 }
1194
1195 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1196 void controlPointInitNode(){
1197 //  CkPrintf("controlPointInitNode()\n");
1198   registerExitFn(controlPointShutdown);
1199 }
1200
1201 /// Called periodically to allow control point framework to do things periodically
1202 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1203 #ifdef DEBUGPRINT
1204   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1205 #endif
1206   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1207   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1208 }
1209
1210
1211
1212
1213
1214 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1215 /// user observed characteristic to adapt specific control point values.
1216 /// This function must return valid values for newControlPoints.
1217 void controlPointManager::generatePlan() {
1218   const double startGenerateTime = CmiWallTimer();
1219   const int phase_id = this->phase_id;
1220   const int effective_phase = allData.phases.size();
1221
1222   // Only generate a plan once per phase
1223   if(generatedPlanForStep == phase_id) 
1224     return;
1225   generatedPlanForStep = phase_id;
1226  
1227   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1228   printTuningScheme();
1229
1230   // By default lets put the previous phase data into newControlPoints
1231   instrumentedPhase *prevPhase = previousPhaseData();
1232   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1233           newControlPoints[cpsIter->first] = cpsIter->second;
1234   }
1235
1236
1237   if( whichTuningScheme == RandomSelection ){
1238     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1239     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1240       const std::string &name = cpsIter->first;
1241       const std::pair<int,int> &bounds = cpsIter->second;
1242       const int lb = bounds.first;
1243       const int ub = bounds.second;
1244       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1245     }
1246   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1247     // -----------------------------------------------------------
1248     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1249     //   
1250     // This scheme will return the median value for the range 
1251     // early on, and then will transition over to the new control points
1252     // determined by the critical path adapting code
1253
1254     // This won't work until the periodic function is fixed up a bit
1255
1256     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1257     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1258       const std::string &name = cpsIter->first;
1259       const std::pair<int,int> &bounds = cpsIter->second;
1260       const int lb = bounds.first;
1261       const int ub = bounds.second;
1262       newControlPoints[name] =  (lb+ub)/2;
1263     }
1264
1265   } else if ( whichTuningScheme == MemoryAware ) {
1266
1267     // -----------------------------------------------------------
1268     //  STEERING BASED ON MEMORY USAGE
1269
1270     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1271     instrumentedPhase *prevPhase = previousPhaseData();
1272  
1273     if(phase_id%2 == 0){
1274       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1275       twoAgoPhase->print();
1276       CkPrintf("\n");
1277       fflush(stdout);
1278       
1279       // See if memory usage is low:
1280       double memUsage = twoAgoPhase->memoryUsageMB;
1281       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1282       fflush(stdout);
1283       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1284         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1285         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1286         
1287         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1288         newControlPoints = twoAgoPhase->controlPoints;
1289
1290
1291         CkPrintf("Steering (memory based) initialized plan\n");
1292         fflush(stdout);
1293
1294         // look for a possible control point knob to turn
1295         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1296         
1297         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1298         bool found = false;
1299         std::string cpName;
1300         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1301         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1302         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1303           cpName = iter->first;
1304           info = &iter->second;
1305           found = true;
1306           break;
1307         }
1308
1309         // Adapt the control point value
1310         if(found){
1311           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1312           fflush(stdout);
1313           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1314           const int maxValue = controlPointSpace[cpName].second;
1315           
1316           if(twoAgoValue+1 <= maxValue){
1317             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1318           }
1319         }
1320         
1321       }
1322     }
1323
1324     CkPrintf("Steering (memory based) done for this phase\n");
1325     fflush(stdout);
1326
1327   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1328
1329     // -----------------------------------------------------------
1330     //  USE BEST KNOWN TIME  
1331     static bool firstTime = true;
1332     if(firstTime){
1333       firstTime = false;
1334       instrumentedPhase *best = allData.findBest(); 
1335       CkPrintf("Best known phase is:\n");
1336       best->print();
1337       CkPrintf("\n");
1338       
1339       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1340       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1341         const std::string &name = cpsIter->first;
1342         newControlPoints[name] =  best->controlPoints[name];
1343       }
1344     }
1345
1346   } else if ( whichTuningScheme == UseSteering ) {
1347           // -----------------------------------------------------------
1348           //  STEERING BASED ON KNOWLEDGE
1349
1350           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1351           // plans are only generated after 3 phases
1352
1353           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1354           instrumentedPhase *prevPhase = previousPhaseData();
1355
1356           if(phase_id%4 == 0){
1357                   CkPrintf("Steering based on 2 phases ago:\n");
1358                   twoAgoPhase->print();
1359                   CkPrintf("\n");
1360                   fflush(stdout);
1361
1362                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1363
1364
1365                   // ========================================= Concurrency =============================================
1366                   // See if idle time is high:
1367                   {
1368                           double idleTime = twoAgoPhase->idleTime.avg;
1369                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1370                           fflush(stdout);
1371                           if(idleTime > 0.10){
1372                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1373                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1374
1375                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1376
1377                                   bool found = false;
1378                                   std::string cpName;
1379                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1380                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1381                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1382                                           cpName = iter->first;
1383                                           info = &iter->second;
1384
1385                                           // Initialize a new plan based on two phases ago
1386                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1387
1388                                           CkPrintf("Steering found knob to turn\n");
1389                                           fflush(stdout);
1390
1391                                           if(info->first == ControlPoint::EFF_INC){
1392                                                   const int maxValue = controlPointSpace[cpName].second;
1393                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1394                                                   if(twoAgoValue+1 <= maxValue){
1395                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1396                                                   }
1397                                           } else {
1398                                                   const int minValue = controlPointSpace[cpName].second;
1399                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1400                                                   if(twoAgoValue-1 >= minValue){
1401                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1402                                                   }
1403                                           }
1404
1405                                           possibleNextStepPlans.push_back(aNewPlan);
1406
1407                                   }
1408                           }
1409                   }
1410
1411                   // ========================================= Grain Size =============================================
1412                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1413                   {
1414                           double overheadTime = twoAgoPhase->overheadTime.avg;
1415                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1416                           fflush(stdout);
1417                           if(overheadTime > 0.10){
1418                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1419                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1420
1421                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1422
1423                                   bool found = false;
1424                                   std::string cpName;
1425                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1426                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1427                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1428                                           cpName = iter->first;
1429                                           info = &iter->second;
1430
1431                                           // Initialize a new plan based on two phases ago
1432                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1433
1434
1435
1436                                           CkPrintf("Steering found knob to turn\n");
1437                                           fflush(stdout);
1438
1439                                           if(info->first == ControlPoint::EFF_INC){
1440                                                   const int maxValue = controlPointSpace[cpName].second;
1441                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1442                                                   if(twoAgoValue+1 <= maxValue){
1443                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1444                                                   }
1445                                           } else {
1446                                                   const int minValue = controlPointSpace[cpName].second;
1447                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1448                                                   if(twoAgoValue-1 >= minValue){
1449                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1450                                                   }
1451                                           }
1452
1453                                           possibleNextStepPlans.push_back(aNewPlan);
1454
1455                                   }
1456
1457                           }
1458                   }
1459                   // ========================================= GPU Offload =============================================
1460                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1461                   {
1462                           double idleTime = twoAgoPhase->idleTime.avg;
1463                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1464                           fflush(stdout);
1465                           if(idleTime > 0.10){
1466                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1467                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1468
1469                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1470
1471                                   bool found = false;
1472                                   std::string cpName;
1473                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1474                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1475                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1476                                           cpName = iter->first;
1477                                           info = &iter->second;
1478
1479                                           // Initialize a new plan based on two phases ago
1480                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1481
1482
1483                                           CkPrintf("Steering found knob to turn\n");
1484                                           fflush(stdout);
1485
1486                                           if(info->first == ControlPoint::EFF_DEC){
1487                                                   const int maxValue = controlPointSpace[cpName].second;
1488                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1489                                                   if(twoAgoValue+1 <= maxValue){
1490                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1491                                                   }
1492                                           } else {
1493                                                   const int minValue = controlPointSpace[cpName].second;
1494                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1495                                                   if(twoAgoValue-1 >= minValue){
1496                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1497                                                   }
1498                                           }
1499
1500                                           possibleNextStepPlans.push_back(aNewPlan);
1501
1502                                   }
1503
1504                           }
1505                   }
1506
1507                   // ========================================= Done =============================================
1508
1509
1510                   if(possibleNextStepPlans.size() > 0){
1511                           newControlPoints = possibleNextStepPlans[0];
1512                   }
1513
1514
1515                   CkPrintf("Steering done for this phase\n");
1516                   fflush(stdout);
1517
1518           }  else {
1519                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1520                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1521                   fflush(stdout);
1522           }
1523
1524
1525
1526   } else if( whichTuningScheme == SimulatedAnnealing ) {
1527
1528     // -----------------------------------------------------------
1529     //  SIMULATED ANNEALING
1530     //  Simulated Annealing style hill climbing method
1531     //
1532     //  Find the best search space configuration, and try something
1533     //  nearby it, with a radius decreasing as phases increase
1534
1535     CkPrintf("Finding best phase\n");
1536     instrumentedPhase *bestPhase = allData.findBest();  
1537     CkPrintf("best found:\n"); 
1538     bestPhase->print(); 
1539     CkPrintf("\n"); 
1540     
1541
1542     const int convergeByPhase = 100;
1543     // Determine from 0.0 to 1.0 how far along we are
1544     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1545
1546     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1547     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1548       const std::string &name = cpsIter->first;
1549       const std::pair<int,int> &bounds = cpsIter->second;
1550       const int minValue = bounds.first;
1551       const int maxValue = bounds.second;
1552       
1553       const int before = bestPhase->controlPoints[name];   
1554   
1555       const int range = (maxValue-minValue+1)*(1.0-progress);
1556
1557       int high = min(before+range, maxValue);
1558       int low = max(before-range, minValue);
1559       
1560       newControlPoints[name] = low;
1561       if(high-low > 0){
1562         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1563       } 
1564       
1565     }
1566
1567   } else if ( whichTuningScheme == DivideAndConquer ) {
1568
1569           // -----------------------------------------------------------
1570           //  STEERING FOR Divide & Conquer Programs
1571           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1572           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1573           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1574
1575           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1576           // plans are only generated after 3 phases
1577
1578           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1579           instrumentedPhase *prevPhase = previousPhaseData();
1580
1581           if(phase_id%4 == 0){
1582                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1583                   twoAgoPhase->print();
1584                   CkPrintf("\n");
1585                   fflush(stdout);
1586
1587                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1588
1589
1590                   // ========================================= Concurrency =============================================
1591                   // See if idle time is high:
1592                   {
1593                           double idleTime = twoAgoPhase->idleTime.avg;
1594                           double overheadTime = twoAgoPhase->overheadTime.avg;
1595
1596
1597                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1598                           fflush(stdout);
1599                           if(idleTime+overheadTime > 0.10){
1600                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1601                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1602
1603                                   int direction = -1;
1604                                   if (idleTime>overheadTime){
1605                                           // We need to decrease the grain size, or increase the available concurrency
1606                                           direction = 1;
1607                                   }
1608
1609                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1610
1611                                   bool found = false;
1612                                   std::string cpName;
1613                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1614                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1615                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1616                                           cpName = iter->first;
1617                                           info = &iter->second;
1618                                           
1619                                           // Initialize a new plan based on two phases ago
1620                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1621                                           bool newPlanExists = false;
1622
1623                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1624                                           fflush(stdout);
1625
1626
1627
1628                                           if(info->first == ControlPoint::EFF_INC){
1629                                                   const int minValue = controlPointSpace[cpName].first;
1630                                                   const int maxValue = controlPointSpace[cpName].second;
1631                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1632                                                   const int newVal = twoAgoValue+1*direction;
1633                                                   if(newVal <= maxValue && newVal >= minValue){
1634                                                     aNewPlan[cpName] = newVal;
1635                                                     newPlanExists = true;
1636                                                   } else {
1637                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1638                                                   }
1639                                           } else {
1640                                                   const int minValue = controlPointSpace[cpName].first;
1641                                                   const int maxValue = controlPointSpace[cpName].second;
1642                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1643                                                   const int newVal = twoAgoValue-1*direction;
1644                                                   if(newVal <= maxValue && newVal >= minValue){
1645                                                           aNewPlan[cpName] = newVal;
1646                                                           newPlanExists = true;
1647                                                   } else {
1648                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1649                                                   }
1650                                           }
1651
1652                                           if(newPlanExists){
1653                                             possibleNextStepPlans.push_back(aNewPlan);
1654                                           }
1655                                   }
1656                           }
1657                   }
1658
1659                   if(possibleNextStepPlans.size() > 0){
1660                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1661                     newControlPoints = possibleNextStepPlans[0];
1662                   } else {
1663                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1664                   }
1665           }
1666
1667   } else if( whichTuningScheme == Simplex ) {
1668
1669           // -----------------------------------------------------------
1670           //  Nelder Mead Simplex Algorithm
1671           //
1672           //  A scheme that takes a simplex (n+1 points) and moves it
1673           //  toward the minimum, eventually converging there.
1674
1675           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1676
1677   } else if( whichTuningScheme == ExhaustiveSearch ) {
1678     
1679     // -----------------------------------------------------------
1680     // EXHAUSTIVE SEARCH
1681    
1682     int numDimensions = controlPointSpace.size();
1683     CkAssert(numDimensions > 0);
1684   
1685     vector<int> lowerBounds(numDimensions);
1686     vector<int> upperBounds(numDimensions); 
1687   
1688     int d=0;
1689     std::map<std::string, pair<int,int> >::iterator iter;
1690     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1691       //    CkPrintf("Examining dimension %d\n", d);
1692       lowerBounds[d] = iter->second.first;
1693       upperBounds[d] = iter->second.second;
1694       d++;
1695     }
1696    
1697     // get names for each dimension (control point)
1698     vector<std::string> names(numDimensions);
1699     d=0;
1700     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
1701       names[d] = niter->first;
1702       d++;
1703     }
1704   
1705   
1706     // Create the first possible configuration
1707     vector<int> config = lowerBounds;
1708     config.push_back(0);
1709   
1710     // Increment until finding an unused configuration
1711     allData.cleanupNames(); // put -1 values in for any control points missing
1712     std::vector<instrumentedPhase*> &phases = allData.phases;     
1713
1714     while(true){
1715     
1716       std::vector<instrumentedPhase*>::iterator piter; 
1717       bool testedConfiguration = false; 
1718       for(piter = phases.begin(); piter != phases.end(); piter++){ 
1719       
1720         // Test if the configuration matches this phase
1721         bool match = true;
1722         for(int j=0;j<numDimensions;j++){
1723           match &= (*piter)->controlPoints[names[j]] == config[j];
1724         }
1725       
1726         if(match){
1727           testedConfiguration = true; 
1728           break;
1729         } 
1730       } 
1731     
1732       if(testedConfiguration == false){ 
1733         break;
1734       } 
1735     
1736       // go to next configuration
1737       config[0] ++;
1738       // Propagate "carrys"
1739       for(int i=0;i<numDimensions;i++){
1740         if(config[i] > upperBounds[i]){
1741           config[i+1]++;
1742           config[i] = lowerBounds[i];
1743         }
1744       }
1745       // check if we have exhausted all possible values
1746       if(config[numDimensions] > 0){
1747         break;
1748       }
1749        
1750     }
1751   
1752     if(config[numDimensions] > 0){
1753       for(int i=0;i<numDimensions;i++){ 
1754         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
1755       }
1756     }
1757
1758     // results are now in config[i]
1759     
1760     for(int i=0; i<numDimensions; i++){
1761       newControlPoints[names[i]] = config[i];
1762       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
1763     }
1764
1765
1766   } else {
1767     CkAbort("Some Control Point tuning strategy must be enabled.\n");
1768   }
1769
1770
1771   const double endGenerateTime = CmiWallTimer();
1772   
1773   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
1774   
1775 }
1776
1777
1778
1779
1780 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
1781
1782
1783 /// Get control point value from range of integers [lb,ub]
1784 int controlPoint(const char *name, int lb, int ub){
1785   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
1786   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
1787   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
1788   int result;
1789
1790   // if we already have control point values for phase, return them
1791   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
1792     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
1793     return thisPhaseData->controlPoints[std::string(name)];
1794   }
1795   
1796
1797   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
1798     // For the first few phases, just use the lower bound, or the default if one was provided 
1799     // This ensures that the ranges for all the control points are known before we do anything fancy
1800     result = lb;
1801
1802     if(defaultControlPointValues.count(std::string(name)) > 0){
1803       int v = defaultControlPointValues[std::string(name)];
1804       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
1805       result = v;
1806     }
1807
1808   } else if(controlPointSpace.count(std::string(name)) == 0){
1809     // if this is the first time we've seen the CP, then return the lower bound
1810     result = lb;
1811     
1812   }  else {
1813     // Generate a plan one time for each phase
1814     controlPointManagerProxy.ckLocalBranch()->generatePlan();
1815     
1816     // Use the planned value:
1817     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
1818     
1819   }
1820
1821   CkAssert(isInRange(result,ub,lb));
1822   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
1823
1824   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
1825
1826   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
1827   //  thisPhaseData->print();
1828   
1829   return result;
1830 }
1831
1832
1833 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(int lb, int ub){
1834   return controlPoint("Fortran CP", lb, ub);
1835 }
1836
1837
1838
1839
1840 /// Inform the control point framework that a named control point affects the priorities of some array  
1841 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
1842 //   CkGroupID aid = arraybase.ckGetArrayID();
1843 //   int groupIdx = aid.idx;
1844 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
1845 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
1846 // }
1847
1848
1849 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
1850 // void controlPointPriorityEntry(const char *name, int idx){
1851 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
1852 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
1853 // }
1854
1855
1856
1857
1858
1859 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
1860
1861     This function decomposes the algorithm into a state machine that allows it to
1862     evaluate one or more configurations through subsequent clls to this function.
1863     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
1864
1865     At one point in the algorithm, n+1 parameter configurations must be evaluated,
1866     so a list of them will be created and they will be evaluated, one per call.
1867
1868     Currently there is no stopping criteria, but the simplex ought to contract down
1869     to a few close configurations, and hence not much change will happen after this 
1870     point.
1871
1872  */
1873 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
1874
1875         if(useBestKnown){
1876                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
1877                 return;
1878         }
1879
1880
1881         if(firstSimplexPhase< 0){
1882                 firstSimplexPhase = allData.phases.size()-1;
1883                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
1884         }
1885
1886         int n = controlPointSpace.size();
1887
1888         CkAssert(n>=2);
1889
1890
1891         if(simplexState == beginning){
1892                 // First we evaluate n+1 random points, then we go to a different state
1893                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
1894                         CkPrintf("Simplex Tuning: chose random configuration\n");
1895
1896                         // Choose random values from the middle third of the range in each dimension
1897                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1898                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1899                                 const std::string &name = cpsIter->first;
1900                                 const std::pair<int,int> &bounds = cpsIter->second;
1901                                 const int lb = bounds.first;
1902                                 const int ub = bounds.second;
1903                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1904                         }
1905                 } else {
1906                         // Set initial simplex:
1907                         for(int i=0; i<n+1; i++){
1908                                 simplexIndices.insert(firstSimplexPhase+i);
1909                         }
1910                         CkAssert(simplexIndices.size() == n+1);
1911
1912                         // Transition to reflecting state
1913                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1914
1915                 }
1916
1917         } else if (simplexState == reflecting){
1918                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1919                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1920
1921                 // Find the highest time from other points in the simplex
1922                 double highestTimeForOthersInSimplex = 0.0;
1923                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
1924                         double t = allData.phases[*iter]->medianTime();
1925                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
1926                                 highestTimeForOthersInSimplex = t;
1927                         }
1928                 }
1929
1930                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
1931
1932                 if(recentPhaseTime < highestTimeForOthersInSimplex){
1933                         // if y* < yl,  transition to "expanding"
1934                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
1935
1936                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
1937                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
1938                         CkAssert(simplexIndices.size() == n+1);
1939                         simplexIndices.erase(worstPhase);
1940                         CkAssert(simplexIndices.size() == n);
1941                         simplexIndices.insert(pPhase);
1942                         CkAssert(simplexIndices.size() == n+1);
1943                         // Transition to reflecting state
1944                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1945
1946                 } else {
1947                         // if y* > yh
1948                         if(recentPhaseTime <= worstTime){
1949                                 // replace Ph with P*
1950                                 CkAssert(simplexIndices.size() == n+1);
1951                                 simplexIndices.erase(worstPhase);
1952                                 CkAssert(simplexIndices.size() == n);
1953                                 simplexIndices.insert(pPhase);
1954                                 CkAssert(simplexIndices.size() == n+1);
1955                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
1956                                 worstPhase = pPhase;
1957                                 // Just as a sanity check, make sure we don't use the non-updated values.
1958                                 worst.clear();
1959                         }
1960
1961                         // Now, form P** and do contracting phase
1962                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
1963
1964                 }
1965
1966         } else if (simplexState == doneExpanding){
1967                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1968                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1969                 // A new configuration has been evaluated
1970
1971                 // Check to see if y** < y1
1972                 if(recentPhaseTime < bestTime){
1973                         // replace Ph by P**
1974                         CkAssert(simplexIndices.size() == n+1);
1975                         simplexIndices.erase(worstPhase);
1976                         CkAssert(simplexIndices.size() == n);
1977                         simplexIndices.insert(p2Phase);
1978                         CkAssert(simplexIndices.size() == n+1);
1979                 } else {
1980                         //      replace Ph by P*
1981                         CkAssert(simplexIndices.size() == n+1);
1982                         simplexIndices.erase(worstPhase);
1983                         CkAssert(simplexIndices.size() == n);
1984                         simplexIndices.insert(pPhase);
1985                         CkAssert(simplexIndices.size() == n+1);
1986                 }
1987
1988                 // Transition to reflecting state
1989                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1990
1991         }  else if (simplexState == contracting){
1992                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1993                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1994                 // A new configuration has been evaluated
1995
1996                 // Check to see if y** > yh
1997                 if(recentPhaseTime <= worstTime){
1998                         // replace Ph by P**
1999                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2000                         CkAssert(simplexIndices.size() == n+1);
2001                         simplexIndices.erase(worstPhase);
2002                         CkAssert(simplexIndices.size() == n);
2003                         simplexIndices.insert(p2Phase);
2004                         CkAssert(simplexIndices.size() == n+1);
2005                         // Transition to reflecting state
2006                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2007
2008                 } else {
2009                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2010                         simplexState = stillContracting;
2011
2012                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2013                         stillMustContractList = simplexIndices;
2014
2015                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2016                 }
2017
2018         } else if (simplexState == stillContracting){
2019                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2020
2021                 if(stillMustContractList.size()>0){
2022                         int c = *stillMustContractList.begin();
2023                         stillMustContractList.erase(c);
2024                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2025
2026                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2027
2028                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2029                         int v = 0;
2030                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2031                                 const std::string &name = cpsIter->first;
2032                                 const std::pair<int,int> &bounds = cpsIter->second;
2033                                 const int lb = bounds.first;
2034                                 const int ub = bounds.second;
2035
2036                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2037
2038                                 newControlPoints[name] = keepInRange(val,lb,ub);
2039                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2040                                 v++;
2041                         }
2042                 } else {
2043                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2044                         CkAssert(stillMustContractList.size() == 0);
2045                         simplexIndices.clear();
2046                         CkAssert(simplexIndices.size()==0);
2047                         for(int i=0; i<n+1; i++){
2048                                 simplexIndices.insert(allData.phases.size()-2-i);
2049                         }
2050                         CkAssert(simplexIndices.size()==n+1);
2051
2052                         // Transition to reflecting state
2053                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2054
2055                 }
2056
2057
2058         } else if (simplexState == expanding){
2059                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2060                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2061                 // A new configuration has been evaluated
2062
2063                 // determine if y** < yl
2064                 if(recentPhaseTime < bestTime){
2065                         // replace Ph by P**
2066                         CkAssert(simplexIndices.size() == n+1);
2067                         simplexIndices.erase(worstPhase);
2068                         CkAssert(simplexIndices.size() == n);
2069                         simplexIndices.insert(p2Phase);
2070                         CkAssert(simplexIndices.size() == n+1);
2071                         // Transition to reflecting state
2072                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2073
2074                 } else {
2075                         // else, replace ph with p*
2076                         CkAssert(simplexIndices.size() == n+1);
2077                         simplexIndices.erase(worstPhase);
2078                         CkAssert(simplexIndices.size() == n);
2079                         simplexIndices.insert(pPhase);
2080                         CkAssert(simplexIndices.size() == n+1);
2081                         // Transition to reflecting state
2082                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2083                 }
2084
2085
2086         } else {
2087                 CkAbort("Unknown simplexState");
2088         }
2089
2090 }
2091
2092
2093
2094 /** Replace the worst point with its reflection across the centroid. */
2095 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2096
2097         int n = controlPointSpace.size();
2098
2099         printSimplex(allData);
2100
2101         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2102
2103
2104         // Quit if the diameter of our simplex is small
2105         double maxr = 0.0;
2106         for(int i=0; i<n+1; i++){
2107                 //              Compute r^2 of this simplex point from the centroid
2108                 double r2 = 0.0;
2109                 std::vector<double> p = pointCoords(allData, i);
2110                 for(int d=0; d<p.size(); d++){
2111                         double r1 = (p[d] * centroid[d]);
2112                         r2 += r1*r1;
2113                 }
2114                 if(r2 > maxr)
2115                         maxr = r2;
2116         }
2117
2118 #if 0
2119         // At some point we quit this tuning
2120         if(maxr < 10){
2121                 useBestKnown = true;
2122                 instrumentedPhase *best = allData.findBest();
2123                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2124
2125                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2126                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2127                         const std::string &name = cpsIter->first;
2128                         newControlPoints[name] =  best->controlPoints[name];
2129                 }
2130         }
2131 #endif
2132
2133         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2134
2135         pPhase = allData.phases.size()-1;
2136         P.resize(n);
2137         for(int i=0; i<n; i++){
2138                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2139         }
2140
2141         for(int i=0; i<P.size(); i++){
2142                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2143         }
2144
2145
2146         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2147         int v = 0;
2148         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2149                 const std::string &name = cpsIter->first;
2150                 const std::pair<int,int> &bounds = cpsIter->second;
2151                 const int lb = bounds.first;
2152                 const int ub = bounds.second;
2153                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2154                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2155                 v++;
2156         }
2157
2158
2159         // Transition to "reflecting" state
2160         simplexState = reflecting;
2161         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2162
2163 }
2164
2165
2166
2167
2168 /** Replace the newly tested reflection with a further expanded version of itself. */
2169 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2170         int n = controlPointSpace.size();
2171         printSimplex(allData);
2172
2173         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2174         // I believe the equation for P** in the text on page 308 is correct.
2175
2176         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2177
2178
2179         p2Phase = allData.phases.size()-1;
2180         P2.resize(n);
2181         for(int i=0; i<n; i++){
2182                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2183         }
2184
2185         for(int i=0; i<P2.size(); i++){
2186                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2187         }
2188
2189
2190         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2191         int v = 0;
2192         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2193                 const std::string &name = cpsIter->first;
2194                 const std::pair<int,int> &bounds = cpsIter->second;
2195                 const int lb = bounds.first;
2196                 const int ub = bounds.second;
2197                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2198                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2199                 v++;
2200         }
2201
2202
2203         // Transition to "doneExpanding" state
2204         simplexState = doneExpanding;
2205         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2206
2207 }
2208
2209
2210
2211
2212 /** Replace the newly tested reflection with a further expanded version of itself. */
2213 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2214         int n = controlPointSpace.size();
2215         printSimplex(allData);
2216
2217         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2218
2219
2220         p2Phase = allData.phases.size()-1;
2221         P2.resize(n);
2222         for(int i=0; i<n; i++){
2223                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2224         }
2225
2226         for(int i=0; i<P2.size(); i++){
2227                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2228         }
2229
2230
2231         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2232         int v = 0;
2233         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2234                 const std::string &name = cpsIter->first;
2235                 const std::pair<int,int> &bounds = cpsIter->second;
2236                 const int lb = bounds.first;
2237                 const int ub = bounds.second;
2238                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2239                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2240                 v++;
2241         }
2242
2243
2244         // Transition to "contracting" state
2245         simplexState = contracting;
2246         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2247
2248 }
2249
2250
2251
2252
2253 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2254         int n = controlPointSpace.size();
2255
2256         // Find worst performing point in the simplex
2257         worstPhase = -1;
2258         worstTime = -1.0;
2259         bestPhase = 10000000;
2260         bestTime = 10000000;
2261         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2262                 double t = allData.phases[*iter]->medianTime();
2263                 if(t > worstTime){
2264                         worstTime = t;
2265                         worstPhase = *iter;
2266                 }
2267                 if(t < bestTime){
2268                         bestTime = t;
2269                         bestPhase = *iter;
2270                 }
2271         }
2272         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2273
2274         best = pointCoords(allData, bestPhase);
2275         CkAssert(best.size() == n);
2276
2277         worst = pointCoords(allData, worstPhase);
2278         CkAssert(worst.size() == n);
2279
2280         // Calculate centroid of the remaining points in the simplex
2281         centroid.resize(n);
2282         for(int i=0; i<n; i++){
2283                 centroid[i] = 0.0;
2284         }
2285
2286         int numPts = 0;
2287
2288         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2289                 if(*iter != worstPhase){
2290                         numPts ++;
2291                         // Accumulate into the result vector
2292                         int c = 0;
2293                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2294                                 centroid[c] += citer->second;
2295                                 c++;
2296                         }
2297
2298                 }
2299         }
2300
2301         // Now divide the sums by the number of points.
2302         for(int v = 0; v<centroid.size(); v++) {
2303                 centroid[v] /= (double)numPts;
2304         }
2305
2306         CkAssert(centroid.size() == n);
2307
2308         for(int i=0; i<centroid.size(); i++){
2309                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2310         }
2311
2312
2313 }
2314
2315
2316
2317 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2318         std::vector<double> result;
2319         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2320                 result.push_back((double)citer->second);
2321         }
2322         return result;
2323 }
2324
2325
2326
2327
2328 void ControlPointWriteOutputToDisk(){
2329         CkAssert(CkMyPe() == 0);
2330         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2331 }
2332
2333
2334
2335 /*! @} */
2336
2337 #ifdef CP_DISABLE_TRACING
2338 #include "ControlPointsNoTrace.def.h"
2339 #else
2340 #include "ControlPoints.def.h"
2341 #endif