Fixing fortran control point interface routine. Fortran passes ints by reference...
[charm.git] / src / ck-cp / controlPoints.C
1 #include <charm++.h>
2
3 // This file is compiled twice to make a version that is capable of not needing the tracing to be turned on. 
4 // The Makefile will have -DCP_DISABLE_TRACING
5
6 #include "controlPoints.h"
7 #include "trace-controlPoints.h"
8 #include "LBDatabase.h"
9 #include "controlPoints.h"
10 #include "charm++.h"
11 #include "trace-projections.h"
12 #include <pathHistory.h>
13 #include "cp_effects.h"
14 #include <iostream>
15
16 #include <climits>
17 //  A framework for tuning "control points" exposed by an application. Tuning decisions are based upon observed performance measurements.
18  
19
20 /** @defgroup ControlPointFramework Automatic Performance Tuning and Steering Framework  */
21 /**  @{ */
22
23 using namespace std;
24
25 #define DEFAULT_CONTROL_POINT_SAMPLE_PERIOD  10000000
26
27
28 //#undef DEBUGPRINT
29 //#define DEBUGPRINT 4
30
31 static void periodicProcessControlPoints(void* ptr, double currWallTime);
32
33
34 // A pointer to this PE's controlpoint manager Proxy
35 /* readonly */ CProxy_controlPointManager controlPointManagerProxy;
36 /* readonly */ int random_seed;
37 /* readonly */ long controlPointSamplePeriod;
38 /* readonly */ int whichTuningScheme;
39 /* readonly */ bool writeDataFileAtShutdown;
40 /* readonly */ bool shouldFilterOutputData;
41 /* readonly */ bool loadDataFileAtStartup;
42 /* readonly */ bool shouldGatherMemoryUsage;
43 /* readonly */ bool shouldGatherUtilization;
44 /* readonly */ bool shouldGatherAll;
45 /* readonly */ char CPDataFilename[512];
46
47
48
49 /// The control point values to be used for the first few phases if the strategy doesn't choose to do something else.
50 /// These probably come from the command line arguments, so are available only on PE 0
51 std::map<std::string, int> defaultControlPointValues;
52
53
54
55 typedef enum tuningSchemeEnum {RandomSelection, SimulatedAnnealing, ExhaustiveSearch, CriticalPathAutoPrioritization, UseBestKnownTiming, UseSteering, MemoryAware, Simplex, DivideAndConquer, AlwaysDefaults}  tuningScheme;
56
57
58
59 void printTuningScheme(){
60   switch(whichTuningScheme){
61   case RandomSelection:
62     CkPrintf("Tuning Scheme: RandomSelection\n");
63     break;
64   case AlwaysDefaults:
65     CkPrintf("Tuning Scheme: AlwaysDefaults\n");
66     break;
67   case SimulatedAnnealing:
68     CkPrintf("Tuning Scheme: SimulatedAnnealing\n");
69     break;
70   case ExhaustiveSearch:
71     CkPrintf("Tuning Scheme: ExhaustiveSearch\n");
72     break;
73   case CriticalPathAutoPrioritization:
74     CkPrintf("Tuning Scheme: CriticalPathAutoPrioritization\n");
75     break;
76   case UseBestKnownTiming:
77     CkPrintf("Tuning Scheme: UseBestKnownTiming\n");
78     break;
79   case UseSteering:
80     CkPrintf("Tuning Scheme: UseSteering\n");
81     break;
82   case MemoryAware:
83     CkPrintf("Tuning Scheme: MemoryAware\n");
84     break;
85   case Simplex:
86     CkPrintf("Tuning Scheme: Simplex Algorithm\n");
87     break;
88   case DivideAndConquer:
89     CkPrintf("Tuning Scheme: Divide & Conquer Algorithm\n");
90     break;
91   default:
92     CkPrintf("Unknown tuning scheme\n");
93     break;
94   }
95   fflush(stdout);
96 }
97
98
99
100 /// A reduction type that combines idle time measurements (min/sum/max etc.)
101 CkReduction::reducerType idleTimeReductionType;
102 /// A reducer that combines idle time measurements (min/sum/max etc.)
103 CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
104   double ret[3];
105   if(nMsg > 0){
106     CkAssert(msgs[0]->getSize()==3*sizeof(double));
107     double *m=(double *)msgs[0]->getData();
108     ret[0]=m[0];
109     ret[1]=m[1];
110     ret[2]=m[2];
111   }
112   for (int i=1;i<nMsg;i++) {
113     CkAssert(msgs[i]->getSize()==3*sizeof(double));
114     double *m=(double *)msgs[i]->getData();
115     ret[0]=min(ret[0],m[0]);
116     ret[1]+=m[1];
117     ret[2]=max(ret[2],m[2]);
118   }  
119   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
120 }
121
122
123
124 /// A reduction type that combines idle, overhead, and memory measurements
125 CkReduction::reducerType allMeasuresReductionType;
126 /// A reducer that combines idle, overhead, and memory measurements
127 #define ALL_REDUCTION_SIZE 12
128 CkReductionMsg *allMeasuresReduction(int nMsg,CkReductionMsg **msgs){
129   double ret[ALL_REDUCTION_SIZE];
130   if(nMsg > 0){
131     CkAssert(msgs[0]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
132     double *m=(double *)msgs[0]->getData();
133     memcpy(ret, m, ALL_REDUCTION_SIZE*sizeof(double) );
134   }
135   for (int i=1;i<nMsg;i++) {
136     CkAssert(msgs[i]->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
137     double *m=(double *)msgs[i]->getData();
138     // idle time (min/sum/max)
139     ret[0]=min(ret[0],m[0]);
140     ret[1]+=m[1];
141     ret[2]=max(ret[2],m[2]);
142     // overhead time (min/sum/max)
143     ret[3]=min(ret[3],m[3]);
144     ret[4]+=m[4];
145     ret[5]=max(ret[5],m[5]);
146     // mem usage (max)
147     ret[6]=max(ret[6],m[6]);
148     // bytes per invocation for two types of entry methods
149     ret[7]+=m[7];
150     ret[8]+=m[8];
151     ret[9]+=m[9];
152     ret[10]+=m[10];
153     // Grain size (avg)
154     ret[11]+=m[11];
155   }  
156   return CkReductionMsg::buildNew(ALL_REDUCTION_SIZE*sizeof(double),ret);   
157 }
158
159
160 /// Registers the control point framework's reduction handlers at startup on each PE
161 /*initproc*/ void registerCPReductions(void) {
162   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
163   allMeasuresReductionType=CkReduction::addReducer(allMeasuresReduction);
164 }
165
166
167
168
169
170
171 /// Return an integer between 0 and num-1 inclusive
172 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
173 unsigned int randInt(unsigned int num, const char* name, int seed=0){
174   CkAssert(num > 0);
175   CkAssert(num < 1000);
176
177   unsigned long hash = 0;
178   unsigned int c;
179   unsigned char * str = (unsigned char*)name;
180
181   while (c = *str++){
182     unsigned int c2 = (c+64)%128;
183     unsigned int c3 = (c2*5953)%127;
184     hash = c3 + (hash << 6) + (hash << 16) - hash;
185   }
186
187   unsigned long shuffled1 = (hash*2083)%7907;
188   unsigned long shuffled2 = (seed*4297)%2017;
189
190   unsigned long shuffled3 = (random_seed*4799)%7919;
191
192   unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
193
194   unsigned int result = ((namehash * 6029) % 1117) % num;
195
196   CkAssert(result >=0 && result < num);
197   return result;
198 }
199
200
201
202 controlPointManager::controlPointManager() {
203   generatedPlanForStep = -1;
204
205     exitWhenReady = false;
206     alreadyRequestedMemoryUsage = false;   
207     alreadyRequestedIdleTime = false;
208     alreadyRequestedAll = false;
209     
210     instrumentedPhase * newPhase = new instrumentedPhase();
211     allData.phases.push_back(newPhase);   
212     
213     frameworkShouldAdvancePhase = false;
214     haveControlPointChangeCallback = false;
215 //    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
216     
217     ControlPoint::initControlPointEffects();
218
219     phase_id = 0;
220
221     if(loadDataFileAtStartup){    
222       loadDataFile();
223     }
224
225     
226     if(CkMyPe() == 0){
227       CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
228     }
229
230     traceRegisterUserEvent("No currently executing message", 5000);
231     traceRegisterUserEvent("Zero time along critical path", 5010);
232     traceRegisterUserEvent("Positive total time along critical path", 5020);
233     traceRegisterUserEvent("env->setPathHistory()", 6000);
234     traceRegisterUserEvent("Critical Path", 5900);
235     traceRegisterUserEvent("Table Entry", 5901);
236
237
238 #if USER_EVENT_FOR_REGISTERTERMINALPATH
239     traceRegisterUserEvent("registerTerminalPath", 100); 
240 #endif
241
242   }
243   
244
245  controlPointManager::~controlPointManager(){
246 //    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
247   }
248
249
250   /// Loads the previous run data file
251   void controlPointManager::loadDataFile(){
252     ifstream infile(CPDataFilename);
253     vector<std::string> names;
254     std::string line;
255   
256     while(getline(infile,line)){
257       if(line[0] != '#')
258         break;
259     }
260   
261     int numTimings = 0;
262     std::istringstream n(line);
263     n >> numTimings;
264   
265     while(getline(infile,line)){ 
266       if(line[0] != '#') 
267         break; 
268     }
269
270     int numControlPointNames = 0;
271     std::istringstream n2(line);
272     n2 >> numControlPointNames;
273   
274     for(int i=0; i<numControlPointNames; i++){
275       getline(infile,line);
276       names.push_back(line);
277     }
278
279     for(int i=0;i<numTimings;i++){
280       getline(infile,line);
281       while(line[0] == '#')
282         getline(infile,line); 
283
284       instrumentedPhase * ips = new instrumentedPhase();
285
286       std::istringstream iss(line);
287
288       // Read memory usage for phase
289       iss >> ips->memoryUsageMB;
290       //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
291
292       // Read idle time
293       iss >> ips->idleTime.min;
294       iss >> ips->idleTime.avg;
295       iss >> ips->idleTime.max;
296
297       // Read overhead time
298       iss >> ips->overheadTime.min;
299       iss >> ips->overheadTime.avg;
300       iss >> ips->overheadTime.max;
301
302       // read bytePerInvoke
303       iss >> ips->bytesPerInvoke;
304
305       // read grain size
306       iss >> ips->grainSize;
307
308       // Read control point values
309       for(int cp=0;cp<numControlPointNames;cp++){
310         int cpvalue;
311         iss >> cpvalue;
312         ips->controlPoints.insert(make_pair(names[cp],cpvalue));
313       }
314
315       // ignore median time
316       double mt;
317       iss >> mt;
318
319       // Read all times
320
321       double time;
322
323       while(iss >> time){
324         ips->times.push_back(time);
325 #if DEBUGPRINT > 5
326         CkPrintf("read time %lf from file\n", time);
327 #endif
328       }
329
330       allData.phases.push_back(ips);
331
332     }
333
334     infile.close();
335   }
336
337
338
339   /// Add the current data to allData and output it to a file
340   void controlPointManager::writeDataFile(){
341     CkPrintf("============= writeDataFile() to %s  ============\n", CPDataFilename);
342     ofstream outfile(CPDataFilename);
343     allData.cleanupNames();
344
345 //    string s = allData.toString();
346 //    CkPrintf("At end: \n %s\n", s.c_str());
347
348     if(shouldFilterOutputData){
349       allData.verify();
350       allData.filterOutIncompletePhases();
351     }
352
353 //    string s2 = allData.toString();
354 //    CkPrintf("After filtering: \n %s\n", s2.c_str());
355     if(allData.toString().length() > 10){
356       outfile << allData.toString();
357     } else {
358       outfile << " No data available to save to disk " << endl;
359     }
360     outfile.close();
361   }
362
363   /// User can register a callback that is called when application should advance to next phase
364   void controlPointManager::setCPCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
365     frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
366     controlPointChangeCallback = cb;
367     haveControlPointChangeCallback = true;
368   }
369
370
371 /// A user can specify that the framework should advance the phases automatically. Useful for gather performance measurements without modifying a program.
372 void controlPointManager::setFrameworkAdvancePhase(bool _frameworkShouldAdvancePhase){
373   frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
374 }
375
376   /// Called periodically by the runtime to handle the control points
377   /// Currently called on each PE
378   void controlPointManager::processControlPoints(){
379
380     CkPrintf("[%d] processControlPoints() haveControlPointChangeCallback=%d frameworkShouldAdvancePhase=%d\n", CkMyPe(), (int)haveControlPointChangeCallback, (int)frameworkShouldAdvancePhase);
381
382
383     //==========================================================================================
384     // Print the data for each phase
385
386     const int s = allData.phases.size();
387
388 #if DEBUGPRINT
389     CkPrintf("\n\nExamining critical paths and priorities and idle times (num phases=%d)\n", s );
390     for(int p=0;p<s;++p){
391       const instrumentedPhase &phase = allData.phases[p];
392       const idleTimeContainer &idle = phase.idleTime;
393       //      vector<MergeablePathHistory> const &criticalPaths = phase.criticalPaths;
394       vector<double> const &times = phase.times;
395
396       CkPrintf("Phase %d:\n", p);
397       idle.print();
398      //   CkPrintf("critical paths: (* affected by control point)\n");
399         //   for(int i=0;i<criticalPaths.size(); i++){
400         // If affected by a control point
401         //      criticalPaths[i].print();
402
403       //        criticalPaths[i].print();
404       //        CkPrintf("\n");
405         
406
407       //   }
408       CkPrintf("Timings:\n");
409       for(int i=0;i<times.size(); i++){
410         CkPrintf("%lf ", times[i]);
411       }
412       CkPrintf("\n");
413
414     }
415     
416     CkPrintf("\n\n");
417
418
419 #endif
420
421
422
423     //==========================================================================================
424     // If this is a phase during which we try to adapt control point values based on critical path
425 #if 0
426     if( s%5 == 4) {
427
428       // Find the most recent phase with valid critical path data and idle time measurements      
429       int whichPhase=-1;
430       for(int p=0; p<s; ++p){
431         const instrumentedPhase &phase = allData.phases[p];
432         const idleTimeContainer &idle = phase.idleTime;
433         if(idle.isValid() && phase.criticalPaths.size()>0 ){
434           whichPhase = p;
435         }
436       }
437       
438       
439       CkPrintf("Examining phase %d which has a valid idle time and critical paths\n", whichPhase);
440       const instrumentedPhase &phase = allData.phases[whichPhase];
441       const idleTimeContainer &idle = phase.idleTime;
442       
443       if(idle.min > 0.1){
444         CkPrintf("Min PE idle is HIGH. %.2lf%% > 10%%\n", idle.min*100.0);
445         
446         // Determine the median critical path for this phase
447         int medianCriticalPathIdx = phase.medianCriticalPathIdx();
448         const PathHistory &path = phase.criticalPaths[medianCriticalPathIdx];
449         CkAssert(phase.criticalPaths.size() > 0);
450         CkAssert(phase.criticalPaths.size() > medianCriticalPathIdx); 
451         
452         CkPrintf("Median Critical Path has time %lf\n", path.getTotalTime());
453         
454         if(phase.times[medianCriticalPathIdx] > 1.2 * path.getTotalTime()){
455           CkPrintf("The application step(%lf) is taking significantly longer than the critical path(%lf). BAD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
456
457
458           CkPrintf("Finding control points related to the critical path\n");
459           int cpcount = 0;
460           std::set<std::string> controlPointsAffectingCriticalPath;
461
462           
463           for(int e=0;e<path.getNumUsed();e++){
464             if(path.getUsedCount(e)>0){
465               int ep = path.getUsedEp(e);
466
467               std::map<std::string, std::set<int> >::iterator iter;
468               for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
469                 if(iter->second.count(ep)>0){
470                   controlPointsAffectingCriticalPath.insert(iter->first);
471                   CkPrintf("Control Point \"%s\" affects the critical path\n", iter->first.c_str());
472                   cpcount++;
473                 }
474               }
475               
476             }
477           }
478           
479
480           if(cpcount==0){
481             CkPrintf("No control points are known to affect the critical path\n");
482           } else {
483             double beginTime = CmiWallTimer();
484
485             CkPrintf("Attempting to modify control point values for %d control points that affect the critical path\n", controlPointsAffectingCriticalPath.size());
486             
487             newControlPoints = phase.controlPoints;
488             
489             if(frameworkShouldAdvancePhase){
490               gotoNextPhase();  
491             }
492             
493             if(haveControlPointChangeCallback){ 
494 #if DEBUGPRINT
495               CkPrintf("Calling control point change callback\n");
496 #endif
497               // Create a high priority message and send it to the callback
498               controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
499               *((int*)CkPriorityPtr(msg)) = -INT_MAX;
500               CkSetQueueing(msg, CK_QUEUEING_IFIFO);
501               controlPointChangeCallback.send(msg);
502               
503             }
504             
505             
506             // adjust the control points that can affect the critical path
507
508             char textDescription[4096*2];
509             textDescription[0] = '\0';
510
511             std::map<std::string,int>::iterator newCP;
512             for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
513               if( controlPointsAffectingCriticalPath.count(newCP->first) > 0 ){
514                 // decrease the value (increase priority) if within range
515                 int lowerbound = controlPointSpace[newCP->first].first;
516                 if(newCP->second > lowerbound){
517                   newControlPointsAvailable = true;
518                   newCP->second --;
519                 }
520               }
521             }
522            
523             // Create a string for a projections user event
524             if(1){
525               std::map<std::string,int>::iterator newCP;
526               for(newCP = newControlPoints.begin(); newCP != newControlPoints.end(); ++ newCP){
527                 sprintf(textDescription+strlen(textDescription), "<br>\"%s\"=%d", newCP->first.c_str(), newCP->second);
528               }
529             }
530
531             char *userEventString = new char[4096*2];
532             sprintf(userEventString, "Adjusting control points affecting critical path: %s ***", textDescription);
533             
534             static int userEventCounter = 20000;
535             CkPrintf("USER EVENT: %s\n", userEventString);
536             
537             traceRegisterUserEvent(userEventString, userEventCounter); 
538             traceUserBracketEvent(userEventCounter, beginTime, CmiWallTimer());
539             userEventCounter++;
540             
541           }
542           
543         } else {
544           CkPrintf("The application step(%lf) is dominated mostly by the critical path(%lf). GOOD\n",phase.times[medianCriticalPathIdx], path.getTotalTime() );
545         }
546         
547         
548       }
549       
550     } else {
551       
552       
553     }
554    
555 #endif
556
557
558
559     if(frameworkShouldAdvancePhase){
560       gotoNextPhase();  
561     }
562     
563     if(haveControlPointChangeCallback){ 
564       // Create a high priority message and send it to the callback
565       controlPointMsg *msg = new (8*sizeof(int)) controlPointMsg; 
566       *((int*)CkPriorityPtr(msg)) = -INT_MAX;
567       CkSetQueueing(msg, CK_QUEUEING_IFIFO);
568       controlPointChangeCallback.send(msg);
569     }
570     
571   }
572   
573   /// Determine if any control point is known to affect an entry method
574   bool controlPointManager::controlPointAffectsThisEP(int ep){
575     std::map<std::string, std::set<int> >::iterator iter;
576     for(iter=affectsPrioritiesEP.begin(); iter!= affectsPrioritiesEP.end(); ++iter){
577       if(iter->second.count(ep)>0){
578         return true;
579       }
580     }
581     return false;    
582   }
583   
584   /// Determine if any control point is known to affect a chare array  
585   bool controlPointManager::controlPointAffectsThisArray(int array){
586     std::map<std::string, std::set<int> >::iterator iter;
587     for(iter=affectsPrioritiesArray.begin(); iter!= affectsPrioritiesArray.end(); ++iter){
588       if(iter->second.count(array)>0){
589         return true;
590       }
591     }
592     return false;   
593   }
594   
595
596   /// The data from the current phase
597   instrumentedPhase * controlPointManager::currentPhaseData(){
598     int s = allData.phases.size();
599     CkAssert(s>=1);
600     return allData.phases[s-1];
601   }
602  
603
604   /// The data from the previous phase
605   instrumentedPhase * controlPointManager::previousPhaseData(){
606     int s = allData.phases.size();
607     if(s >= 2 && phase_id > 0) {
608       return allData.phases[s-2];
609     } else {
610       return NULL;
611     }
612   }
613  
614   /// The data from two phases back
615   instrumentedPhase * controlPointManager::twoAgoPhaseData(){
616     int s = allData.phases.size();
617     if(s >= 3 && phase_id > 0) {
618       return allData.phases[s-3];
619     } else {
620       return NULL;
621     }
622   }
623   
624
625   /// Called by either the application or the Control Point Framework to advance to the next phase  
626   void controlPointManager::gotoNextPhase(){
627     
628 #ifndef CP_DISABLE_TRACING
629     CkPrintf("gotoNextPhase shouldGatherAll=%d\n", (int)shouldGatherAll);
630     fflush(stdout);
631
632     if(shouldGatherAll && CkMyPe() == 0 && !alreadyRequestedAll){
633       alreadyRequestedAll = true;
634       CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherAll(NULL), 0, thisProxy);
635       CkPrintf("Requesting all measurements\n");
636       thisProxy.requestAll(*cb);
637       delete cb;
638     
639     } else {
640       
641       if(shouldGatherMemoryUsage && CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
642         alreadyRequestedMemoryUsage = true;
643         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
644         thisProxy.requestMemoryUsage(*cb);
645         delete cb;
646       }
647       
648       if(shouldGatherUtilization && CkMyPe() == 0 && !alreadyRequestedIdleTime){
649         alreadyRequestedIdleTime = true;
650         CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherIdleTime(NULL), 0, thisProxy);
651         thisProxy.requestIdleTime(*cb);
652         delete cb;
653       }
654     }
655     
656
657 #endif
658
659
660
661 #if CMK_LBDB_ON && 0
662     LBDatabase * myLBdatabase = LBDatabaseObj();
663     LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
664     const CkVec<LBObj*> objs = myLBDB->getObjs();
665     const int objCount = myLBDB->getObjCount();
666     CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
667     
668     floatType maxObjWallTime = -1.0;
669     
670     for(int i=0;i<objs.length();i++){
671       LBObj* o = objs[i];
672       const LDObjData d = o->ObjData();
673       floatType cpuTime = d.cpuTime;
674       floatType wallTime = d.wallTime;
675       // can also get object handles from the LDObjData struct
676       CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
677       if(wallTime > maxObjWallTime){
678
679       }
680       
681     }
682
683     myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
684     
685 #endif    
686
687
688     
689     // increment phase id
690     phase_id++;
691     
692
693     // Create new entry for the phase we are starting now
694     instrumentedPhase * newPhase = new instrumentedPhase();
695     allData.phases.push_back(newPhase);
696     
697     CkPrintf("Now in phase %d allData.phases.size()=%d\n", phase_id, allData.phases.size());
698
699   }
700
701   /// An application uses this to register an instrumented timing for this phase
702   void controlPointManager::setTiming(double time){
703     currentPhaseData()->times.push_back(time);
704
705 #ifdef USE_CRITICAL_PATH_HEADER_ARRAY
706        
707     // First we should register this currently executing message as a path, because it is likely an important one to consider.
708     //    registerTerminalEntryMethod();
709     
710     // save the critical path for this phase
711     //   thisPhaseData.criticalPaths.push_back(maxTerminalPathHistory);
712     //    maxTerminalPathHistory.reset();
713     
714     
715     // Reset the counts for the currently executing message
716     //resetThisEntryPath();
717         
718 #endif
719     
720   }
721   
722   /// Entry method called on all PEs to request CPU utilization statistics
723   void controlPointManager::requestIdleTime(CkCallback cb){
724 #ifndef CP_DISABLE_TRACING
725     double i = localControlPointTracingInstance()->idleRatio();
726     double idle[3];
727     idle[0] = i;
728     idle[1] = i;
729     idle[2] = i;
730     
731     //    CkPrintf("[%d] idleRatio=%f\n", CkMyPe(), i);
732     
733     localControlPointTracingInstance()->resetTimings();
734
735     contribute(3*sizeof(double),idle,idleTimeReductionType, cb);
736 #else
737     CkAbort("Should not get here\n");
738 #endif
739   }
740   
741   /// All processors reduce their memory usages in requestIdleTime() to this method
742   void controlPointManager::gatherIdleTime(CkReductionMsg *msg){
743 #ifndef CP_DISABLE_TRACING
744     int size=msg->getSize() / sizeof(double);
745     CkAssert(size==3);
746     double *r=(double *) msg->getData();
747         
748     instrumentedPhase* prevPhase = previousPhaseData();
749     if(prevPhase != NULL){
750       prevPhase->idleTime.min = r[0];
751       prevPhase->idleTime.avg = r[1]/CkNumPes();
752       prevPhase->idleTime.max = r[2];
753       prevPhase->idleTime.print();
754       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf in prevPhase=%p\n", prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, prevPhase);
755     } else {
756       CkPrintf("There is no previous phase to store the idle time measurements\n");
757     }
758     
759     alreadyRequestedIdleTime = false;
760     checkForShutdown();
761     delete msg;
762 #else
763     CkAbort("Should not get here\n");
764 #endif
765   }
766
767
768
769
770
771
772   /// Entry method called on all PEs to request CPU utilization statistics and memory usage
773   void controlPointManager::requestAll(CkCallback cb){
774 #ifndef CP_DISABLE_TRACING
775     TraceControlPoints *t = localControlPointTracingInstance();
776
777     double data[ALL_REDUCTION_SIZE];
778
779     double *idle = data;
780     double *over = data+3;
781     double *mem = data+6;
782     double *msgBytes = data+7;  
783     double *grainsize = data+11;  
784
785     const double i = t->idleRatio();
786     idle[0] = i;
787     idle[1] = i;
788     idle[2] = i;
789
790     const double o = t->overheadRatio();
791     over[0] = o;
792     over[1] = o;
793     over[2] = o;
794
795     const double m = t->memoryUsageMB();
796     mem[0] = m;
797
798     msgBytes[0] = t->b2;
799     msgBytes[1] = t->b3;
800     msgBytes[2] = t->b2mlen;
801     msgBytes[3] = t->b3mlen;
802
803     grainsize[0] = t->grainSize();
804     
805     localControlPointTracingInstance()->resetAll();
806
807     contribute(ALL_REDUCTION_SIZE*sizeof(double),data,allMeasuresReductionType, cb);
808 #else
809     CkAbort("Should not get here\n");
810 #endif
811   }
812   
813   /// All processors reduce their memory usages in requestIdleTime() to this method
814   void controlPointManager::gatherAll(CkReductionMsg *msg){
815 #ifndef CP_DISABLE_TRACING
816     CkAssert(msg->getSize()==ALL_REDUCTION_SIZE*sizeof(double));
817     int size=msg->getSize() / sizeof(double);
818     double *data=(double *) msg->getData();
819         
820     double *idle = data;
821     double *over = data+3;
822     double *mem = data+6;
823     double *msgBytes = data+7;
824     double *granularity = data+11;
825
826
827     //    std::string b = allData.toString();
828
829     instrumentedPhase* prevPhase = previousPhaseData();
830     if(prevPhase != NULL){
831       prevPhase->idleTime.min = idle[0];
832       prevPhase->idleTime.avg = idle[1]/CkNumPes();
833       prevPhase->idleTime.max = idle[2];
834       
835       prevPhase->overheadTime.min = over[0];
836       prevPhase->overheadTime.avg = over[1]/CkNumPes();
837       prevPhase->overheadTime.max = over[2];
838       
839       prevPhase->memoryUsageMB = mem[0];
840       
841       CkPrintf("Stored idle time min=%lf avg=%lf max=%lf  mem=%lf in prevPhase=%p\n", (double)prevPhase->idleTime.min, prevPhase->idleTime.avg, prevPhase->idleTime.max, (double)prevPhase->memoryUsageMB, prevPhase);
842
843       double bytesPerInvoke2 = msgBytes[2] / msgBytes[0];
844       double bytesPerInvoke3 = msgBytes[3] / msgBytes[1];
845
846       /** The average of the grain sizes on all PEs in us */
847       prevPhase->grainSize = (granularity[0] / (double)CkNumPes());
848
849       CkPrintf("Bytes Per Invokation 2 = %f\n", bytesPerInvoke2);
850       CkPrintf("Bytes Per Invokation 3 = %f\n", bytesPerInvoke3);
851
852       CkPrintf("Bytes Per us of work 2 = %f\n", bytesPerInvoke2/prevPhase->grainSize);
853       CkPrintf("Bytes Per us of work 3 = %f\n", bytesPerInvoke3/prevPhase->grainSize);
854
855       if(bytesPerInvoke2 > bytesPerInvoke3)
856         prevPhase->bytesPerInvoke = bytesPerInvoke2;
857       else
858         prevPhase->bytesPerInvoke = bytesPerInvoke3;
859
860       //      prevPhase->print();
861       // CkPrintf("prevPhase=%p number of timings=%d\n", prevPhase, prevPhase->times.size() );
862
863       //      std::string a = allData.toString();
864
865       //CkPrintf("Before:\n%s\nAfter:\n%s\n\n", b.c_str(), a.c_str());
866       
867     } else {
868       CkPrintf("There is no previous phase to store measurements\n");
869     }
870     
871     alreadyRequestedAll = false;
872     checkForShutdown();
873     delete msg;
874 #else
875     CkAbort("Should not get here\n");
876 #endif
877   }
878
879
880
881
882   void controlPointManager::checkForShutdown(){
883     if( exitWhenReady && !alreadyRequestedAll && !alreadyRequestedMemoryUsage && !alreadyRequestedIdleTime && CkMyPe()==0){
884       doExitNow();
885     }
886   }
887
888
889   void controlPointManager::exitIfReady(){
890      if( !alreadyRequestedMemoryUsage && !alreadyRequestedAll && !alreadyRequestedIdleTime && CkMyPe()==0){
891        CkPrintf("controlPointManager::exitIfReady exiting immediately\n");
892        doExitNow();
893      } else {
894        CkPrintf("controlPointManager::exitIfReady Delaying exiting\n");
895        exitWhenReady = true;
896      }
897   }
898
899
900
901   void controlPointManager::doExitNow(){
902           writeOutputToDisk();
903           CkPrintf("[%d] Control point manager calling CkExit()\n", CkMyPe());
904           CkExit();
905   }
906
907   void controlPointManager::writeOutputToDisk(){
908           if(writeDataFileAtShutdown){
909                   controlPointManagerProxy.ckLocalBranch()->writeDataFile();
910           }
911   }
912
913
914   /// Entry method called on all PEs to request memory usage
915   void controlPointManager::requestMemoryUsage(CkCallback cb){
916     int m = CmiMaxMemoryUsage() / 1024 / 1024;
917     CmiResetMaxMemory();
918     //    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
919     contribute(sizeof(int),&m,CkReduction::max_int, cb);
920   }
921
922   /// All processors reduce their memory usages to this method
923   void controlPointManager::gatherMemoryUsage(CkReductionMsg *msg){
924     int size=msg->getSize() / sizeof(int);
925     CkAssert(size==1);
926     int *m=(int *) msg->getData();
927
928     CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
929     
930     instrumentedPhase* prevPhase = previousPhaseData();
931     if(prevPhase != NULL){
932       prevPhase->memoryUsageMB = m[0];
933     } else {
934       CkPrintf("No place to store memory usage");
935     }
936
937     alreadyRequestedMemoryUsage = false;
938     checkForShutdown();
939     delete msg;
940   }
941
942
943 //   /// Inform the control point framework that a named control point affects the priorities of some array  
944 //   void controlPointManager::associatePriorityArray(const char *name, int groupIdx){
945 //     CkPrintf("Associating control point \"%s\" affects priority of array id=%d\n", name, groupIdx );
946     
947 //     if(affectsPrioritiesArray.count(std::string(name)) > 0 ) {
948 //       affectsPrioritiesArray[std::string(name)].insert(groupIdx);
949 //     } else {
950 //       std::set<int> s;
951 //       s.insert(groupIdx);
952 //       affectsPrioritiesArray[std::string(name)] = s;
953 //     }
954     
955 // #if DEBUGPRINT   
956 //     std::map<std::string, std::set<int> >::iterator f;
957 //     for(f=affectsPrioritiesArray.begin(); f!=affectsPrioritiesArray.end();++f){
958 //       std::string name = f->first;
959 //       std::set<int> &vals = f->second;
960 //       cout << "Control point " << name << " affects arrays: ";
961 //       std::set<int>::iterator i;
962 //       for(i=vals.begin(); i!=vals.end();++i){
963 //      cout << *i << " ";
964 //       }
965 //       cout << endl;
966 //     }
967 // #endif
968     
969 //   }
970   
971 //   /// Inform the control point framework that a named control point affects the priority of some entry method
972 //   void controlPointManager::associatePriorityEntry(const char *name, int idx){
973 //     CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
974
975 //       if(affectsPrioritiesEP.count(std::string(name)) > 0 ) {
976 //       affectsPrioritiesEP[std::string(name)].insert(idx);
977 //     } else {
978 //       std::set<int> s;
979 //       s.insert(idx);
980 //       affectsPrioritiesEP[std::string(name)] = s;
981 //     }
982     
983 // #if DEBUGPRINT
984 //     std::map<std::string, std::set<int> >::iterator f;
985 //     for(f=affectsPrioritiesEP.begin(); f!=affectsPrioritiesEP.end();++f){
986 //       std::string name = f->first;
987 //       std::set<int> &vals = f->second;
988 //       cout << "Control point " << name << " affects EP: ";
989 //       std::set<int>::iterator i;
990 //       for(i=vals.begin(); i!=vals.end();++i){
991 //      cout << *i << " ";
992 //       }
993 //       cout << endl;
994 //     }
995 // #endif
996
997
998 //   }
999   
1000
1001
1002 /// An interface callable by the application.
1003 void gotoNextPhase(){
1004   controlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
1005 }
1006
1007 FDECL void FTN_NAME(GOTONEXTPHASE,gotonextphase)()
1008 {
1009   gotoNextPhase();
1010 }
1011
1012
1013 /// A mainchare that is used just to create our controlPointManager group at startup
1014 class controlPointMain : public CBase_controlPointMain {
1015 public:
1016   controlPointMain(CkArgMsg* args){
1017 #if OLDRANDSEED
1018     struct timeval tp;
1019     gettimeofday(& tp, NULL);
1020     random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
1021 #else
1022     double time = CmiWallTimer();
1023     int sec = (int)time;
1024     int usec = (int)((time - (double)sec)*1000000.0);
1025     random_seed =  sec ^ usec;
1026 #endif
1027     
1028     
1029     double period;
1030     bool haveSamplePeriod = CmiGetArgDoubleDesc(args->argv,"+CPSamplePeriod", &period,"The time between Control Point Framework samples (in seconds)");
1031     if(haveSamplePeriod){
1032       CkPrintf("controlPointSamplePeriod = %ld sec\n", period);
1033       controlPointSamplePeriod =  period * 1000; /**< A readonly */
1034     } else {
1035       controlPointSamplePeriod =  DEFAULT_CONTROL_POINT_SAMPLE_PERIOD;
1036     }
1037     
1038     
1039     
1040     whichTuningScheme = RandomSelection;
1041
1042
1043     if( CmiGetArgFlagDesc(args->argv,"+CPSchemeRandom","Randomly Select Control Point Values") ){
1044       whichTuningScheme = RandomSelection;
1045     } else if ( CmiGetArgFlagDesc(args->argv,"+CPExhaustiveSearch","Exhaustive Search of Control Point Values") ){
1046       whichTuningScheme = ExhaustiveSearch;
1047     } else if ( CmiGetArgFlagDesc(args->argv,"+CPAlwaysUseDefaults","Always Use The Provided Default Control Point Values") ){
1048       whichTuningScheme = AlwaysDefaults;
1049     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimulAnneal","Simulated Annealing Search of Control Point Values") ){
1050       whichTuningScheme = SimulatedAnnealing;
1051     } else if ( CmiGetArgFlagDesc(args->argv,"+CPCriticalPathPrio","Use Critical Path to adapt Control Point Values") ){
1052       whichTuningScheme = CriticalPathAutoPrioritization;
1053     } else if ( CmiGetArgFlagDesc(args->argv,"+CPBestKnown","Use BestKnown Timing for Control Point Values") ){
1054       whichTuningScheme = UseBestKnownTiming;
1055     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSteering","Use Steering to adjust Control Point Values") ){
1056       whichTuningScheme = UseSteering;
1057     } else if ( CmiGetArgFlagDesc(args->argv,"+CPMemoryAware", "Adjust control points to approach available memory") ){
1058       whichTuningScheme = MemoryAware;
1059     } else if ( CmiGetArgFlagDesc(args->argv,"+CPSimplex", "Nelder-Mead Simplex Algorithm") ){
1060       whichTuningScheme = Simplex;
1061     } else if ( CmiGetArgFlagDesc(args->argv,"+CPDivideConquer", "A divide and conquer program specific steering scheme") ){
1062       whichTuningScheme = DivideAndConquer;
1063     }
1064
1065     char *defValStr = NULL;
1066     if( CmiGetArgStringDesc(args->argv, "+CPDefaultValues", &defValStr, "Specify the default control point values used for the first couple phases") ){
1067       CkPrintf("You specified default value string: %s\n", defValStr);
1068       
1069       // Parse the string, looking for commas
1070      
1071
1072       char *tok = strtok(defValStr, ",");
1073       while (tok) {
1074         // split on the equal sign
1075         int len = strlen(tok);
1076         char *eqsign = strchr(tok, '=');
1077         if(eqsign != NULL && eqsign != tok){
1078           *eqsign = '\0';
1079           char *cpname = tok;
1080           std::string cpName(tok);
1081           char *cpDefVal = eqsign+1;      
1082           int v=-1;
1083           if(sscanf(cpDefVal, "%d", &v) == 1){
1084             CkPrintf("Command Line Argument Specifies that Control Point \"%s\" defaults to %d\n", cpname, v);
1085             CkAssert(CkMyPe() == 0); // can only access defaultControlPointValues on PE 0
1086             defaultControlPointValues[cpName] = v;
1087           }
1088         }
1089         tok = strtok(NULL, ",");
1090       }
1091
1092     }
1093
1094     shouldGatherAll = false;
1095     shouldGatherMemoryUsage = false;
1096     shouldGatherUtilization = false;
1097     
1098     if ( CmiGetArgFlagDesc(args->argv,"+CPGatherAll","Gather all types of measurements for each phase") ){
1099       shouldGatherAll = true;
1100     } else {
1101       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherMemoryUsage","Gather memory usage after each phase") ){
1102         shouldGatherMemoryUsage = true;
1103       }
1104       if ( CmiGetArgFlagDesc(args->argv,"+CPGatherUtilization","Gather utilization & Idle time after each phase") ){
1105         shouldGatherUtilization = true;
1106       }
1107     }
1108     
1109     writeDataFileAtShutdown = false;   
1110     if( CmiGetArgFlagDesc(args->argv,"+CPSaveData","Save Control Point timings & configurations at completion") ){
1111       writeDataFileAtShutdown = true;
1112     }
1113
1114     shouldFilterOutputData = true;
1115     if( CmiGetArgFlagDesc(args->argv,"+CPNoFilterData","Don't filter phases from output data") ){
1116       shouldFilterOutputData = false;
1117     }
1118
1119
1120    loadDataFileAtStartup = false;   
1121     if( CmiGetArgFlagDesc(args->argv,"+CPLoadData","Load Control Point timings & configurations at startup") ){
1122       loadDataFileAtStartup = true;
1123     }
1124
1125     char *cpdatafile;
1126     if( CmiGetArgStringDesc(args->argv, "+CPDataFilename", &cpdatafile, "Specify control point data file to save/load") ){
1127       sprintf(CPDataFilename, "%s", cpdatafile);
1128     } else {
1129       sprintf(CPDataFilename, "controlPointData.txt");
1130     }
1131
1132
1133     controlPointManagerProxy = CProxy_controlPointManager::ckNew();
1134   }
1135   ~controlPointMain(){}
1136 };
1137
1138 /// An interface callable by the application.
1139 void registerCPChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
1140   CkAssert(CkMyPe() == 0);
1141   CkPrintf("Application has registered a control point change callback\n");
1142   controlPointManagerProxy.ckLocalBranch()->setCPCallback(cb, frameworkShouldAdvancePhase);
1143 }
1144
1145 /// An interface callable by the application.
1146 void setFrameworkAdvancePhase(bool frameworkShouldAdvancePhase){
1147   if(CkMyPe() == 0){
1148     CkPrintf("Application has specified that framework should %sadvance phase\n", frameworkShouldAdvancePhase?"":"not ");
1149     controlPointManagerProxy.ckLocalBranch()->setFrameworkAdvancePhase(frameworkShouldAdvancePhase);
1150   }
1151 }
1152
1153 /// An interface callable by the application.
1154 void registerControlPointTiming(double time){
1155   CkAssert(CkMyPe() == 0);
1156 #if DEBUGPRINT>0
1157   CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
1158 #endif
1159   controlPointManagerProxy.ckLocalBranch()->setTiming(time);
1160 }
1161
1162 /// An interface callable by the application.
1163 void controlPointTimingStamp() {
1164   CkAssert(CkMyPe() == 0);
1165 #if DEBUGPRINT>0
1166   CkPrintf("Program registering its own timing with controlPointTimingStamp()\n", time);
1167 #endif
1168   
1169   static double prev_time = 0.0;
1170   double t = CmiWallTimer();
1171   double duration = t - prev_time;
1172   prev_time = t;
1173     
1174   controlPointManagerProxy.ckLocalBranch()->setTiming(duration);
1175 }
1176
1177 FDECL void FTN_NAME(CONTROLPOINTTIMINGSTAMP,controlpointtimingstamp)()
1178 {
1179   controlPointTimingStamp();
1180 }
1181
1182
1183
1184
1185
1186 /// Shutdown the control point framework, writing data to disk if necessary
1187 extern "C" void controlPointShutdown(){
1188   if(CkMyPe() == 0){
1189
1190     // wait for gathering of idle time & memory usage to complete
1191     controlPointManagerProxy.ckLocalBranch()->exitIfReady();
1192
1193   }
1194 }
1195
1196 /// A function called at startup on each node to register controlPointShutdown() to be called at CkExit()
1197 void controlPointInitNode(){
1198 //  CkPrintf("controlPointInitNode()\n");
1199   registerExitFn(controlPointShutdown);
1200 }
1201
1202 /// Called periodically to allow control point framework to do things periodically
1203 static void periodicProcessControlPoints(void* ptr, double currWallTime){
1204 #ifdef DEBUGPRINT
1205   CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
1206 #endif
1207   controlPointManagerProxy.ckLocalBranch()->processControlPoints();
1208   CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, controlPointSamplePeriod, CkMyPe());
1209 }
1210
1211
1212
1213
1214
1215 /// Determine a control point value using some optimization scheme (use max known, simmulated annealling, 
1216 /// user observed characteristic to adapt specific control point values.
1217 /// This function must return valid values for newControlPoints.
1218 void controlPointManager::generatePlan() {
1219   const double startGenerateTime = CmiWallTimer();
1220   const int phase_id = this->phase_id;
1221   const int effective_phase = allData.phases.size();
1222
1223   // Only generate a plan once per phase
1224   if(generatedPlanForStep == phase_id) 
1225     return;
1226   generatedPlanForStep = phase_id;
1227  
1228   CkPrintf("Generating Plan for phase %d\n", phase_id); 
1229   printTuningScheme();
1230
1231   // By default lets put the previous phase data into newControlPoints
1232   instrumentedPhase *prevPhase = previousPhaseData();
1233   for(std::map<std::string, int >::const_iterator cpsIter=prevPhase->controlPoints.begin(); cpsIter != prevPhase->controlPoints.end(); ++cpsIter){
1234           newControlPoints[cpsIter->first] = cpsIter->second;
1235   }
1236
1237
1238   if( whichTuningScheme == RandomSelection ){
1239     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1240     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1241       const std::string &name = cpsIter->first;
1242       const std::pair<int,int> &bounds = cpsIter->second;
1243       const int lb = bounds.first;
1244       const int ub = bounds.second;
1245       newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1246     }
1247   } else if( whichTuningScheme == CriticalPathAutoPrioritization) {
1248     // -----------------------------------------------------------
1249     //  USE CRITICAL PATH TO ADJUST PRIORITIES
1250     //   
1251     // This scheme will return the median value for the range 
1252     // early on, and then will transition over to the new control points
1253     // determined by the critical path adapting code
1254
1255     // This won't work until the periodic function is fixed up a bit
1256
1257     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1258     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1259       const std::string &name = cpsIter->first;
1260       const std::pair<int,int> &bounds = cpsIter->second;
1261       const int lb = bounds.first;
1262       const int ub = bounds.second;
1263       newControlPoints[name] =  (lb+ub)/2;
1264     }
1265
1266   } else if ( whichTuningScheme == MemoryAware ) {
1267
1268     // -----------------------------------------------------------
1269     //  STEERING BASED ON MEMORY USAGE
1270
1271     instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1272     instrumentedPhase *prevPhase = previousPhaseData();
1273  
1274     if(phase_id%2 == 0){
1275       CkPrintf("Steering (memory based) based on 2 phases ago:\n");
1276       twoAgoPhase->print();
1277       CkPrintf("\n");
1278       fflush(stdout);
1279       
1280       // See if memory usage is low:
1281       double memUsage = twoAgoPhase->memoryUsageMB;
1282       CkPrintf("Steering (memory based) encountered memory usage of (%f MB)\n", memUsage);
1283       fflush(stdout);
1284       if(memUsage < 1100.0 && memUsage > 0.0){ // Kraken has about 16GB and 12 cores per node
1285         CkPrintf("Steering (memory based) encountered low memory usage (%f) < 1200 \n", memUsage);
1286         CkPrintf("Steering (memory based) controlPointSpace.size()=\n", controlPointSpace.size());
1287         
1288         // Initialize plan to be the values from two phases ago (later we'll adjust this)
1289         newControlPoints = twoAgoPhase->controlPoints;
1290
1291
1292         CkPrintf("Steering (memory based) initialized plan\n");
1293         fflush(stdout);
1294
1295         // look for a possible control point knob to turn
1296         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["MemoryConsumption"];
1297         
1298         // FIXME: assume for now that we just have one control point with the effect, and one direction to turn it
1299         bool found = false;
1300         std::string cpName;
1301         std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1302         std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1303         for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1304           cpName = iter->first;
1305           info = &iter->second;
1306           found = true;
1307           break;
1308         }
1309
1310         // Adapt the control point value
1311         if(found){
1312           CkPrintf("Steering found knob to turn that should increase memory consumption\n");
1313           fflush(stdout);
1314           const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1315           const int maxValue = controlPointSpace[cpName].second;
1316           
1317           if(twoAgoValue+1 <= maxValue){
1318             newControlPoints[cpName] = twoAgoValue+1; // increase from two phases back
1319           }
1320         }
1321         
1322       }
1323     }
1324
1325     CkPrintf("Steering (memory based) done for this phase\n");
1326     fflush(stdout);
1327
1328   } else if ( whichTuningScheme == UseBestKnownTiming ) {
1329
1330     // -----------------------------------------------------------
1331     //  USE BEST KNOWN TIME  
1332     static bool firstTime = true;
1333     if(firstTime){
1334       firstTime = false;
1335       instrumentedPhase *best = allData.findBest(); 
1336       CkPrintf("Best known phase is:\n");
1337       best->print();
1338       CkPrintf("\n");
1339       
1340       std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1341       for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
1342         const std::string &name = cpsIter->first;
1343         newControlPoints[name] =  best->controlPoints[name];
1344       }
1345     }
1346
1347   } else if ( whichTuningScheme == UseSteering ) {
1348           // -----------------------------------------------------------
1349           //  STEERING BASED ON KNOWLEDGE
1350
1351           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1352           // plans are only generated after 3 phases
1353
1354           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1355           instrumentedPhase *prevPhase = previousPhaseData();
1356
1357           if(phase_id%4 == 0){
1358                   CkPrintf("Steering based on 2 phases ago:\n");
1359                   twoAgoPhase->print();
1360                   CkPrintf("\n");
1361                   fflush(stdout);
1362
1363                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1364
1365
1366                   // ========================================= Concurrency =============================================
1367                   // See if idle time is high:
1368                   {
1369                           double idleTime = twoAgoPhase->idleTime.avg;
1370                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1371                           fflush(stdout);
1372                           if(idleTime > 0.10){
1373                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1374                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1375
1376                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1377
1378                                   bool found = false;
1379                                   std::string cpName;
1380                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1381                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1382                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1383                                           cpName = iter->first;
1384                                           info = &iter->second;
1385
1386                                           // Initialize a new plan based on two phases ago
1387                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1388
1389                                           CkPrintf("Steering found knob to turn\n");
1390                                           fflush(stdout);
1391
1392                                           if(info->first == ControlPoint::EFF_INC){
1393                                                   const int maxValue = controlPointSpace[cpName].second;
1394                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1395                                                   if(twoAgoValue+1 <= maxValue){
1396                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1397                                                   }
1398                                           } else {
1399                                                   const int minValue = controlPointSpace[cpName].second;
1400                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1401                                                   if(twoAgoValue-1 >= minValue){
1402                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1403                                                   }
1404                                           }
1405
1406                                           possibleNextStepPlans.push_back(aNewPlan);
1407
1408                                   }
1409                           }
1410                   }
1411
1412                   // ========================================= Grain Size =============================================
1413                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1414                   {
1415                           double overheadTime = twoAgoPhase->overheadTime.avg;
1416                           CkPrintf("Steering encountered overhead time (%f)\n", overheadTime);
1417                           fflush(stdout);
1418                           if(overheadTime > 0.10){
1419                                   CkPrintf("Steering encountered high overhead time(%f) > 10%%\n", overheadTime);
1420                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1421
1422                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GrainSize"];
1423
1424                                   bool found = false;
1425                                   std::string cpName;
1426                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1427                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1428                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1429                                           cpName = iter->first;
1430                                           info = &iter->second;
1431
1432                                           // Initialize a new plan based on two phases ago
1433                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1434
1435
1436
1437                                           CkPrintf("Steering found knob to turn\n");
1438                                           fflush(stdout);
1439
1440                                           if(info->first == ControlPoint::EFF_INC){
1441                                                   const int maxValue = controlPointSpace[cpName].second;
1442                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1443                                                   if(twoAgoValue+1 <= maxValue){
1444                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1445                                                   }
1446                                           } else {
1447                                                   const int minValue = controlPointSpace[cpName].second;
1448                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1449                                                   if(twoAgoValue-1 >= minValue){
1450                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1451                                                   }
1452                                           }
1453
1454                                           possibleNextStepPlans.push_back(aNewPlan);
1455
1456                                   }
1457
1458                           }
1459                   }
1460                   // ========================================= GPU Offload =============================================
1461                   // If the grain size is too small, there may be tons of messages and overhead time associated with scheduling
1462                   {
1463                           double idleTime = twoAgoPhase->idleTime.avg;
1464                           CkPrintf("Steering encountered idle time (%f)\n", idleTime);
1465                           fflush(stdout);
1466                           if(idleTime > 0.10){
1467                                   CkPrintf("Steering encountered high idle time(%f) > 10%%\n", idleTime);
1468                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1469
1470                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["GPUOffloadedWork"];
1471
1472                                   bool found = false;
1473                                   std::string cpName;
1474                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1475                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1476                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1477                                           cpName = iter->first;
1478                                           info = &iter->second;
1479
1480                                           // Initialize a new plan based on two phases ago
1481                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1482
1483
1484                                           CkPrintf("Steering found knob to turn\n");
1485                                           fflush(stdout);
1486
1487                                           if(info->first == ControlPoint::EFF_DEC){
1488                                                   const int maxValue = controlPointSpace[cpName].second;
1489                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1490                                                   if(twoAgoValue+1 <= maxValue){
1491                                                           aNewPlan[cpName] = twoAgoValue+1; // increase from two phases back
1492                                                   }
1493                                           } else {
1494                                                   const int minValue = controlPointSpace[cpName].second;
1495                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1496                                                   if(twoAgoValue-1 >= minValue){
1497                                                           aNewPlan[cpName] = twoAgoValue-1; // decrease from two phases back
1498                                                   }
1499                                           }
1500
1501                                           possibleNextStepPlans.push_back(aNewPlan);
1502
1503                                   }
1504
1505                           }
1506                   }
1507
1508                   // ========================================= Done =============================================
1509
1510
1511                   if(possibleNextStepPlans.size() > 0){
1512                           newControlPoints = possibleNextStepPlans[0];
1513                   }
1514
1515
1516                   CkPrintf("Steering done for this phase\n");
1517                   fflush(stdout);
1518
1519           }  else {
1520                   // This is not a phase to do steering, so stick with previously used values (one phase ago)
1521                   CkPrintf("not a phase to do steering, so stick with previously planned values (one phase ago)\n");
1522                   fflush(stdout);
1523           }
1524
1525
1526
1527   } else if( whichTuningScheme == SimulatedAnnealing ) {
1528
1529     // -----------------------------------------------------------
1530     //  SIMULATED ANNEALING
1531     //  Simulated Annealing style hill climbing method
1532     //
1533     //  Find the best search space configuration, and try something
1534     //  nearby it, with a radius decreasing as phases increase
1535
1536     CkPrintf("Finding best phase\n");
1537     instrumentedPhase *bestPhase = allData.findBest();  
1538     CkPrintf("best found:\n"); 
1539     bestPhase->print(); 
1540     CkPrintf("\n"); 
1541     
1542
1543     const int convergeByPhase = 100;
1544     // Determine from 0.0 to 1.0 how far along we are
1545     const double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
1546
1547     std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1548     for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1549       const std::string &name = cpsIter->first;
1550       const std::pair<int,int> &bounds = cpsIter->second;
1551       const int minValue = bounds.first;
1552       const int maxValue = bounds.second;
1553       
1554       const int before = bestPhase->controlPoints[name];   
1555   
1556       const int range = (maxValue-minValue+1)*(1.0-progress);
1557
1558       int high = min(before+range, maxValue);
1559       int low = max(before-range, minValue);
1560       
1561       newControlPoints[name] = low;
1562       if(high-low > 0){
1563         newControlPoints[name] += randInt(high-low, name.c_str(), phase_id); 
1564       } 
1565       
1566     }
1567
1568   } else if ( whichTuningScheme == DivideAndConquer ) {
1569
1570           // -----------------------------------------------------------
1571           //  STEERING FOR Divide & Conquer Programs
1572           //  This scheme uses no timing information. It just tries to converge to the point where idle time = overhead time.
1573           //  For a Fibonacci example, this appears to be a good heurstic for finding the best performing program.
1574           //  The scheme can be applied within a single program tree computation, if the tree is being traversed depth first.
1575
1576           // after 3 phases (and only on even steps), do steering performance. Otherwise, just use previous phase's configuration
1577           // plans are only generated after 3 phases
1578
1579           instrumentedPhase *twoAgoPhase = twoAgoPhaseData();
1580           instrumentedPhase *prevPhase = previousPhaseData();
1581
1582           if(phase_id%4 == 0){
1583                   CkPrintf("Divide & Conquer Steering based on 2 phases ago:\n");
1584                   twoAgoPhase->print();
1585                   CkPrintf("\n");
1586                   fflush(stdout);
1587
1588                   std::vector<std::map<std::string,int> > possibleNextStepPlans;
1589
1590
1591                   // ========================================= Concurrency =============================================
1592                   // See if idle time is high:
1593                   {
1594                           double idleTime = twoAgoPhase->idleTime.avg;
1595                           double overheadTime = twoAgoPhase->overheadTime.avg;
1596
1597
1598                           CkPrintf("Divide & Conquer Steering encountered overhead time (%f) idle time (%f)\n",overheadTime, idleTime);
1599                           fflush(stdout);
1600                           if(idleTime+overheadTime > 0.10){
1601                                   CkPrintf("Steering encountered high idle+overheadTime time(%f) > 10%%\n", idleTime+overheadTime);
1602                                   CkPrintf("Steering controlPointSpace.size()=\n", controlPointSpace.size());
1603
1604                                   int direction = -1;
1605                                   if (idleTime>overheadTime){
1606                                           // We need to decrease the grain size, or increase the available concurrency
1607                                           direction = 1;
1608                                   }
1609
1610                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > > &possibleCPsToTune = CkpvAccess(cp_effects)["Concurrency"];
1611
1612                                   bool found = false;
1613                                   std::string cpName;
1614                                   std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > *info;
1615                                   std::map<std::string, std::pair<int, std::vector<ControlPoint::ControlPointAssociation> > >::iterator iter;
1616                                   for(iter = possibleCPsToTune.begin(); iter != possibleCPsToTune.end(); iter++){
1617                                           cpName = iter->first;
1618                                           info = &iter->second;
1619                                           
1620                                           // Initialize a new plan based on two phases ago
1621                                           std::map<std::string,int> aNewPlan = twoAgoPhase->controlPoints;
1622                                           bool newPlanExists = false;
1623
1624                                           CkPrintf("Divide & Conquer Steering found knob to turn\n");
1625                                           fflush(stdout);
1626
1627
1628
1629                                           if(info->first == ControlPoint::EFF_INC){
1630                                                   const int minValue = controlPointSpace[cpName].first;
1631                                                   const int maxValue = controlPointSpace[cpName].second;
1632                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1633                                                   const int newVal = twoAgoValue+1*direction;
1634                                                   if(newVal <= maxValue && newVal >= minValue){
1635                                                     aNewPlan[cpName] = newVal;
1636                                                     newPlanExists = true;
1637                                                   } else {
1638                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1639                                                   }
1640                                           } else {
1641                                                   const int minValue = controlPointSpace[cpName].first;
1642                                                   const int maxValue = controlPointSpace[cpName].second;
1643                                                   const int twoAgoValue =  twoAgoPhase->controlPoints[cpName];
1644                                                   const int newVal = twoAgoValue-1*direction;
1645                                                   if(newVal <= maxValue && newVal >= minValue){
1646                                                           aNewPlan[cpName] = newVal;
1647                                                           newPlanExists = true;
1648                                                   } else {
1649                                                     CkPrintf("Divide & Conquer Steering found that turning the knob exceeds the control point's range (newVal=%d)\n", newVal);
1650                                                   }
1651                                           }
1652
1653                                           if(newPlanExists){
1654                                             possibleNextStepPlans.push_back(aNewPlan);
1655                                           }
1656                                   }
1657                           }
1658                   }
1659
1660                   if(possibleNextStepPlans.size() > 0){
1661                     CkPrintf("Divide & Conquer Steering found %d possible next phases, using first one\n", possibleNextStepPlans.size());
1662                     newControlPoints = possibleNextStepPlans[0];
1663                   } else {
1664                     CkPrintf("Divide & Conquer Steering found no possible next phases\n");
1665                   }
1666           }
1667
1668   } else if( whichTuningScheme == Simplex ) {
1669
1670           // -----------------------------------------------------------
1671           //  Nelder Mead Simplex Algorithm
1672           //
1673           //  A scheme that takes a simplex (n+1 points) and moves it
1674           //  toward the minimum, eventually converging there.
1675
1676           s.adapt(controlPointSpace, newControlPoints, phase_id, allData);
1677
1678   } else if( whichTuningScheme == ExhaustiveSearch ) {
1679     
1680     // -----------------------------------------------------------
1681     // EXHAUSTIVE SEARCH
1682    
1683     int numDimensions = controlPointSpace.size();
1684     CkAssert(numDimensions > 0);
1685   
1686     vector<int> lowerBounds(numDimensions);
1687     vector<int> upperBounds(numDimensions); 
1688   
1689     int d=0;
1690     std::map<std::string, pair<int,int> >::iterator iter;
1691     for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
1692       //    CkPrintf("Examining dimension %d\n", d);
1693       lowerBounds[d] = iter->second.first;
1694       upperBounds[d] = iter->second.second;
1695       d++;
1696     }
1697    
1698     // get names for each dimension (control point)
1699     vector<std::string> names(numDimensions);
1700     d=0;
1701     for(std::map<std::string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
1702       names[d] = niter->first;
1703       d++;
1704     }
1705   
1706   
1707     // Create the first possible configuration
1708     vector<int> config = lowerBounds;
1709     config.push_back(0);
1710   
1711     // Increment until finding an unused configuration
1712     allData.cleanupNames(); // put -1 values in for any control points missing
1713     std::vector<instrumentedPhase*> &phases = allData.phases;     
1714
1715     while(true){
1716     
1717       std::vector<instrumentedPhase*>::iterator piter; 
1718       bool testedConfiguration = false; 
1719       for(piter = phases.begin(); piter != phases.end(); piter++){ 
1720       
1721         // Test if the configuration matches this phase
1722         bool match = true;
1723         for(int j=0;j<numDimensions;j++){
1724           match &= (*piter)->controlPoints[names[j]] == config[j];
1725         }
1726       
1727         if(match){
1728           testedConfiguration = true; 
1729           break;
1730         } 
1731       } 
1732     
1733       if(testedConfiguration == false){ 
1734         break;
1735       } 
1736     
1737       // go to next configuration
1738       config[0] ++;
1739       // Propagate "carrys"
1740       for(int i=0;i<numDimensions;i++){
1741         if(config[i] > upperBounds[i]){
1742           config[i+1]++;
1743           config[i] = lowerBounds[i];
1744         }
1745       }
1746       // check if we have exhausted all possible values
1747       if(config[numDimensions] > 0){
1748         break;
1749       }
1750        
1751     }
1752   
1753     if(config[numDimensions] > 0){
1754       for(int i=0;i<numDimensions;i++){ 
1755         config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
1756       }
1757     }
1758
1759     // results are now in config[i]
1760     
1761     for(int i=0; i<numDimensions; i++){
1762       newControlPoints[names[i]] = config[i];
1763       CkPrintf("Exhaustive search chose:   %s   -> %d\n", names[i].c_str(), config[i]);
1764     }
1765
1766
1767   } else {
1768     CkAbort("Some Control Point tuning strategy must be enabled.\n");
1769   }
1770
1771
1772   const double endGenerateTime = CmiWallTimer();
1773   
1774   CkPrintf("Time to generate next control point configuration(s): %f sec\n", (endGenerateTime - startGenerateTime) );
1775   
1776 }
1777
1778
1779
1780
1781 #define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
1782
1783
1784 /// Get control point value from range of integers [lb,ub]
1785 int controlPoint(const char *name, int lb, int ub){
1786   instrumentedPhase *thisPhaseData = controlPointManagerProxy.ckLocalBranch()->currentPhaseData();
1787   const int phase_id = controlPointManagerProxy.ckLocalBranch()->phase_id;
1788   std::map<std::string, pair<int,int> > &controlPointSpace = controlPointManagerProxy.ckLocalBranch()->controlPointSpace;
1789   int result;
1790
1791   // if we already have control point values for phase, return them
1792   if( thisPhaseData->controlPoints.count(std::string(name))>0 && thisPhaseData->controlPoints[std::string(name)]>=0 ){
1793     CkPrintf("Already have control point values for phase. %s -> %d\n", name, (int)(thisPhaseData->controlPoints[std::string(name)]) );
1794     return thisPhaseData->controlPoints[std::string(name)];
1795   }
1796   
1797
1798   if( phase_id < 4 || whichTuningScheme == AlwaysDefaults){
1799     // For the first few phases, just use the lower bound, or the default if one was provided 
1800     // This ensures that the ranges for all the control points are known before we do anything fancy
1801     result = lb;
1802
1803     if(defaultControlPointValues.count(std::string(name)) > 0){
1804       int v = defaultControlPointValues[std::string(name)];
1805       CkPrintf("Startup phase using default value of %d for  \"%s\"\n", v, name);   
1806       result = v;
1807     }
1808
1809   } else if(controlPointSpace.count(std::string(name)) == 0){
1810     // if this is the first time we've seen the CP, then return the lower bound
1811     result = lb;
1812     
1813   }  else {
1814     // Generate a plan one time for each phase
1815     controlPointManagerProxy.ckLocalBranch()->generatePlan();
1816     
1817     // Use the planned value:
1818     result = controlPointManagerProxy.ckLocalBranch()->newControlPoints[std::string(name)];
1819     
1820   }
1821
1822   if(!isInRange(result,ub,lb)){
1823     std::cerr << "control point out of range: " << result << " " << lb << " " << ub << std::endl;
1824     fflush(stdout);
1825     fflush(stderr);
1826   }
1827   CkAssert(isInRange(result,ub,lb));
1828   thisPhaseData->controlPoints[std::string(name)] = result; // was insert() 
1829
1830   controlPointSpace.insert(std::make_pair(std::string(name),std::make_pair(lb,ub))); 
1831
1832   CkPrintf("Control Point \"%s\" for phase %d is: %d\n", name, phase_id, result);
1833   //  thisPhaseData->print();
1834   
1835   return result;
1836 }
1837
1838
1839 FDECL int FTN_NAME(CONTROLPOINT, controlpoint)(CMK_TYPEDEF_INT4 *lb, CMK_TYPEDEF_INT4 *ub){
1840   CkAssert(sizeof(lb) == 4);
1841   CkAssert(CkMyPe() == 0);
1842   return controlPoint("FortranCP", *lb, *ub);
1843 }
1844
1845
1846
1847
1848 /// Inform the control point framework that a named control point affects the priorities of some array  
1849 // void controlPointPriorityArray(const char *name, CProxy_ArrayBase &arraybase){
1850 //   CkGroupID aid = arraybase.ckGetArrayID();
1851 //   int groupIdx = aid.idx;
1852 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityArray(name, groupIdx);
1853 //   //  CkPrintf("Associating control point \"%s\" with array id=%d\n", name, groupIdx );
1854 // }
1855
1856
1857 // /// Inform the control point framework that a named control point affects the priorities of some entry method  
1858 // void controlPointPriorityEntry(const char *name, int idx){
1859 //   controlPointManagerProxy.ckLocalBranch()->associatePriorityEntry(name, idx);
1860 //   //  CkPrintf("Associating control point \"%s\" with EP id=%d\n", name, idx);
1861 // }
1862
1863
1864
1865
1866
1867 /** Determine the next configuration to try using the Nelder Mead Simplex Algorithm.
1868
1869     This function decomposes the algorithm into a state machine that allows it to
1870     evaluate one or more configurations through subsequent clls to this function.
1871     The state diagram is pictured in the NelderMeadStateDiagram.pdf diagram.
1872
1873     At one point in the algorithm, n+1 parameter configurations must be evaluated,
1874     so a list of them will be created and they will be evaluated, one per call.
1875
1876     Currently there is no stopping criteria, but the simplex ought to contract down
1877     to a few close configurations, and hence not much change will happen after this 
1878     point.
1879
1880  */
1881 void simplexScheme::adapt(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
1882
1883         if(useBestKnown){
1884                 CkPrintf("Simplex Tuning: Simplex algorithm is done, using best known phase:\n");
1885                 return;
1886         }
1887
1888
1889         if(firstSimplexPhase< 0){
1890                 firstSimplexPhase = allData.phases.size()-1;
1891                 CkPrintf("First simplex phase is %d\n", firstSimplexPhase);
1892         }
1893
1894         int n = controlPointSpace.size();
1895
1896         CkAssert(n>=2);
1897
1898
1899         if(simplexState == beginning){
1900                 // First we evaluate n+1 random points, then we go to a different state
1901                 if(allData.phases.size() < firstSimplexPhase + n+2      ){
1902                         CkPrintf("Simplex Tuning: chose random configuration\n");
1903
1904                         // Choose random values from the middle third of the range in each dimension
1905                         std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
1906                         for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
1907                                 const std::string &name = cpsIter->first;
1908                                 const std::pair<int,int> &bounds = cpsIter->second;
1909                                 const int lb = bounds.first;
1910                                 const int ub = bounds.second;
1911                                 newControlPoints[name] = lb + randInt(ub-lb+1, name.c_str(), phase_id);
1912                         }
1913                 } else {
1914                         // Set initial simplex:
1915                         for(int i=0; i<n+1; i++){
1916                                 simplexIndices.insert(firstSimplexPhase+i);
1917                         }
1918                         CkAssert(simplexIndices.size() == n+1);
1919
1920                         // Transition to reflecting state
1921                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1922
1923                 }
1924
1925         } else if (simplexState == reflecting){
1926                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1927                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1928
1929                 // Find the highest time from other points in the simplex
1930                 double highestTimeForOthersInSimplex = 0.0;
1931                 for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
1932                         double t = allData.phases[*iter]->medianTime();
1933                         if(*iter != worstPhase && t > highestTimeForOthersInSimplex) {
1934                                 highestTimeForOthersInSimplex = t;
1935                         }
1936                 }
1937
1938                 CkPrintf("After reflecting, the median time for the phase is %f, previous worst phase %d time was %f\n", recentPhaseTime, worstPhase, previousWorstPhaseTime);
1939
1940                 if(recentPhaseTime < highestTimeForOthersInSimplex){
1941                         // if y* < yl,  transition to "expanding"
1942                         doExpansion(controlPointSpace, newControlPoints, phase_id, allData);
1943
1944                 } else if (recentPhaseTime <= highestTimeForOthersInSimplex){
1945                         // else if y* <= yi replace ph with p* and transition to "evaluatingOne"
1946                         CkAssert(simplexIndices.size() == n+1);
1947                         simplexIndices.erase(worstPhase);
1948                         CkAssert(simplexIndices.size() == n);
1949                         simplexIndices.insert(pPhase);
1950                         CkAssert(simplexIndices.size() == n+1);
1951                         // Transition to reflecting state
1952                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1953
1954                 } else {
1955                         // if y* > yh
1956                         if(recentPhaseTime <= worstTime){
1957                                 // replace Ph with P*
1958                                 CkAssert(simplexIndices.size() == n+1);
1959                                 simplexIndices.erase(worstPhase);
1960                                 CkAssert(simplexIndices.size() == n);
1961                                 simplexIndices.insert(pPhase);
1962                                 CkAssert(simplexIndices.size() == n+1);
1963                                 // Because we later will possibly replace Ph with P**, and we just replaced it with P*, we need to update our Ph reference
1964                                 worstPhase = pPhase;
1965                                 // Just as a sanity check, make sure we don't use the non-updated values.
1966                                 worst.clear();
1967                         }
1968
1969                         // Now, form P** and do contracting phase
1970                         doContraction(controlPointSpace, newControlPoints, phase_id, allData);
1971
1972                 }
1973
1974         } else if (simplexState == doneExpanding){
1975                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
1976                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
1977                 // A new configuration has been evaluated
1978
1979                 // Check to see if y** < y1
1980                 if(recentPhaseTime < bestTime){
1981                         // replace Ph by P**
1982                         CkAssert(simplexIndices.size() == n+1);
1983                         simplexIndices.erase(worstPhase);
1984                         CkAssert(simplexIndices.size() == n);
1985                         simplexIndices.insert(p2Phase);
1986                         CkAssert(simplexIndices.size() == n+1);
1987                 } else {
1988                         //      replace Ph by P*
1989                         CkAssert(simplexIndices.size() == n+1);
1990                         simplexIndices.erase(worstPhase);
1991                         CkAssert(simplexIndices.size() == n);
1992                         simplexIndices.insert(pPhase);
1993                         CkAssert(simplexIndices.size() == n+1);
1994                 }
1995
1996                 // Transition to reflecting state
1997                 doReflection(controlPointSpace, newControlPoints, phase_id, allData);
1998
1999         }  else if (simplexState == contracting){
2000                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2001                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2002                 // A new configuration has been evaluated
2003
2004                 // Check to see if y** > yh
2005                 if(recentPhaseTime <= worstTime){
2006                         // replace Ph by P**
2007                         CkPrintf("Replacing phase %d with %d\n", worstPhase, p2Phase);
2008                         CkAssert(simplexIndices.size() == n+1);
2009                         simplexIndices.erase(worstPhase);
2010                         CkAssert(simplexIndices.size() == n);
2011                         simplexIndices.insert(p2Phase);
2012                         CkAssert(simplexIndices.size() == n+1);
2013                         // Transition to reflecting state
2014                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2015
2016                 } else {
2017                         //      conceptually we will replace all Pi by (Pi+Pl)/2, but there is nothing to store this until after we have tried all of them
2018                         simplexState = stillContracting;
2019
2020                         // A set of phases for which (P_i+P_l)/2 ought to be evaluated
2021                         stillMustContractList = simplexIndices;
2022
2023                         CkPrintf("Simplex Tuning: Switched to state: stillContracting\n");
2024                 }
2025
2026         } else if (simplexState == stillContracting){
2027                 CkPrintf("Simplex Tuning: stillContracting found %d configurations left to try\n", stillMustContractList.size());
2028
2029                 if(stillMustContractList.size()>0){
2030                         int c = *stillMustContractList.begin();
2031                         stillMustContractList.erase(c);
2032                         CkPrintf("Simplex Tuning: stillContracting evaluating configuration derived from phase %d\n", c);
2033
2034                         std::vector<double> cPhaseConfig = pointCoords(allData, c);
2035
2036                         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2037                         int v = 0;
2038                         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2039                                 const std::string &name = cpsIter->first;
2040                                 const std::pair<int,int> &bounds = cpsIter->second;
2041                                 const int lb = bounds.first;
2042                                 const int ub = bounds.second;
2043
2044                                 double val = (cPhaseConfig[v] + best[v])/2.0;
2045
2046                                 newControlPoints[name] = keepInRange(val,lb,ub);
2047                                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2048                                 v++;
2049                         }
2050                 } else {
2051                         // We have tried all configurations. We should update the simplex to refer to all the newly tried configurations, and start over
2052                         CkAssert(stillMustContractList.size() == 0);
2053                         simplexIndices.clear();
2054                         CkAssert(simplexIndices.size()==0);
2055                         for(int i=0; i<n+1; i++){
2056                                 simplexIndices.insert(allData.phases.size()-2-i);
2057                         }
2058                         CkAssert(simplexIndices.size()==n+1);
2059
2060                         // Transition to reflecting state
2061                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2062
2063                 }
2064
2065
2066         } else if (simplexState == expanding){
2067                 const double recentPhaseTime = allData.phases[allData.phases.size()-2]->medianTime();
2068                 const double previousWorstPhaseTime = allData.phases[worstPhase]->medianTime();
2069                 // A new configuration has been evaluated
2070
2071                 // determine if y** < yl
2072                 if(recentPhaseTime < bestTime){
2073                         // replace Ph by P**
2074                         CkAssert(simplexIndices.size() == n+1);
2075                         simplexIndices.erase(worstPhase);
2076                         CkAssert(simplexIndices.size() == n);
2077                         simplexIndices.insert(p2Phase);
2078                         CkAssert(simplexIndices.size() == n+1);
2079                         // Transition to reflecting state
2080                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2081
2082                 } else {
2083                         // else, replace ph with p*
2084                         CkAssert(simplexIndices.size() == n+1);
2085                         simplexIndices.erase(worstPhase);
2086                         CkAssert(simplexIndices.size() == n);
2087                         simplexIndices.insert(pPhase);
2088                         CkAssert(simplexIndices.size() == n+1);
2089                         // Transition to reflecting state
2090                         doReflection(controlPointSpace, newControlPoints, phase_id, allData);
2091                 }
2092
2093
2094         } else {
2095                 CkAbort("Unknown simplexState");
2096         }
2097
2098 }
2099
2100
2101
2102 /** Replace the worst point with its reflection across the centroid. */
2103 void simplexScheme::doReflection(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2104
2105         int n = controlPointSpace.size();
2106
2107         printSimplex(allData);
2108
2109         computeCentroidBestWorst(controlPointSpace, newControlPoints, phase_id, allData);
2110
2111
2112         // Quit if the diameter of our simplex is small
2113         double maxr = 0.0;
2114         for(int i=0; i<n+1; i++){
2115                 //              Compute r^2 of this simplex point from the centroid
2116                 double r2 = 0.0;
2117                 std::vector<double> p = pointCoords(allData, i);
2118                 for(int d=0; d<p.size(); d++){
2119                         double r1 = (p[d] * centroid[d]);
2120                         r2 += r1*r1;
2121                 }
2122                 if(r2 > maxr)
2123                         maxr = r2;
2124         }
2125
2126 #if 0
2127         // At some point we quit this tuning
2128         if(maxr < 10){
2129                 useBestKnown = true;
2130                 instrumentedPhase *best = allData.findBest();
2131                 CkPrintf("Simplex Tuning: Simplex diameter is small, so switching over to best known phase:\n");
2132
2133                 std::map<std::string, std::pair<int,int> >::const_iterator cpsIter;
2134                 for(cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter) {
2135                         const std::string &name = cpsIter->first;
2136                         newControlPoints[name] =  best->controlPoints[name];
2137                 }
2138         }
2139 #endif
2140
2141         // Compute new point P* =(1+alpha)*centroid - alpha(worstPoint)
2142
2143         pPhase = allData.phases.size()-1;
2144         P.resize(n);
2145         for(int i=0; i<n; i++){
2146                 P[i] = (1.0+alpha) * centroid[i] - alpha * worst[i] ;
2147         }
2148
2149         for(int i=0; i<P.size(); i++){
2150                 CkPrintf("Simplex Tuning: P dimension %d is %f\n", i, P[i]);
2151         }
2152
2153
2154         // Evaluate point P by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2155         int v = 0;
2156         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2157                 const std::string &name = cpsIter->first;
2158                 const std::pair<int,int> &bounds = cpsIter->second;
2159                 const int lb = bounds.first;
2160                 const int ub = bounds.second;
2161                 newControlPoints[name] = keepInRange(P[v],lb,ub);
2162                 CkPrintf("Simplex Tuning: v=%d Reflected worst %d %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2163                 v++;
2164         }
2165
2166
2167         // Transition to "reflecting" state
2168         simplexState = reflecting;
2169         CkPrintf("Simplex Tuning: Switched to state: reflecting\n");
2170
2171 }
2172
2173
2174
2175
2176 /** Replace the newly tested reflection with a further expanded version of itself. */
2177 void simplexScheme::doExpansion(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2178         int n = controlPointSpace.size();
2179         printSimplex(allData);
2180
2181         // Note that the original Nelder Mead paper has an error when it displays the equation for P** in figure 1.
2182         // I believe the equation for P** in the text on page 308 is correct.
2183
2184         // Compute new point P2 = (1+gamma)*P - gamma(centroid)
2185
2186
2187         p2Phase = allData.phases.size()-1;
2188         P2.resize(n);
2189         for(int i=0; i<n; i++){
2190                 P2[i] = ( (1.0+gamma) * P[i] - gamma * centroid[i] );
2191         }
2192
2193         for(int i=0; i<P2.size(); i++){
2194                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2195         }
2196
2197
2198         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2199         int v = 0;
2200         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2201                 const std::string &name = cpsIter->first;
2202                 const std::pair<int,int> &bounds = cpsIter->second;
2203                 const int lb = bounds.first;
2204                 const int ub = bounds.second;
2205                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2206                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Expanding %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2207                 v++;
2208         }
2209
2210
2211         // Transition to "doneExpanding" state
2212         simplexState = doneExpanding;
2213         CkPrintf("Simplex Tuning: Switched to state: doneExpanding\n");
2214
2215 }
2216
2217
2218
2219
2220 /** Replace the newly tested reflection with a further expanded version of itself. */
2221 void simplexScheme::doContraction(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2222         int n = controlPointSpace.size();
2223         printSimplex(allData);
2224
2225         // Compute new point P2 = beta*Ph + (1-beta)*centroid
2226
2227
2228         p2Phase = allData.phases.size()-1;
2229         P2.resize(n);
2230         for(int i=0; i<n; i++){
2231                 P2[i] = ( beta*worst[i] + (1.0-beta)*centroid[i] );
2232         }
2233
2234         for(int i=0; i<P2.size(); i++){
2235                 CkPrintf("P2 aka P** dimension %d is %f\n", i, P2[i]);
2236         }
2237
2238
2239         // Evaluate point P** by storing new configuration in newControlPoints, and by transitioning to "reflecting" state
2240         int v = 0;
2241         for(std::map<std::string, std::pair<int,int> >::iterator cpsIter=controlPointSpace.begin(); cpsIter != controlPointSpace.end(); ++cpsIter){
2242                 const std::string &name = cpsIter->first;
2243                 const std::pair<int,int> &bounds = cpsIter->second;
2244                 const int lb = bounds.first;
2245                 const int ub = bounds.second;
2246                 newControlPoints[name] = keepInRange(P2[v],lb,ub);
2247                 CkPrintf("Simplex Tuning: v=%d worstPhase=%d Contracting %s -> %f (ought to be %f )\n", (int)v, (int)worstPhase, (char*)name.c_str(), (double)newControlPoints[name], (double)P[v]);
2248                 v++;
2249         }
2250
2251
2252         // Transition to "contracting" state
2253         simplexState = contracting;
2254         CkPrintf("Simplex Tuning: Switched to state: contracting\n");
2255
2256 }
2257
2258
2259
2260
2261 void simplexScheme::computeCentroidBestWorst(std::map<std::string, std::pair<int,int> > & controlPointSpace, std::map<std::string,int> &newControlPoints, const int phase_id, instrumentedData &allData){
2262         int n = controlPointSpace.size();
2263
2264         // Find worst performing point in the simplex
2265         worstPhase = -1;
2266         worstTime = -1.0;
2267         bestPhase = 10000000;
2268         bestTime = 10000000;
2269         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2270                 double t = allData.phases[*iter]->medianTime();
2271                 if(t > worstTime){
2272                         worstTime = t;
2273                         worstPhase = *iter;
2274                 }
2275                 if(t < bestTime){
2276                         bestTime = t;
2277                         bestPhase = *iter;
2278                 }
2279         }
2280         CkAssert(worstTime != -1.0 && worstPhase != -1 && bestTime != 10000000 && bestPhase != 10000000);
2281
2282         best = pointCoords(allData, bestPhase);
2283         CkAssert(best.size() == n);
2284
2285         worst = pointCoords(allData, worstPhase);
2286         CkAssert(worst.size() == n);
2287
2288         // Calculate centroid of the remaining points in the simplex
2289         centroid.resize(n);
2290         for(int i=0; i<n; i++){
2291                 centroid[i] = 0.0;
2292         }
2293
2294         int numPts = 0;
2295
2296         for(std::set<int>::iterator iter = simplexIndices.begin(); iter != simplexIndices.end(); ++iter){
2297                 if(*iter != worstPhase){
2298                         numPts ++;
2299                         // Accumulate into the result vector
2300                         int c = 0;
2301                         for(std::map<std::string,int>::iterator citer = allData.phases[*iter]->controlPoints.begin(); citer != allData.phases[*iter]->controlPoints.end(); ++citer){
2302                                 centroid[c] += citer->second;
2303                                 c++;
2304                         }
2305
2306                 }
2307         }
2308
2309         // Now divide the sums by the number of points.
2310         for(int v = 0; v<centroid.size(); v++) {
2311                 centroid[v] /= (double)numPts;
2312         }
2313
2314         CkAssert(centroid.size() == n);
2315
2316         for(int i=0; i<centroid.size(); i++){
2317                 CkPrintf("Centroid dimension %d is %f\n", i, centroid[i]);
2318         }
2319
2320
2321 }
2322
2323
2324
2325 std::vector<double> simplexScheme::pointCoords(instrumentedData &allData, int i){
2326         std::vector<double> result;
2327         for(std::map<std::string,int>::iterator citer = allData.phases[i]->controlPoints.begin(); citer != allData.phases[i]->controlPoints.end(); ++citer){
2328                 result.push_back((double)citer->second);
2329         }
2330         return result;
2331 }
2332
2333
2334
2335
2336 void ControlPointWriteOutputToDisk(){
2337         CkAssert(CkMyPe() == 0);
2338         controlPointManagerProxy.ckLocalBranch()->writeOutputToDisk();
2339 }
2340
2341
2342
2343 /*! @} */
2344
2345 #ifdef CP_DISABLE_TRACING
2346 #include "ControlPointsNoTrace.def.h"
2347 #else
2348 #include "ControlPoints.def.h"
2349 #endif