Adding the initial control point framework
authorIsaac Dooley <idooley2@illinois.edu>
Wed, 12 Nov 2008 19:25:17 +0000 (19:25 +0000)
committerIsaac Dooley <idooley2@illinois.edu>
Wed, 12 Nov 2008 19:25:17 +0000 (19:25 +0000)
src/ck-cp/controlPoints.C [new file with mode: 0644]
src/ck-cp/controlPoints.ci [new file with mode: 0644]
src/ck-cp/controlPoints.h [new file with mode: 0644]

diff --git a/src/ck-cp/controlPoints.C b/src/ck-cp/controlPoints.C
new file mode 100644 (file)
index 0000000..7a3a50b
--- /dev/null
@@ -0,0 +1,1089 @@
+#include <charm++.h>
+#include <cmath>
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <sstream>
+#include <map>
+#include <set>
+#include <vector>
+#include <utility>
+#include <sys/time.h>
+
+#include "ControlPoints.decl.h"
+
+#include "LBDatabase.h"
+#include "controlPoints.h"
+
+using namespace std;
+
+#define CONTROL_POINT_PERIOD 8000
+#define OPTIMIZER_TRANSITION 5
+
+static void periodicProcessControlPoints(void* ptr, double currWallTime);
+
+
+// A pointer to this PE's controlpoint manager Proxy
+/* readonly */ CProxy_controlPointManager localControlPointManagerProxy;
+/* readonly */ int random_seed;
+
+
+class instrumentedPhase {
+public:
+  std::map<string,int> controlPoints; // The control point values for this phase(don't vary within the phase)
+  std::multiset<double> times;  // A list of times observed for iterations in this phase
+  
+  int memoryUsageMB;
+  
+  instrumentedPhase(){
+    memoryUsageMB = -1;
+  }
+  
+  void clear(){
+    controlPoints.clear();
+    times.clear();
+  }
+
+  // Provide a previously computed value, or a value from a previous run
+  bool haveValueForName(const char* name){
+    string n(name);
+    return (controlPoints.count(n)>0);
+  }
+
+  void operator=(const instrumentedPhase& p){
+    controlPoints = p.controlPoints;
+    times = p.times;
+    memoryUsageMB = p.memoryUsageMB;
+  }
+
+
+
+  bool operator<(const instrumentedPhase& p){
+    CkAssert(hasSameKeysAs(p)); 
+    std::map<string,int>::iterator iter1 = controlPoints.begin();
+    std::map<string,int>::const_iterator iter2 = p.controlPoints.begin();
+    for(;iter1 != controlPoints.end() && iter2 != p.controlPoints.end(); iter1++, iter2++){
+      if(iter1->second < iter2->second){
+       return true;
+      }
+    }
+    return false;
+  }
+
+
+  // Determines if the control point values and other information exists
+  bool hasValidControlPointValues(){
+    std::map<string,int>::iterator iter;
+    for(iter = controlPoints.begin();iter != controlPoints.end(); iter++){
+      if(iter->second == -1){ 
+        return false; 
+      }  
+    }
+
+    return true;
+  }
+
+
+
+  bool operator==(const instrumentedPhase& p){
+    CkAssert(hasSameKeysAs(p));
+    std::map<string,int>::iterator iter1 = controlPoints.begin();
+    std::map<string,int>::const_iterator iter2 = p.controlPoints.begin();
+    for(;iter1 != controlPoints.end() && iter2 != p.controlPoints.end(); iter1++, iter2++){ 
+      if(iter1->second != iter2->second){ 
+        return false; 
+      }  
+    }
+    return true;
+  }
+
+  /// Verify the names of the control points are consistent
+  /// note: std::map stores the pairs in a sorted order based on their first component 
+  bool hasSameKeysAs(const instrumentedPhase& p){
+    
+    if(controlPoints.size() != p.controlPoints.size())
+      return false;
+
+    std::map<string,int>::iterator iter1 = controlPoints.begin(); 
+    std::map<string,int>::const_iterator iter2 = p.controlPoints.begin(); 
+
+    for(;iter1 != controlPoints.end() && iter2 != p.controlPoints.end(); iter1++, iter2++){  
+      if(iter1->first != iter2->first)
+       return false;
+    } 
+
+    return true; 
+  }
+
+
+  void addAllNames(std::set<string> names_) {
+    
+    std::set<string> names = names_;
+    
+    // Remove all the names that we already have
+    std::map<std::string,int>::iterator iter;
+    
+    for(iter = controlPoints.begin(); iter != controlPoints.end(); iter++){
+      names.erase(iter->first);
+    }
+    
+    // Add -1 values for each name we didn't find
+    std::set<std::string>::iterator iter2;
+    for(iter2 = names.begin(); iter2 != names.end(); iter2++){
+      controlPoints.insert(make_pair(*iter2,-1));
+      CkPrintf("One of the datasets was missing a value for %s, so -1 was used\n", iter2->c_str());
+    }
+
+  }
+
+
+
+  void print() {
+    std::map<std::string,int>::iterator iter;
+
+    if(controlPoints.size() == 0){
+      CkPrintf("no control point values found\n");
+    }
+    
+    for(iter = controlPoints.begin(); iter != controlPoints.end(); iter++){
+      std::string name = iter->first;
+      int val = iter->second;
+      CkPrintf("%s ---> %d\n",  name.c_str(),  val);
+    } 
+    
+  }
+  
+  
+};
+
+
+
+class instrumentedData {
+public:
+  std::vector<instrumentedPhase> phases;
+
+  /// get control point names for all phases
+  std::set<string> getNames(){
+    std::set<string> names;
+    
+    std::vector<instrumentedPhase>::iterator iter;
+    for(iter = phases.begin();iter!=phases.end();iter++) {
+      
+      std::map<string,int>::iterator iter2;
+      for(iter2 = iter->controlPoints.begin(); iter2 != iter->controlPoints.end(); iter2++){
+       names.insert(iter2->first);
+      }
+      
+    }  
+    return names;
+
+  } 
+
+
+  void cleanupNames(){
+    std::set<string> names = getNames();
+    
+    std::vector<instrumentedPhase>::iterator iter;
+    for(iter = phases.begin();iter!=phases.end();iter++) {
+      iter->addAllNames(names);
+    }
+  }
+
+
+  /// Remove one phase with invalid control point values if found
+  bool filterOutOnePhase(){
+    // Note: calling erase on a vector will invalidate any iterators beyond the deletion point
+    std::vector<instrumentedPhase>::iterator iter;
+    for(iter = phases.begin(); iter != phases.end(); iter++) {
+      if(! iter->hasValidControlPointValues()  || iter->times.size()==0){
+       // CkPrintf("Filtered out a phase with incomplete control point values\n");
+       phases.erase(iter);
+       return true;
+      } else {
+       //      CkPrintf("Not filtering out some phase with good control point values\n");
+      }
+    }
+    return false;
+  }
+  
+  /// Drop any phases that do not contain timings or control point values
+  void filterOutIncompletePhases(){
+    bool done = false;
+    while(filterOutOnePhase()){
+      // do nothing
+    }
+  }
+
+
+  string toString(){
+    ostringstream s;
+
+    verify();
+
+    filterOutIncompletePhases();
+
+    // HEADER:
+    s << "# HEADER:\n";
+    s << "# Data for use with Isaac Dooley's Control Point Framework\n";
+    s << string("# Number of instrumented timings in this file:\n"); 
+    s << phases.size() << "\n" ;
+    
+    if(phases.size() > 0){
+      
+      std::map<string,int> &ps = phases[0].controlPoints; 
+      std::map<string,int>::iterator cpiter;
+
+      // SCHEMA:
+      s << "# SCHEMA:\n";
+      s << "# number of named control points:\n";
+      s << ps.size() << "\n";
+      
+      for(cpiter = ps.begin(); cpiter != ps.end(); cpiter++){
+       s << cpiter->first << "\n";
+      }
+      
+      // DATA:
+      s << "# DATA:\n";
+      s << "# first field is memory usage (MB). Then there are the " << ps.size()  << " control points values, followed by one or more timings" << "\n";
+      s << "# number of control point sets: " << phases.size() << "\n";
+      std::vector<instrumentedPhase>::iterator runiter;
+      for(runiter=phases.begin();runiter!=phases.end();runiter++){
+
+       // Print the memory usage
+        s << runiter->memoryUsageMB << "    "; 
+
+       // Print the control point values
+       for(cpiter = runiter->controlPoints.begin(); cpiter != runiter->controlPoints.end(); cpiter++){ 
+         s << cpiter->second << " "; 
+       }
+
+       s << "     ";
+
+       // Print the times
+       std::multiset<double>::iterator titer;
+       for(titer = runiter->times.begin(); titer != runiter->times.end(); titer++){
+         s << *titer << " ";
+       }
+
+       s << "\n";
+       
+      }
+    }
+
+    return s.str();
+    
+  }
+
+
+  /// Verify that all our phases of data have the same sets of control point names
+  void verify(){
+    if(phases.size() > 1){
+      instrumentedPhase & firstpoint = phases[0];
+      std::vector<instrumentedPhase>::iterator iter;
+      for(iter = phases.begin();iter!=phases.end();iter++){
+       CkAssert( firstpoint.hasSameKeysAs(*iter));
+      }  
+    } 
+  }
+
+
+  // Find the fastest time from previous runs
+  instrumentedPhase findBest(){
+    CkAssert(phases.size()>0);
+
+    double total_time = 0.0; // total for all times
+    int total_count = 0;
+
+    instrumentedPhase best_phase;
+    double best_phase_avgtime = std::numeric_limits<double>::max();
+
+    int valid_phase_count = 0;
+
+    std::vector<instrumentedPhase>::iterator iter;
+    for(iter = phases.begin();iter!=phases.end();iter++){
+      if(iter->hasValidControlPointValues()){
+       valid_phase_count++;
+
+       double total_for_phase = 0.0;
+       int phase_count = 0;
+
+       // iterate over all times for this control point configuration
+       std::multiset<double>::iterator titer;
+       for(titer = iter->times.begin(); titer != iter->times.end(); titer++){
+         total_count++;
+         total_time += *titer;
+         total_for_phase += *titer;
+         phase_count ++;
+       }
+
+       double phase_average_time = total_for_phase / (double)phase_count;
+
+       if(phase_average_time  < best_phase_avgtime){
+         best_phase = *iter;
+         best_phase_avgtime = phase_average_time; 
+       }
+
+      }
+    }
+    
+    CkAssert(total_count > 0);
+
+    double avg = total_time / total_count;
+
+    if(CkMyPe() == 0){
+      CkPrintf("Best average time for a phase was %.1lf\n", best_phase_avgtime);
+      CkPrintf("Mean time for all %d times in the %d valid recorded phases was %.1lf\n", total_count, valid_phase_count, avg );
+    }
+
+    // Compute standard deviation
+    double sumx=0.0;
+    for(iter = phases.begin();iter!=phases.end();iter++){
+      if(iter->hasValidControlPointValues()){
+       std::multiset<double>::iterator titer;
+       for(titer = iter->times.begin(); titer != iter->times.end(); titer++){
+         sumx += (avg - *titer)*(avg - *titer);
+       } 
+      }
+    }
+    
+    double std_dev = sqrt(sumx / total_count);
+
+    if(CkMyPe() == 0){
+      CkPrintf("Standard Deviation for previous runs was %.2lf   or %.1lf%% of mean\n", std_dev, std_dev/avg*100.0);
+      CkPrintf("The best phase average time was %.1lf%% faster than the mean\n", (avg-best_phase_avgtime)/avg*100.0);
+
+    }
+
+    return best_phase;
+  }
+  
+};
+
+
+
+/// Return an integer between 0 and num-1 inclusive
+/// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
+unsigned int randInt(unsigned int num, const char* name, int seed=0){
+  CkAssert(num > 0);
+  CkAssert(num < 1000);
+
+  unsigned long hash = 0;
+  unsigned int c;
+  unsigned char * str = (unsigned char*)name;
+
+  while (c = *str++){
+    unsigned int c2 = (c+64)%128;
+    unsigned int c3 = (c2*5953)%127;
+    hash = c3 + (hash << 6) + (hash << 16) - hash;
+  }
+
+  unsigned long shuffled1 = (hash*2083)%7907;
+  unsigned long shuffled2 = (seed*4297)%2017;
+
+  unsigned long shuffled3 = (random_seed*4799)%7919;
+
+  unsigned int namehash = shuffled3 ^ shuffled1 ^ shuffled2;
+
+  unsigned int result = ((namehash * 6029) % 1117) % num;
+
+  CkAssert(result >=0 && result < num);
+  return result;
+}
+
+
+
+
+//=============================================================================
+// THE MODULE CODE IS HERE: 
+//=============================================================================
+
+
+// A group with representatives on all nodes
+class controlPointManager : public CBase_controlPointManager {
+public:
+  
+  char * dataFilename;
+  
+  instrumentedData allData;
+  instrumentedPhase thisPhaseData;
+  instrumentedPhase best_phase;
+  
+  std::map<string, pair<int,int> > controlPointSpace;
+
+  std::set<string> staticControlPoints;
+
+
+  CkCallback granularityCallback;
+  bool haveGranularityCallback;
+  bool frameworkShouldAdvancePhase;
+  
+  int phase_id;
+
+  bool alreadyRequestedMemoryUsage;
+
+  controlPointManager(){
+    alreadyRequestedMemoryUsage = false;
+    
+    dataFilename = (char*)malloc(128);
+    sprintf(dataFilename, "controlPointData.txt");
+    
+    frameworkShouldAdvancePhase = false;
+    haveGranularityCallback = false;
+    CkPrintf("[%d] controlPointManager() Constructor Initializing control points, and loading data file\n", CkMyPe());
+    
+    phase_id = 0;
+    
+    localControlPointManagerProxy = thisProxy;
+    
+    loadDataFile();
+    
+    if(allData.phases.size()>0){
+      allData.findBest();
+    }
+    
+    if(CkMyPe() == 0)
+      CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, CONTROL_POINT_PERIOD, CkMyPe());
+
+  }
+  
+  ~controlPointManager(){
+    CkPrintf("[%d] controlPointManager() Destructor\n", CkMyPe());
+  }
+
+
+  /// Loads the previous run data file
+  void loadDataFile(){
+    ifstream infile(dataFilename);
+    vector<string> names;
+    string line;
+  
+    while(getline(infile,line)){
+      if(line[0] != '#')
+       break;
+    }
+  
+    int numTimings = 0;
+    istringstream n(line);
+    n >> numTimings;
+  
+    while(getline(infile,line)){ 
+      if(line[0] != '#') 
+       break; 
+    }
+
+    int numControlPointNames = 0;
+    istringstream n2(line);
+    n2 >> numControlPointNames;
+  
+    for(int i=0; i<numControlPointNames; i++){
+      getline(infile,line);
+      names.push_back(line);
+    }
+
+    for(int i=0;i<numTimings;i++){
+      getline(infile,line);
+      while(line[0] == '#')
+       getline(infile,line); 
+
+      instrumentedPhase ips;
+
+      istringstream iss(line);
+
+      // Read memory usage for phase
+      iss >> ips.memoryUsageMB;
+      //     CkPrintf("Memory usage loaded from file: %d\n", ips.memoryUsageMB);
+
+
+      // Read control point values
+      for(int cp=0;cp<numControlPointNames;cp++){
+       int cpvalue;
+       iss >> cpvalue;
+       ips.controlPoints.insert(make_pair(names[cp],cpvalue));
+      }
+
+      double time;
+
+      while(iss >> time){
+       ips.times.insert(time);
+#if DEBUG > 5
+       CkPrintf("read time %lf from file\n", time);
+#endif
+      }
+
+      allData.phases.push_back(ips);
+
+    }
+
+    infile.close();
+  }
+
+
+
+  /// Add the current data to allData and output it to a file
+  void writeDataFile(){
+    CkPrintf("============= writeDataFile() ============\n");
+    ofstream outfile(dataFilename);
+    allData.phases.push_back(thisPhaseData);
+    allData.cleanupNames();
+    outfile << allData.toString();
+    outfile.close();
+  }
+
+
+  void setGranularityCallback(CkCallback cb, bool _frameworkShouldAdvancePhase){
+    frameworkShouldAdvancePhase = _frameworkShouldAdvancePhase;
+    granularityCallback = cb;
+    haveGranularityCallback = true;
+  }
+
+  /// Called periodically by the runtime to handle the control points
+  /// Currently called on each PE
+  void processControlPoints(){
+#if DEBUG
+    CkPrintf("[%d] processControlPoints()\n", CkMyPe());
+#endif
+    if(haveGranularityCallback){
+      if(frameworkShouldAdvancePhase){
+       gotoNextPhase();        
+      }
+      
+      controlPointMsg *msg = new(0) controlPointMsg;
+      granularityCallback.send(msg); 
+    }
+
+
+    if(CkMyPe() == 0 && !alreadyRequestedMemoryUsage){
+      alreadyRequestedMemoryUsage = true;
+      CkCallback *cb = new CkCallback(CkIndex_controlPointManager::gatherMemoryUsage(NULL), 0, thisProxy);
+      //      thisProxy.requestMemoryUsage(*cb);
+      delete cb;
+    }
+
+  }
+  
+  instrumentedPhase *previousPhaseData(){
+    int s = allData.phases.size();
+    if(s >= 1 && phase_id > 0) {
+      return &(allData.phases[s-1]);
+    } else {
+      return NULL;
+    }
+  }
+  
+  
+  void gotoNextPhase(){
+
+    LBDatabase * myLBdatabase = LBDatabaseObj();
+    LBDB * myLBDB = myLBdatabase->getLBDB();       // LBDB is Defined in LBDBManager.h
+    const CkVec<LBObj*> objs = myLBDB->getObjs();
+    const int objCount = myLBDB->getObjCount();
+
+    CkPrintf("LBDB info: objCount=%d objs contains %d LBObj* \n", objCount, objs.length());
+
+    floatType maxObjWallTime = -1.0;
+    
+    for(int i=0;i<objs.length();i++){
+      LBObj* o = objs[i];
+      const LDObjData d = o->ObjData();
+      floatType cpuTime = d.cpuTime;
+      floatType wallTime = d.wallTime;
+      // can also get object handles from the LDObjData struct
+      CkPrintf("[%d] LBDB Object[%d]: cpuTime=%f wallTime=%f\n", CkMyPe(), i, cpuTime, wallTime);
+      if(wallTime > maxObjWallTime){
+
+      }
+      
+    }
+
+    myLBDB->ClearLoads(); // BUG: Probably very dangerous if we are actually using load balancing
+    
+    
+    
+    
+    // increment phase id
+    phase_id++;
+    
+    CkPrintf("Now in phase %d\n", phase_id);
+    
+    // save the timing information from this phase
+    allData.phases.push_back(thisPhaseData);
+        
+    // clear the timing information that will be used for the next phase
+    thisPhaseData.clear();
+    
+  }
+
+  // The application can set the instrumented time for this phase
+  void setTiming(double time){
+     thisPhaseData.times.insert(time);
+  }
+  
+
+
+  void requestMemoryUsage(CkCallback cb){
+    int m = CmiMaxMemoryUsage() / 1024 / 1024;
+    CmiResetMaxMemory();
+    CkPrintf("PE %d Memory Usage is %d MB\n",CkMyPe(), m);
+    contribute(sizeof(int),&m,CkReduction::max_int, cb);
+  }
+
+  void gatherMemoryUsage(CkReductionMsg *msg){
+    int size=msg->getSize() / sizeof(int);
+    CkAssert(size==1);
+    int *m=(int *) msg->getData();
+
+    CkPrintf("[%d] Max Memory Usage for all processors is %d MB\n", CkMyPe(), m[0]);
+    
+    instrumentedPhase* prevPhase = previousPhaseData();
+    if(prevPhase != NULL){
+      prevPhase->memoryUsageMB = m[0];
+    } else {
+      CkPrintf("No place to store memory usage");
+    }
+
+    alreadyRequestedMemoryUsage = false;
+    delete msg;
+  }
+
+
+
+};
+
+
+
+void gotoNextPhase(){
+  localControlPointManagerProxy.ckLocalBranch()->gotoNextPhase();
+}
+
+
+// A mainchare that is used just to create our controlPointManager group at startup
+class controlPointMain : public CBase_controlPointMain {
+public:
+  controlPointMain(CkArgMsg* args){
+    struct timeval tp;
+    gettimeofday(& tp, NULL);
+    random_seed = (int)tp.tv_usec ^ (int)tp.tv_sec;
+
+    localControlPointManagerProxy = CProxy_controlPointManager::ckNew();
+  }
+  ~controlPointMain(){}
+};
+
+void registerGranularityChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase){
+  CkAssert(CkMyPe() == 0);
+  CkPrintf("registerGranularityChangeCallback\n");
+  localControlPointManagerProxy.ckLocalBranch()->setGranularityCallback(cb, frameworkShouldAdvancePhase);
+}
+
+
+void registerControlPointTiming(double time){
+  CkAssert(CkMyPe() == 0);
+#if DEBUG>0
+  CkPrintf("Program registering its own timing with registerControlPointTiming(time=%lf)\n", time);
+#endif
+  localControlPointManagerProxy.ckLocalBranch()->setTiming(time);
+}
+
+extern "C" void controlPointShutdown(){
+  CkAssert(CkMyPe() == 0);
+  CkPrintf("[%d] controlPointShutdown() at CkExit()\n", CkMyPe());
+
+  localControlPointManagerProxy.ckLocalBranch()->writeDataFile();
+  CkExit();
+}
+
+
+void controlPointInitNode(){
+  CkPrintf("controlPointInitNode()\n");
+  registerExitFn(controlPointShutdown);
+}
+
+static void periodicProcessControlPoints(void* ptr, double currWallTime){
+#ifdef DEBUG
+  CkPrintf("[%d] periodicProcessControlPoints()\n", CkMyPe());
+#endif
+  localControlPointManagerProxy.ckLocalBranch()->processControlPoints();
+  CcdCallFnAfterOnPE((CcdVoidFn)periodicProcessControlPoints, (void*)NULL, CONTROL_POINT_PERIOD, CkMyPe());
+}
+
+
+
+
+
+// Static point for life of program: randomly chosen, no optimizer
+int staticPoint(const char *name, int lb, int ub){
+  instrumentedPhase &thisPhaseData = localControlPointManagerProxy.ckLocalBranch()->thisPhaseData;
+  std::set<string> &staticControlPoints = localControlPointManagerProxy.ckLocalBranch()->staticControlPoints;  
+
+  int result = lb + randInt(ub-lb+1, name);
+  
+  localControlPointManagerProxy.ckLocalBranch()->controlPointSpace.insert(std::make_pair(string(name),std::make_pair(lb,ub))); 
+  thisPhaseData.controlPoints.insert(std::make_pair(string(name),result)); 
+  staticControlPoints.insert(string(name));
+
+  return result;
+}
+
+
+
+bool valueShouldBeProvidedByOptimizer(){
+  
+  const int effective_phase = localControlPointManagerProxy.ckLocalBranch()->allData.phases.size();
+  const int phase_id = localControlPointManagerProxy.ckLocalBranch()->phase_id; 
+  
+  std::map<string, pair<int,int> > &controlPointSpace = localControlPointManagerProxy.ckLocalBranch()->controlPointSpace; 
+  
+  double spaceSize = 1.0;
+  std::map<string, pair<int,int> >::iterator iter;
+  for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
+    spaceSize *= iter->second.second - iter->second.first + 1;
+  }
+
+  //  CkPrintf("Control Point Space:\n\t\tnumber of control points = %d\n\t\tnumber of possible configurations = %.0lf\n", controlPointSpace.size(), spaceSize);
+
+#if 1
+  return effective_phase > 1 && phase_id > 1;
+#else
+  return effective_phase >= OPTIMIZER_TRANSITION && phase_id > 3;
+#endif
+}
+
+
+
+
+
+
+
+
+
+int valueProvidedByOptimizer(const char * name){
+  const int phase_id = localControlPointManagerProxy.ckLocalBranch()->phase_id;
+  const int effective_phase = localControlPointManagerProxy.ckLocalBranch()->allData.phases.size();
+
+  //#define OPTIMIZER_USE_BEST_TIME  
+#if OPTIMIZER_USE_BEST_TIME  
+  static bool firstTime = true;
+  if(firstTime){
+    firstTime = false;
+    CkPrintf("Finding best phase\n");
+    instrumentedPhase p = localControlPointManagerProxy.ckLocalBranch()->allData.findBest(); 
+    CkPrintf("p=:\n");
+    p.print();
+    CkPrintf("\n");
+    localControlPointManagerProxy.ckLocalBranch()->best_phase = p;
+  }
+  
+  
+  instrumentedPhase &p = localControlPointManagerProxy.ckLocalBranch()->best_phase;
+  int result = p.controlPoints[std::string(name)];
+  CkPrintf("valueProvidedByOptimizer(): Control Point \"%s\" for phase %d chosen out of best previous phase to be: %d\n", name, phase_id, result);
+  return result;
+#elsif SIMULATED_ANNEALING
+  
+  // Simulated Annealing style hill climbing method
+  //
+  // Find the best search space configuration, and try something
+  // nearby it, with a radius decreasing as phases increase
+  
+  std::map<string, pair<int,int> > &controlPointSpace = localControlPointManagerProxy.ckLocalBranch()->controlPointSpace;  
+  
+  CkPrintf("Finding best phase\n"); 
+  instrumentedPhase p = localControlPointManagerProxy.ckLocalBranch()->allData.findBest();  
+  CkPrintf("best found:\n"); 
+  p.print(); 
+  CkPrintf("\n"); 
+  
+  int before = p.controlPoints[std::string(name)];   
+  
+  int minValue =  controlPointSpace[std::string(name)].first;
+  int maxValue =  controlPointSpace[std::string(name)].second;
+  
+  int convergeByPhase = 100;
+  
+  // Determine from 0.0 to 1.0 how far along we are
+  double progress = (double) min(effective_phase, convergeByPhase) / (double)convergeByPhase;
+  
+  int range = (maxValue-minValue+1)*(1.0-progress);
+
+  CkPrintf("========================== Hill climbing progress = %lf  range=%d\n", progress, range); 
+
+  int high = min(before+range, maxValue);
+  int low = max(before-range, minValue);
+  
+  int result = low;
+
+  if(high-low > 0){
+    result += randInt(high-low, name, phase_id); 
+  } 
+
+  CkPrintf("valueProvidedByOptimizer(): Control Point \"%s\" for phase %d chosen by hill climbing to be: %d\n", name, phase_id, result); 
+  return result; 
+
+#else
+
+  std::map<string, pair<int,int> > &controlPointSpace = localControlPointManagerProxy.ckLocalBranch()->controlPointSpace;
+  std::set<string> &staticControlPoints = localControlPointManagerProxy.ckLocalBranch()->staticControlPoints;  
+   
+  int numDimensions = controlPointSpace.size();
+  CkAssert(numDimensions > 0);
+  
+  int lowerBounds[numDimensions];
+  int upperBounds[numDimensions]; 
+  
+  int d=0;
+  std::map<string, pair<int,int> >::iterator iter;
+  for(iter = controlPointSpace.begin(); iter != controlPointSpace.end(); iter++){
+    //    CkPrintf("Examining dimension %d\n", d);
+
+    string name = iter->first;
+    if(staticControlPoints.count(name) >0 ){
+      cout << " control point " << name << " is static " << endl;
+    } else{
+      cout << " control point " << name << " is not static " << endl;
+    }
+
+    lowerBounds[d] = iter->second.first;
+    upperBounds[d] = iter->second.second;
+    d++;
+  }
+   
+
+  std::string s[numDimensions];
+  d=0;
+  for(std::map<string, pair<int,int> >::iterator niter=controlPointSpace.begin(); niter!=controlPointSpace.end(); niter++){
+    s[d] = niter->first;
+    // cout << "s[" << d << "]=" << s[d] << endl;
+    d++;
+  }
+  
+  
+  // Create the first possible configuration
+  int config[numDimensions+1]; // one value for each dimension and a
+                              // final one to hold the carry
+                              // producing an invalid config
+  config[numDimensions] = 0;
+  for(int i=0;i<numDimensions;i++){
+    config[i] = lowerBounds[i];
+  }
+  
+  // Increment until finding an unused configuration
+  localControlPointManagerProxy.ckLocalBranch()->allData.cleanupNames(); // put -1 values in for any control points missing
+  std::vector<instrumentedPhase> &phases = localControlPointManagerProxy.ckLocalBranch()->allData.phases;     
+
+  while(true){
+    
+    std::vector<instrumentedPhase>::iterator piter; 
+    bool testedConfiguration = false; 
+    for(piter = phases.begin(); piter != phases.end(); piter++){ 
+      
+      // Test if the configuration matches this phase
+      bool match = true;
+      for(int j=0;j<numDimensions;j++){
+       match &= piter->controlPoints[s[j]] == config[j];
+      }
+      
+      if(match){
+       testedConfiguration = true; 
+       break;
+      } 
+    } 
+    
+    if(testedConfiguration == false){ 
+      break;
+    } 
+    
+    // go to next configuration
+    config[0] ++;
+    // Propagate "carrys"
+    for(int i=0;i<numDimensions;i++){
+      if(config[i] > upperBounds[i]){
+       config[i+1]++;
+       config[i] = lowerBounds[i];
+      }
+    }
+    // check if we have exhausted all possible values
+    if(config[numDimensions] > 0){
+      break;
+    }
+       
+  }
+  
+  if(config[numDimensions] > 0){
+    for(int i=0;i<numDimensions;i++){ 
+      config[i]= (lowerBounds[i]+upperBounds[i]) / 2;
+    }
+  }
+  
+  
+  int result=-1;  
+
+  std::string name_s(name);
+  for(int i=0;i<numDimensions;i++){
+    //    cout << "Comparing " << name_s <<  " with s[" << i << "]=" << s[i] << endl;
+    if(name_s.compare(s[i]) == 0){
+      result = config[i];
+    }
+  }
+
+  CkAssert(result != -1);
+
+
+  CkPrintf("valueProvidedByOptimizer(): Control Point \"%s\" for phase %d chosen by exhaustive search to be: %d\n", name, phase_id, result); 
+  return result; 
+
+
+#endif
+  
+}
+
+
+
+
+
+#define isInRange(v,a,b) ( ((v)<=(a)&&(v)>=(b)) || ((v)<=(b)&&(v)>=(a)) )
+
+
+// Dynamic point varies throughout the life of program
+// The value it returns is based upon phase_id, a counter that changes for each phase of computation
+int controlPoint2Pow(const char *name, int fine_granularity, int coarse_granularity){
+  instrumentedPhase &thisPhaseData = localControlPointManagerProxy.ckLocalBranch()->thisPhaseData;
+  const int phase_id = localControlPointManagerProxy.ckLocalBranch()->phase_id;
+
+  int result;
+
+  // Use best configuration after a certain point
+  if(valueShouldBeProvidedByOptimizer()){
+    result = valueProvidedByOptimizer(name);
+  } 
+  else {
+
+    int l1 = log2l(fine_granularity);
+    int l2 = log2l(coarse_granularity);
+  
+    if (l1 > l2){
+      int tmp;
+      tmp = l2;
+      l2 = l1; 
+      l1 = tmp;
+    }
+    CkAssert(l1 <= l2);
+    
+    int l = l1 + randInt(l2-l1+1,name, phase_id);
+
+    result = 1 << l;
+
+    CkAssert(isInRange(result,fine_granularity,coarse_granularity));
+    CkPrintf("Control Point \"%s\" for phase %d chosen randomly to be: %d\n", name, phase_id, result);
+  }
+
+  thisPhaseData.controlPoints.insert(std::make_pair(string(name),result));
+
+  return result;
+}
+
+
+
+int controlPoint(const char *name, int lb, int ub){
+  instrumentedPhase &thisPhaseData = localControlPointManagerProxy.ckLocalBranch()->thisPhaseData;
+  const int phase_id = localControlPointManagerProxy.ckLocalBranch()->phase_id;
+  std::map<string, pair<int,int> > &controlPointSpace = localControlPointManagerProxy.ckLocalBranch()->controlPointSpace;
+
+  int result;
+
+  // Use best configuration after a certain point
+  if(valueShouldBeProvidedByOptimizer()){
+    result = valueProvidedByOptimizer(name);
+  } 
+  else {
+    result = lb + randInt(ub-lb+1, name, phase_id);
+    CkPrintf("Control Point \"%s\" for phase %d chosen randomly to be: %d\n", name, phase_id, result); 
+  } 
+   
+  CkAssert(isInRange(result,ub,lb));
+  thisPhaseData.controlPoints.insert(std::make_pair(string(name),result)); 
+  controlPointSpace.insert(std::make_pair(string(name),std::make_pair(lb,ub))); 
+  //  CkPrintf("Inserting control point value to thisPhaseData.controlPoints with value %d; thisPhaseData.controlPoints.size=%d\n", result, thisPhaseData.controlPoints.size());
+  return result;
+}
+
+
+int controlPoint(const char *name, std::vector<int>& values){
+  instrumentedPhase &thisPhaseData = localControlPointManagerProxy.ckLocalBranch()->thisPhaseData;
+  const int phase_id = localControlPointManagerProxy.ckLocalBranch()->phase_id;
+
+  int result;
+  if(valueShouldBeProvidedByOptimizer()){
+    result = valueProvidedByOptimizer(name);
+  } 
+  else { 
+    result = values[randInt(values.size(), name, phase_id)];
+  }
+
+  bool found = false;
+  for(int i=0;i<values.size();i++){
+    if(values[i] == result)
+      found = true;
+  }
+  CkAssert(found);
+
+  thisPhaseData.controlPoints.insert(std::make_pair(string(name),result)); 
+  return result;
+}
+
+
+
+
+// The index in the global array for my top row  
+int redistributor2D::top_data_idx(){ 
+  return (data_height * thisIndex.y) / y_chares; 
+} 
+int redistributor2D::bottom_data_idx(){ 
+  return ((data_height * (thisIndex.y+1)) / y_chares) - 1; 
+} 
+int redistributor2D::left_data_idx(){ 
+  return (data_width * thisIndex.x) / x_chares; 
+} 
+int redistributor2D::right_data_idx(){ 
+  return ((data_width * (thisIndex.x+1)) / x_chares) - 1; 
+} 
+int redistributor2D::top_neighbor(){ 
+  return (thisIndex.y + y_chares - 1) % y_chares; 
+}  
+   
+int redistributor2D::bottom_neighbor(){ 
+  return (thisIndex.y + 1) % y_chares; 
+} 
+   
+int redistributor2D::left_neighbor(){ 
+  return (thisIndex.x + x_chares - 1) % x_chares; 
+} 
+int redistributor2D::right_neighbor(){ 
+  return (thisIndex.x + 1) % x_chares; 
+} 
+  
+  
+/// the width of the non-ghost part of the local partition 
+int redistributor2D::mywidth(){ 
+  return right_data_idx() - left_data_idx() + 1; 
+} 
+   
+   
+// the height of the non-ghost part of the local partition 
+int redistributor2D::myheight(){ 
+  return bottom_data_idx() - top_data_idx() + 1; 
+} 
+
+
+
+
+
+
+
+
+#include "ControlPoints.def.h"
diff --git a/src/ck-cp/controlPoints.ci b/src/ck-cp/controlPoints.ci
new file mode 100644 (file)
index 0000000..917362e
--- /dev/null
@@ -0,0 +1,50 @@
+module ControlPoints {
+
+  readonly CProxy_controlPointManager localControlPointManagerProxy;
+  readonly int random_seed;
+
+  mainchare controlPointMain {
+    entry controlPointMain(CkArgMsg*);
+  };
+
+ initnode void controlPointInitNode();
+
+ nodegroup controlPointManager {
+    entry controlPointManager();
+
+    entry void requestMemoryUsage(CkCallback cb);
+    entry void gatherMemoryUsage(CkReductionMsg *msg);
+ }   
+
+
+
+
+
+ message controlPointMsg { 
+        char data[];
+ };
+
+
+
+
+
+ message redistributor2DMsg {  
+       double data[]; 
+ }; 
+  array [2D] redistributor2D {
+   entry redistributor2D(void);
+
+   entry void startup();       
+
+    entry void resizeGranules(int, int);
+//    entry void receiveTransposeData(int top, int left, int height, int width, int new_chare_cols, int  new_chare_rows, int which_array, double d[width*height]);
+    entry void receiveTransposeData(redistributor2DMsg *msg);
+
+  } 
+
+
+
+
+};
diff --git a/src/ck-cp/controlPoints.h b/src/ck-cp/controlPoints.h
new file mode 100644 (file)
index 0000000..a65e6a6
--- /dev/null
@@ -0,0 +1,665 @@
+/** 
+
+    A system for exposing application and runtime "control points" 
+    to the dynamic optimization framework.
+
+*/
+#ifndef __CONTROLPOINTS_H__
+#define __CONTROLPOINTS_H__
+
+#include <vector>
+#include <map>
+#include <cmath>
+#include "ControlPoints.decl.h"
+
+#include<pup_stl.h>
+
+#define DEBUG 0
+
+void registerGranularityChangeCallback(CkCallback cb, bool frameworkShouldAdvancePhase);
+void registerControlPointTiming(double time);
+
+// The application specifies that it is ready to proceed to a new set of control point values.
+// This should be called after registerControlPointTiming()
+// This should be called before calling controlPoint()
+void gotoNextPhase();
+
+/// Return an integral power of 2 between c1 and c2
+/// The value returned will likely change between subsequent invocations
+int controlPoint2Pow(const char *name, int c1, int c2);
+
+/// Return an integer between lb and ub inclusive
+/// The value returned will likely change between subsequent invocations
+int controlPoint(const char *name, int lb, int ub);
+
+/// Return an integer from the provided vector of values
+/// The value returned will likely change between subsequent invocations
+int controlPoint(const char *name, std::vector<int>& values);
+
+/// Return an integral power of 2 between c1 and c2
+/// The value returned is static throughout the life of the program
+//int beginPoint2Pow(const char *name, int c1, int c2);
+
+/// Return an integer between lb and ub inclusive
+/// The value returned is static throughout the life of the program
+int staticPoint(const char *name, int lb, int ub);
+
+/// Return an integer from the provided vector of values
+/// The value returned is static throughout the life of the program
+//int beginPoint(const char *name, std::vector<int>& values);
+
+static int maxi(int a, int b){
+  if(a>b)
+    return a;
+  else
+    return b;
+}
+
+static int mini(int a, int b){
+  if(a<b)
+    return a;
+  else
+    return b;
+}
+
+
+
+class controlPointMsg : public CMessage_controlPointMsg {
+ public:
+  char *data;
+};
+
+
+
+
+
+class redistributor2DMsg : public CMessage_redistributor2DMsg { 
+ public: 
+  int top;         
+  int left; 
+  int height;      
+  int width; 
+  int new_chare_cols; 
+  int  new_chare_rows; 
+  int which_array; 
+  double *data;
+}; 
+
+
+
+
+// A array that can redistribute user data arrays
+// It will be used by binding it to some user Chare Array
+class redistributor2D: public CBase_redistributor2D {
+ public:
+
+  std::map<int,double*> data_arrays;
+  std::map<int,int> data_arrays_sizes;
+
+  /// The array associated with this data redistribution
+  CProxyElement_ArrayElement associatedArray;
+  
+  int incoming_count;
+  std::map<int,double*> data_arrays_incoming;
+  std::map<int,int> data_arrays_incoming_sizes;
+
+  /// Is this array element active
+  bool thisElemActive;
+
+ private:
+
+
+  void *fakeMemoryUsage;
+
+
+  CkCallback dataRedistributedCallback;
+
+  int x_chares; // number of active chares in x dimension
+  int y_chares; // number of active chares in y dimension
+
+  int data_width;  // The width of the global array, not the local piece
+  int data_height; // The height of the global array, not the local piece
+
+  int data_x_ghost; // The padding in the x dimension on each side of the data
+  int data_y_ghost; // The padding in the y dimension on each side of the data
+
+
+ public:
+
+  void pup(PUP::er &p) {
+    CBase_redistributor2D::pup(p);
+
+    p | data_arrays_sizes;
+    p | data_arrays_incoming_sizes;
+    p | incoming_count;
+    p | associatedArray;
+    p | thisElemActive;
+
+    p | dataRedistributedCallback;
+
+    p | x_chares;
+    p | y_chares;
+    p | data_width;
+    p | data_height;
+    p | data_x_ghost;
+    p | data_y_ghost;
+
+    if(p.isPacking() && fakeMemoryUsage!=NULL)
+      free(fakeMemoryUsage);
+
+    fakeMemoryUsage = NULL;
+
+    ////////////////////////////////
+    // when packing, iterate through data_arrays
+    // when unpacking
+
+    {
+      std::map<int,int>::iterator iter;
+      for(iter = data_arrays_sizes.begin(); iter != data_arrays_sizes.end(); iter++){
+       int whichArray = iter->first;
+       int arraySize = iter->second;
+
+       //      CkPrintf("Pupping data array %d\n",whichArray);
+       p | whichArray;
+
+       if(p.isUnpacking())
+         data_arrays[whichArray] = new double[arraySize];
+
+       PUParray(p,data_arrays[whichArray] ,arraySize);
+       
+       if(p.isPacking())
+         delete[] data_arrays[whichArray];
+       
+      }
+    }
+
+
+    ///////////////////////////////
+    {
+      std::map<int,int>::iterator iter;
+      for(iter = data_arrays_incoming_sizes.begin(); iter != data_arrays_incoming_sizes.end(); iter++){
+       int whichArray = iter->first;
+       int arraySize = iter->second;
+
+       //      CkPrintf("Pupping incoming array %d\n",whichArray);
+       p | whichArray;
+
+       if(p.isUnpacking() && data_arrays_incoming_sizes[whichArray] > 0)
+         data_arrays_incoming[whichArray] = new double[arraySize];
+       
+       PUParray(p,data_arrays_incoming[whichArray],arraySize);
+       
+       if(p.isPacking())
+         delete[] data_arrays_incoming[whichArray];
+       
+      }
+    }
+
+    //    CkPrintf("pup redistributor2D\n");
+
+  } 
+
+
+  void ckJustMigrated(){
+    //   CkPrintf("redistributor element %02d %02d migrated to %d", thisIndex.x, thisIndex.y, CkMyPe());
+  }
+
+
+  // ------------ Some routines for computing the array bounds for this chare  ------------ 
+
+  // The index in the global array for my top row  
+  int top_data_idx();
+  int bottom_data_idx();
+  int left_data_idx();
+  int right_data_idx();
+  int top_neighbor();
+   
+  int bottom_neighbor();
+   
+  int left_neighbor();
+  int right_neighbor();
+  
+  
+  /// the width of the non-ghost part of the local partition 
+  int mywidth();
+    
+    
+  // the height of the non-ghost part of the local partition 
+  int myheight();
+    
+
+
+  // ------------ Some routines for computing the array bounds for arbitrary chares  ------------ 
+
+  int top_data_idx(int y, int y_total){
+    return (data_height * y) / y_total;
+  }
+
+  int bottom_data_idx(int y, int y_total){
+    return ((data_height * (y+1)) / y_total) - 1;
+  }
+
+  int left_data_idx(int x, int x_total){
+    return (data_width * x) / x_total;
+  }
+
+  int right_data_idx(int x, int x_total){
+    return ((data_width * (x+1)) / x_total) - 1;
+  }
+
+
+  int top_data_idx(int y){
+    return (data_height * y) / y_chares;
+  }
+
+  int bottom_data_idx(int y){
+    return ((data_height * (y+1)) / y_chares) - 1;
+  }
+
+  int left_data_idx(int x){
+    return (data_width * x) / x_chares;
+  }
+
+  int right_data_idx(int x){
+    return ((data_width * (x+1)) / x_chares) - 1;
+  }
+
+  /// Return which chare array element(x index) owns the global data item i
+  int who_owns_idx_x(int i){
+    int w=0;
+    while(1){
+      if( i >= left_data_idx(w) && i <= right_data_idx(w) ){
+       return w;
+      }
+      w++;
+    }
+  }
+  
+  /// Return which chare array element(y index) owns the global data item i
+  int who_owns_idx_y(int i){
+    int w=0;
+    while(1){
+      if( i >= top_data_idx(w) && i <= bottom_data_idx(w) ){
+       return w;
+      }
+      w++;
+    }
+  }
+  
+
+
+
+  
+  // Convert a local column,row id (0 to mywidth-1, 0 to data_width-1) to the index in the padded array
+  int local_to_padded(int x, int y){
+    return (mywidth()+2*data_x_ghost)*(y+data_y_ghost)+x+data_x_ghost;
+  }
+
+  // get a data value
+  double data_local(int which, int x, int y){
+    CkAssert(local_to_padded(x,y) < data_arrays_sizes[which]);
+    return data_arrays[which][local_to_padded(x,y)];
+  }
+
+
+  // Convert a local column id (0 to mywidth-1) to the global column id (0 to data_width-1)
+  int local_to_global_x(int x){
+    return left_data_idx() + x;
+  }
+
+  // Convert a local row id (0 to myheight-1) to the global row id (0 to data_height-1)
+  int local_to_global_y(int y){
+    return top_data_idx() + y;
+  }
+
+  int global_array_width(){
+    return data_width;
+  }
+
+  int global_array_height(){
+    return data_height;
+  }
+
+  int global_array_size(){
+    return global_array_width() * global_array_height();
+  }
+
+  int my_array_width(){
+    return mywidth()+2*data_x_ghost;
+  }
+
+  int my_array_height(){
+    return myheight()+2*data_y_ghost;
+  }
+
+  // Total size of arrays including ghost layers
+  int my_array_size(){
+    return my_array_width() * my_array_height();
+  }
+
+  /// Create an array. If multiple arrays are needed, each should have its own index
+  template <typename t> t* createDataArray(int which=0) {
+    t* data = new t[my_array_size()];
+    data_arrays[which] = data;
+    data_arrays_sizes[which] = my_array_size();
+
+    if(thisIndex.x==0 && thisIndex.y==0)  
+      CkPrintf("data_arrays_sizes[which] set to %d\n", data_arrays_sizes[which] );  
+
+
+    CkAssert(data_arrays[which] != NULL);
+#if DEBUG > 2
+    CkPrintf("Allocated array of size %d at %p\n", my_array_size(), data_arrays[which] );
+#endif
+    return data;
+  }
+  
+  template <typename t> t* getDataArray(int which=0) {
+    return data_arrays[which]; 
+  }
+
+  /// Constructor takes in the dimensions of the array, including any desired ghost layers
+  /// The local part of the arrays will have (mywidth+x_ghosts*2)*(myheight+y_ghosts*2) elements 
+  void setInitialDimensions(int width, int height, int x_chares_, int y_chares_, int x_ghosts=0, int y_ghosts=0){
+    data_width = width;      // These values cannot change after this method is called.
+    data_height = height;
+    data_x_ghost = x_ghosts;
+    data_y_ghost = y_ghosts;
+    
+    setDimensions(x_chares_, y_chares_);
+  }
+  
+
+  void setDimensions( int x_chares_, int y_chares_){
+    x_chares = x_chares_;
+    y_chares = y_chares_;
+    
+    
+    if( thisIndex.x < x_chares && thisIndex.y < y_chares ){
+      thisElemActive = true;
+    } else {
+      thisElemActive = false;
+    }
+    
+  }
+
+
+  redistributor2D(){
+    incoming_count = 0;
+    fakeMemoryUsage = NULL;
+  }
+
+
+  redistributor2D(CkMigrateMessage*){
+  }
+
+
+  void startup(){
+#if DEBUG > 3 
+   CkPrintf("redistributor 2D startup %03d,%03d\n", thisIndex.x, thisIndex.y);
+#endif
+    contribute();
+  }
+  
+
+  void printArrays(){
+#if DEBUG > 2
+    CkAssert(data_arrays.size()==2);
+    for(std::map<int,double*>::iterator diter = data_arrays.begin(); diter != data_arrays.end(); diter++){
+      int which_array = diter->first;
+      double *data = diter->second;
+      CkPrintf("%d,%d data_arrays[%d] = %p\n", thisIndex.x, thisIndex.y, which_array, data);
+    }
+#endif
+  }
+
+  
+  // Called on all elements involved with the new granularity or containing part of the old data
+  void resizeGranules(int new_active_chare_cols, int new_active_chare_rows){
+
+#if DEBUG>1
+    CkPrintf("Resize Granules called for elem %d,%d\n", thisIndex.x, thisIndex.y);     
+#endif
+
+    const bool previouslyActive = thisElemActive;
+    const int old_top = top_data_idx();
+    const int old_left = left_data_idx();
+    const int old_bottom = top_data_idx()+myheight()-1;
+    const int old_right = left_data_idx()+mywidth()-1;
+    const int old_myheight = myheight();
+    const int old_mywidth = mywidth();
+
+    setDimensions(new_active_chare_cols, new_active_chare_rows);
+    
+    const int new_mywidth = mywidth();
+    const int new_myheight = myheight();
+
+    // Transpose Data
+    // Assume only one new owner of my data
+
+    if(previouslyActive){
+     
+      // Send all my data to any blocks that will need it
+
+      int newOwnerXmin = who_owns_idx_x(old_left);
+      int newOwnerXmax = who_owns_idx_x(old_right);
+      int newOwnerYmin = who_owns_idx_y(old_top);
+      int newOwnerYmax = who_owns_idx_y(old_bottom);
+
+      for(int newx=newOwnerXmin; newx<=newOwnerXmax; newx++){
+       for(int newy=newOwnerYmin; newy<=newOwnerYmax; newy++){
+         
+         // Determine overlapping region between my data and this destination
+#if DEBUG > 2
+         CkPrintf("newy(%d)*new_myheight(%d)=%d, old_top=%d\n",newy,new_myheight,newy*new_myheight,old_top);
+#endif
+         // global range for overlapping area
+         int global_top = maxi(top_data_idx(newy),old_top);
+         int global_left = maxi(left_data_idx(newx),old_left);
+         int global_bottom = mini(bottom_data_idx(newy),old_bottom);
+         int global_right = mini(right_data_idx(newx),old_right);
+         int w = global_right-global_left+1;
+         int h = global_bottom-global_top+1;
+        
+         CkAssert(w*h>0);
+
+         int x_offset = global_left - old_left;
+         int y_offset = global_top - old_top;
+
+#if DEBUG > 2    
+         CkPrintf("w=%d h=%d x_offset=%d y_offset=%d\n", w, h, x_offset, y_offset);
+#endif
+         
+         std::map<int,double*>::iterator diter;
+         for(diter =data_arrays.begin(); diter != data_arrays.end(); diter++){
+           
+           redistributor2DMsg* msg = new(w*h) redistributor2DMsg;  
+           //      CkPrintf("Created message msg %p\n", msg);  
+           
+           int which_array = diter->first;
+           double *t = diter->second;
+           int s = data_arrays_sizes[which_array];
+           
+           for(int j=0; j<h; j++){
+             for(int i=0; i<w; i++){           
+               CkAssert(j*w+i < w*h);
+               CkAssert((data_x_ghost*2+old_mywidth)*(j+y_offset+data_y_ghost)+(i+ x_offset+data_x_ghost) < s);
+               msg->data[j*w+i] = t[(data_x_ghost*2+old_mywidth)*(j+y_offset+data_y_ghost)+(i+ x_offset+data_x_ghost)];
+             }
+           }
+           
+           msg->top = global_top;
+           msg->left = global_left;
+           msg->height = h;
+           msg->width = w;
+           msg->new_chare_cols = new_active_chare_cols;
+           msg->new_chare_rows = new_active_chare_rows; 
+           msg->which_array = which_array;
+
+           //      CkPrintf("Sending message msg %p\n", msg);      
+           thisProxy(newx, newy).receiveTransposeData(msg);
+           
+         }
+         
+       }
+       
+       
+      }
+    } 
+    
+    if(!thisElemActive){
+#if DEBUG > 2
+      CkPrintf("Element %d,%d is no longer active\n", thisIndex.x, thisIndex.y);
+#endif
+
+      // Free my arrays
+      for(std::map<int,double*>::iterator diter = data_arrays.begin(); diter != data_arrays.end(); diter++){
+       int which_array = diter->first;
+       delete data_arrays[which_array]; 
+       data_arrays[which_array] = NULL;
+       data_arrays_sizes[which_array] = 0;
+      }
+      continueToNextStep();
+      
+    }
+
+    int newPe = (thisIndex.y * new_active_chare_cols + thisIndex.x) % CkNumPes();
+    
+    if(newPe == CkMyPe()){
+      //      CkPrintf("Keeping %02d , %02d on PE %d\n", thisIndex.x, thisIndex.y, newPe);
+    }
+    else{
+      // CkPrintf("Migrating %02d , %02d to PE %d\n", thisIndex.x, thisIndex.y, newPe);
+      migrateMe(newPe);
+    }
+    
+  }
+  
+
+  void continueToNextStep(){
+#if DEBUG > 2
+    CkPrintf("Elem %d,%d is ready to continue\n", thisIndex.x, thisIndex.y);
+#endif
+    for(std::map<int,double*>::iterator diter =data_arrays.begin(); diter != data_arrays.end(); diter++){
+      int which_array = diter->first;
+      double *data = diter->second;
+      CkAssert( (data==NULL && !thisElemActive) || (data!=NULL && thisElemActive) );
+    }
+
+
+#if USE_EXTRAMEMORY
+#error NO USE_EXTRAMEMORY ALLOWED YET
+    if(thisElemActive){
+
+      long totalArtificialMemory = controlPoint("Artificial Memory Usage", 100, 500);
+      long artificialMemoryPerChare = totalArtificialMemory *1024*1024 / x_chares / y_chares;
+      
+      CkPrintf("Allocating fake memory of %d MB (of the total %d MB) (xchares=%d y_chares=%d)\n", artificialMemoryPerChare/1024/1024, totalArtificialMemory, x_chares, y_chares);
+      free(fakeMemoryUsage);
+      fakeMemoryUsage = malloc(artificialMemoryPerChare);
+      CkAssert(fakeMemoryUsage != NULL);
+    } else {
+      free(fakeMemoryUsage);
+      fakeMemoryUsage = NULL;
+    }
+#endif
+
+
+
+    incoming_count = 0; // prepare for future granularity change 
+    contribute();
+  }
+  
+  
+
+
+
+  void receiveTransposeData(redistributor2DMsg *msg){
+    int top_new = top_data_idx(thisIndex.y, msg->new_chare_rows);
+    int bottom_new = bottom_data_idx(thisIndex.y, msg->new_chare_rows);
+    int left_new = left_data_idx(thisIndex.x, msg->new_chare_cols);
+    int right_new = right_data_idx(thisIndex.x, msg->new_chare_cols);    
+
+    int new_height = bottom_new - top_new + 1;
+    int new_width = right_new - left_new + 1;
+
+    if(incoming_count == 0){
+      // Allocate new arrays 
+      std::map<int,double*>::iterator diter;
+      for(diter =data_arrays.begin(); diter != data_arrays.end(); diter++){
+       int w = diter->first;
+       data_arrays_incoming[w] = new double[(new_width+2*data_x_ghost)*(new_height+2*data_y_ghost)];
+       data_arrays_incoming_sizes[w] = (new_width+2*data_x_ghost)*(new_height+2*data_y_ghost);
+
+       //      CkPrintf("data_arrays_incoming_sizes[%d] set to %d\n", w, data_arrays_incoming_sizes[w] );  
+
+      }
+    }
+    
+    
+    // Copy values from the incoming array to the appropriate place in data_arrays_incoming
+    // Current top left of my new array
+
+
+    double *localData = data_arrays_incoming[msg->which_array];
+    int s = data_arrays_incoming_sizes[msg->which_array];
+
+    //    CkPrintf("%d,%d data_arrays_incoming.size() = %d\n", thisIndex.x, thisIndex.y, data_arrays_incoming.size() );
+    //    CkPrintf("msg->which_array=%d   localData=%p   s=%d\n", msg->which_array, localData, s);
+    CkAssert(localData != NULL);
+
+    for(int j=0; j<msg->height; j++){
+      for(int i=0; i<msg->width; i++){
+
+       if( (msg->top+j >= top_new) && (msg->top+j <= bottom_new) && (msg->left+i >= left_new) && (msg->left+i <= right_new) ) {
+         CkAssert(j*msg->width+i<msg->height*msg->width);
+         CkAssert((msg->top+j-top_new)*new_width+(msg->left+i-left_new) < new_width*new_height);
+         CkAssert((msg->top+j-top_new)*new_width+(msg->left+i-left_new) >= 0);
+         
+         CkAssert((msg->top+j-top_new+data_y_ghost)*(new_width+2*data_x_ghost)+(msg->left+i-left_new+data_x_ghost) < s);
+         localData[(msg->top+j-top_new+data_y_ghost)*(new_width+2*data_x_ghost)+(msg->left+i-left_new+data_x_ghost)] = msg->data[j*msg->width+i];
+         incoming_count++;
+         
+       }
+       
+      }
+    }
+    
+    //    CkPrintf("Deleting message msg %p\n", msg); 
+    delete msg;
+
+
+    if(incoming_count == new_height*new_width*data_arrays.size()){
+
+      std::map<int,double*>::iterator diter;
+      for(diter =data_arrays.begin(); diter != data_arrays.end(); diter++){
+       int w = diter->first;
+       delete[] data_arrays[w];
+       data_arrays[w] = data_arrays_incoming[w];
+       data_arrays_sizes[w] = data_arrays_incoming_sizes[w];
+       data_arrays_incoming[w] = NULL;
+       data_arrays_incoming_sizes[w] = 0;
+
+       //        if(thisIndex.x==0 && thisIndex.y==0)   
+         //          CkPrintf("data_arrays_incoming_sizes[%d] set to %d\n",w, data_arrays_incoming_sizes[w] );   
+
+         //        if(thisIndex.x==0 && thisIndex.y==0) 
+         //  CkPrintf("data_arrays_sizes[%d] set to %d\n",w, data_arrays_sizes[w] ); 
+
+      }
+
+      continueToNextStep();
+    }
+    
+  }
+};
+
+
+
+#endif