Adding Communication aware refinement based strategy called CommAwareRefineLB. Made...
authorHarshitha <gplkrsh2@illinois.edu>
Mon, 2 Apr 2012 17:24:12 +0000 (12:24 -0500)
committerHarshitha <gplkrsh2@illinois.edu>
Mon, 2 Apr 2012 17:24:12 +0000 (12:24 -0500)
13 files changed:
src/ck-ldb/CommAwareRefineLB.C [new file with mode: 0644]
src/ck-ldb/CommAwareRefineLB.ci [new file with mode: 0644]
src/ck-ldb/CommAwareRefineLB.h [new file with mode: 0644]
src/ck-ldb/CommonLBs.ci
src/ck-ldb/EveryLB.ci
src/ck-ldb/Make.lb
src/ck-ldb/Makefile_lb.sh
src/ck-ldb/MetisLB.C
src/ck-ldb/RefineSwapLB.C
src/ck-ldb/ScotchLB.C
src/ck-ldb/ScotchRefineLB.C
src/scripts/Make.cidepends
src/scripts/Make.depends

diff --git a/src/ck-ldb/CommAwareRefineLB.C b/src/ck-ldb/CommAwareRefineLB.C
new file mode 100644 (file)
index 0000000..977cc5a
--- /dev/null
@@ -0,0 +1,444 @@
+/** \file CommAwareRefineLB.C
+ *
+ *  Written by Harshitha Menon
+ *  
+ *  This Loadbalancer strategy is Refine but taking into consideration the
+ *  Communication between the processors.
+ *  The following are the steps in the loadbalancing strategy
+ *
+ *  1. Construct a max heap of processor load whose load is greater than avg
+ *  2. Construct a sorted array of processor load whose load is less than avg
+ *  3. Pick the heaviest processor from the heap, randomly select a chare in
+ *  that processor and decide on which processor in the underloaded processor
+ *  list to transfer it to based on the one with which it is 
+ *  heavily communicating.
+ *  4. If the load of the processors after the transfer is less than the avg
+ *  load, then push it into the underloaded processor list, else push it into
+ *  the max heap.
+ */
+
+/**
+ * \addtogroup CkLdb
+*/
+/*@{*/
+
+#include "CommAwareRefineLB.h"
+#include "ckgraph.h"
+#include <algorithm>
+#include <map>
+
+#include <time.h>
+
+#define THRESHOLD 0.1
+#define SWAP_MULTIPLIER 5 
+
+inline void eraseObjFromParrObjs(std::vector<int> & parr_objs, int remove_objid);
+inline void printMapping(std::vector<Vertex> &vertices);
+inline void removeFromArray(int pe_id, std::vector<int> &array);
+inline int popFromProcHeap(std::vector<int> & parr_above_avg, ProcArray *parr);
+inline void handleTransfer(int randomly_obj_id, ProcInfo& p, int possible_pe, std::vector<int> *parr_objs, ObjGraph *ogr, ProcArray* parr);
+inline void updateLoadInfo(int p_index, int possible_pe, double upper_threshold, double lower_threshold,
+                           std::vector<int> &parr_above_avg, std::vector<int> &parr_below_avg,
+                           bool* proc_load_info, ProcArray *parr);
+
+double upper_threshold;
+double lower_threshold;
+
+CreateLBFunc_Def(CommAwareRefineLB, "always assign the heaviest obj onto lightest loaded processor.")
+
+CommAwareRefineLB::CommAwareRefineLB(const CkLBOptions &opt): CentralLB(opt)
+{
+  lbname = "CommAwareRefineLB";
+  if (CkMyPe()==0)
+    CkPrintf("[%d] CommAwareRefineLB created\n",CkMyPe());
+}
+
+CmiBool CommAwareRefineLB::QueryBalanceNow(int _step)
+{
+  //  CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),_step);
+  return CmiTrue;
+}
+
+class ProcLoadGreater {
+  public:
+    ProcLoadGreater(ProcArray *parr) : parr(parr) {
+    }
+    bool operator()(int lhs, int rhs) {
+      return (parr->procs[lhs].getTotalLoad() < parr->procs[rhs].getTotalLoad());
+    }
+
+  private:
+    ProcArray *parr;
+};
+
+class ObjLoadGreater {
+  public:
+    bool operator()(Vertex v1, Vertex v2) {
+      return (v1.getVertexLoad() > v2.getVertexLoad());
+    }
+};
+
+class PeCommInfo {
+  public:
+    PeCommInfo() : num_msg(0), num_bytes(0) {
+    }
+
+    PeCommInfo(int pe_id) : pe_id(pe_id), num_msg(0) , num_bytes(0) {
+    }
+    int pe_id;
+    int num_msg;
+    int num_bytes;
+    // TODO: Should probably have a communication cost
+};
+
+// Consists of communication information of an object with is maintained
+// as a list of PeCommInfo containing the processor id and the bytes transferred
+class ObjPeCommInfo {
+  public:
+    ObjPeCommInfo() {
+    }
+
+    int obj_id;
+    std::vector<PeCommInfo> pcomm;
+};
+
+class ProcCommGreater {
+  public:
+    bool operator()(PeCommInfo p1, PeCommInfo p2) {
+      // TODO(Harshitha): Should probably consider total communication cost
+      return (p1.num_bytes > p2.num_bytes);
+    }
+};
+
+void PrintProcLoad(ProcArray *parr) {
+  int vert;
+  double pe_load;
+  for (vert = 0; vert < parr->procs.size(); vert++) {
+    pe_load = parr->procs[vert].getTotalLoad();
+    if (pe_load > upper_threshold) {
+      CkPrintf("Above load : %d load : %E overhead : %E\n",
+        parr->procs[vert].getProcId(), parr->procs[vert].getTotalLoad(),
+        parr->procs[vert].overhead());
+    } else if (pe_load < lower_threshold) {
+      CkPrintf("Below load : %d load : %E overhead : %E\n",
+        parr->procs[vert].getProcId(), parr->procs[vert].getTotalLoad(),
+        parr->procs[vert].overhead());
+    } else {
+      CkPrintf("Within avg load : %d load : %E overhead : %E\n",
+        parr->procs[vert].getProcId(), parr->procs[vert].getTotalLoad(),
+        parr->procs[vert].overhead());
+    }
+  }
+}
+
+void PrintProcObj(ProcArray *parr, std::vector<int>* parr_objs) {
+  int i, j;
+  CkPrintf("---------------------\n");
+  for (i = 0; i < parr->procs.size(); i++) {
+    CkPrintf("[%d] contains ", i);
+    for (j = 0; j < parr_objs[i].size(); j++) {
+      CkPrintf(" %d, ", parr_objs[i][j]);
+    }
+    CkPrintf("\n");
+  }
+  CkPrintf("---------------------\n");
+}
+
+
+void CommAwareRefineLB::work(LDStats* stats)
+{
+  /** ========================== INITIALIZATION ============================= */
+  ProcArray *parr = new ProcArray(stats);       // Processor Array
+  ObjGraph *ogr = new ObjGraph(stats);          // Object Graph
+  int vertexid_pe[ogr->vertices.size()]; // Stores the cur pe of obj
+  double avgload = parr->getAverageLoad();      // Average load of processors
+
+  // Sets to false if it is overloaded, else to true
+  bool* proc_load_info = new bool[parr->procs.size()];
+  memset(proc_load_info, false, parr->procs.size());
+
+  // Create an array of vectors for each processor mapping to the objects in
+  // that processor
+  std::vector<int>* parr_objs = new std::vector<int>[parr->procs.size()];
+
+  // Create an array for each chare with processor communication info
+  ObjPeCommInfo* objpcomm = new ObjPeCommInfo[ogr->vertices.size()];
+
+  upper_threshold = avgload + (avgload * THRESHOLD);
+  lower_threshold = avgload - (avgload * THRESHOLD * THRESHOLD);
+
+  int less_loaded_counter = 0;
+
+  srand(time(NULL));
+  /** ============================= STRATEGY ================================ */
+
+  if (_lb_args.debug()>1) 
+    CkPrintf("[%d] In CommAwareRefineLB strategy\n",CkMyPe());
+
+  CkPrintf("Average load %E\n", avgload);
+
+  int vert, i, j;
+  int curr_pe;
+
+  // Iterate over all the chares and construct the peid, vector<chareid> array
+  for(vert = 0; vert < ogr->vertices.size(); vert++) {
+    curr_pe = ogr->vertices[vert].getCurrentPe();
+    parr_objs[curr_pe].push_back(vert);
+    vertexid_pe[ogr->vertices[vert].getVertexId()] = curr_pe;
+  }
+
+
+  std::vector<int> parr_above_avg;
+  std::vector<int> parr_below_avg;
+
+  double pe_load;
+
+  // Insert into parr_above_avg if the processor fits under the criteria of
+  // overloaded processor.
+  // Insert the processor id into parr_below_avg if the processor is underloaded 
+  for (vert = 0; vert < parr->procs.size(); vert++) {
+    pe_load = parr->procs[vert].getTotalLoad();
+    if (pe_load > upper_threshold) {
+      // Pushing ProcInfo into this list
+      parr_above_avg.push_back(vert);
+    } else if (pe_load < lower_threshold) {
+      parr_below_avg.push_back(parr->procs[vert].getProcId());
+      proc_load_info[parr->procs[vert].getProcId()] = true;
+      less_loaded_counter++;
+    }
+  }
+
+  std::make_heap(parr_above_avg.begin(), parr_above_avg.end(),
+      ProcLoadGreater(parr));
+
+  // Construct the datastructure that stores communication information for each
+  // chare with respect to processors
+  for (vert = 0; vert < ogr->vertices.size(); vert++) {
+    std::map<int, int> tmp_map_pid_index;
+    int counter = objpcomm[vert].pcomm.size();
+    int index;
+    for (i = 0; i < ogr->vertices[vert].sendToList.size(); i++) {
+      j = vertexid_pe[ogr->vertices[vert].sendToList[i].getNeighborId()];
+      // TODO: Should it index with vertexId?
+      if (tmp_map_pid_index.count(j) == 0) {
+        tmp_map_pid_index[j] = counter;
+        PeCommInfo pecomminf(j);
+        // TODO: Shouldn't it use vertexId instead of vert?
+        objpcomm[vert].pcomm.push_back(pecomminf);
+        counter++;
+      }
+      index = tmp_map_pid_index[j];
+    
+      objpcomm[vert].pcomm[index].num_msg +=
+        ogr->vertices[vert].sendToList[i].getNumMsgs();
+      objpcomm[vert].pcomm[index].num_bytes +=
+        ogr->vertices[vert].sendToList[i].getNumBytes();
+    }
+
+    for (i = 0; i < ogr->vertices[vert].recvFromList.size(); i++) {
+      j = vertexid_pe[ogr->vertices[vert].recvFromList[i].getNeighborId()];
+
+      if (tmp_map_pid_index.count(j) == 0) {
+        tmp_map_pid_index[j] = counter;
+        PeCommInfo pecomminf(j);
+        // TODO: Shouldn't it use vertexId instead of vert?
+        objpcomm[vert].pcomm.push_back(pecomminf);
+        counter++;
+      }
+      index = tmp_map_pid_index[j];
+
+      objpcomm[vert].pcomm[index].num_msg +=
+        ogr->vertices[vert].sendToList[i].getNumMsgs();
+      objpcomm[vert].pcomm[index].num_bytes +=
+        ogr->vertices[vert].sendToList[i].getNumBytes();
+    }
+
+    // Sort the pe communication vector for this chare
+    std::sort(objpcomm[vert].pcomm.begin(), objpcomm[vert].pcomm.end(),
+        ProcCommGreater());
+  }
+
+  int random;
+  int randomly_obj_id;
+  bool obj_allocated;
+  int num_tries;
+  // Allow as many swaps as there are chares
+  int total_swaps = ogr->vertices.size() * SWAP_MULTIPLIER;
+  int possible_pe;
+  double obj_load;
+
+  // Keep on loadbalancing until the number of above avg processors is 0
+  while (parr_above_avg.size() != 0 && total_swaps > 0 && parr_below_avg.size() != 0) {
+      // CkPrintf("Above avg : %d Below avg : %d Total swaps: %d\n", parr_above_avg.size(),
+      //    parr_below_avg.size(), total_swaps);
+      obj_allocated = false;
+      num_tries = 0;
+
+      // Pop the heaviest processor
+      int p_index = popFromProcHeap(parr_above_avg, parr);
+      ProcInfo& p = parr->procs[p_index];
+
+      while (!obj_allocated && num_tries < 5) {
+
+        // It might so happen that due to overhead load, it might not have any
+        // more objects in its list
+        if (parr_objs[p.getProcId()].size() == 0) {
+          // CkPrintf("No obj left to be allocated\n");
+          obj_allocated = true;
+          break;
+        }
+
+        int randd = rand();
+        random = randd % parr_objs[p.getProcId()].size();
+        randomly_obj_id = parr_objs[p.getProcId()][random];
+        obj_load = ogr->vertices[randomly_obj_id].getVertexLoad();
+
+        // CkPrintf("Heavy %d: Parr obj size : %d random : %d random obj id : %d\n", p_index,
+        //     parr_objs[p.getProcId()].size(), randd, randomly_obj_id);
+        for (i = 0; i < objpcomm[randomly_obj_id].pcomm.size(); i++) {
+
+          // If the heaviest communicating processor is there in the list, then
+          // assign it to that.
+          possible_pe = objpcomm[randomly_obj_id].pcomm[i].pe_id;
+
+          if ((parr->procs[possible_pe].getTotalLoad() + obj_load) < upper_threshold) {
+            //CkPrintf("** Iter[%d] : Transfered %d from %d : Load %E to %d : Load %E Comm : %d\n",
+            //    i, randomly_obj_id, p.getProcId(), p.getTotalLoad(),
+            //    possible_pe, parr->procs[possible_pe].getTotalLoad(),
+            //    objpcomm[randomly_obj_id].pcomm[i].num_msg);
+
+            handleTransfer(randomly_obj_id, p, possible_pe, parr_objs, ogr, parr);
+            obj_allocated = true;
+            total_swaps--;
+            updateLoadInfo(p_index, possible_pe, upper_threshold, lower_threshold,
+                parr_above_avg, parr_below_avg, proc_load_info, parr);
+
+            break;
+          }
+        }
+
+        // Since there is no processor in the least loaded list with which this
+        // chare communicates, pick a random least loaded processor.
+        if (!obj_allocated) {
+          // CkPrintf(":( Could not transfer to the nearest communicating ones\n");
+          for (int x = 0; x < parr_below_avg.size(); x++) {
+            int random_pe = parr_below_avg[x];
+            if ((parr->procs[random_pe].getTotalLoad() + obj_load) < upper_threshold) {
+              obj_allocated = true;
+              total_swaps--;
+              handleTransfer(randomly_obj_id, p, random_pe, parr_objs, ogr, parr);
+              updateLoadInfo(p_index, random_pe, upper_threshold, lower_threshold,
+                  parr_above_avg, parr_below_avg, proc_load_info, parr);
+              break;
+            }
+            num_tries++;
+          }
+        }
+      }
+
+      if (!obj_allocated) {
+        CkPrintf("!!!! Could not handle the heavy proc %d so giving up\n", p_index);
+       // parr_above_avg.push_back(p_index);
+       // std::push_heap(parr_above_avg.begin(), parr_above_avg.end(),
+       //     ProcLoadGreater(parr));
+      }
+    }
+
+
+  /** ============================== CLEANUP ================================ */
+  ogr->convertDecisions(stats);         // Send decisions back to LDStats
+  delete parr;
+  delete ogr;
+  delete[] parr_objs;
+  delete[] objpcomm;
+}
+
+inline void eraseObjFromParrObjs(std::vector<int> & parr_objs, int remove_objid) {
+  for (int i = 0; i < parr_objs.size(); i++) {
+    if (parr_objs[i] == remove_objid) {
+      parr_objs.erase(parr_objs.begin() + i);
+      return;
+    }
+  }
+}
+
+
+inline void printMapping(std::vector<Vertex> &vertices) {
+  for (int i = 0; i < vertices.size(); i++) {
+    CkPrintf("%d: old map : %d new map : %d\n", i, vertices[i].getCurrentPe(),
+        vertices[i].getNewPe());
+  }
+}
+
+inline void removeFromArray(int pe_id, std::vector<int> &array) {
+  for (int i = 0; i < array.size(); i++) {
+    if (array[i] == pe_id) {
+      array.erase(array.begin() + i);
+    }
+  }
+}
+
+inline int popFromProcHeap(std::vector<int> & parr_above_avg, ProcArray *parr) {
+  int p_index = parr_above_avg.front();
+  std::pop_heap(parr_above_avg.begin(), parr_above_avg.end(),
+      ProcLoadGreater(parr));
+  parr_above_avg.pop_back();
+  return p_index;
+}
+
+    
+inline void handleTransfer(int randomly_obj_id, ProcInfo& p, int possible_pe, std::vector<int>* parr_objs, ObjGraph *ogr, ProcArray* parr) {
+  ogr->vertices[randomly_obj_id].setNewPe(possible_pe);
+  parr_objs[possible_pe].push_back(randomly_obj_id);
+  ProcInfo &possible_pe_procinfo = parr->procs[possible_pe];
+
+  p.totalLoad() -= ogr->vertices[randomly_obj_id].getVertexLoad();
+  possible_pe_procinfo.totalLoad() += ogr->vertices[randomly_obj_id].getVertexLoad();
+  eraseObjFromParrObjs(parr_objs[p.getProcId()], randomly_obj_id);
+  //CkPrintf("After transfered %d from %d : Load %E to %d : Load %E\n", randomly_obj_id, p.getProcId(), p.getTotalLoad(),
+  //    possible_pe, possible_pe_procinfo.getTotalLoad());
+}
+
+inline void updateLoadInfo(int p_index, int possible_pe, double upper_threshold, double lower_threshold,
+                           std::vector<int>& parr_above_avg, std::vector<int>& parr_below_avg,
+                           bool* proc_load_info, ProcArray *parr) {
+
+  ProcInfo& p = parr->procs[p_index];
+  ProcInfo& possible_pe_procinfo = parr->procs[possible_pe];
+
+  // If the updated load is still greater than the average by the
+  // threshold value, then push it back to the max heap
+  if (p.getTotalLoad() > upper_threshold) {
+    parr_above_avg.push_back(p_index);
+    std::push_heap(parr_above_avg.begin(), parr_above_avg.end(),
+        ProcLoadGreater(parr));
+    //CkPrintf("\t Pushing pe : %d to max heap\n", p.getProcId());
+  } else if (p.getTotalLoad() < lower_threshold) {
+    parr_below_avg.push_back(p_index);
+    proc_load_info[p_index] = true;
+    //CkPrintf("\t Adding pe : %d to less loaded\n", p.getProcId());
+  }
+
+  // If the newly assigned processor's load is greater than the average
+  // by the threshold value, then push it into the max heap.
+  if (possible_pe_procinfo.getTotalLoad() > upper_threshold) {
+    // TODO: It should be the index in procarray :(
+    parr_above_avg.push_back(possible_pe);
+    std::push_heap(parr_above_avg.begin(), parr_above_avg.end(),
+        ProcLoadGreater(parr));
+    removeFromArray(possible_pe, parr_below_avg);
+    proc_load_info[possible_pe] = false;
+    //CkPrintf("\t Pusing pe : %d to max heap\n", possible_pe);
+  } else if (possible_pe_procinfo.getTotalLoad() < lower_threshold) {
+  } else {
+    removeFromArray(possible_pe, parr_below_avg);
+    proc_load_info[possible_pe] = false;
+    //CkPrintf("\t Removing from lower list pe : %d\n", possible_pe);
+  }
+
+}
+
+#include "CommAwareRefineLB.def.h"
+
+/*@}*/
+
diff --git a/src/ck-ldb/CommAwareRefineLB.ci b/src/ck-ldb/CommAwareRefineLB.ci
new file mode 100644 (file)
index 0000000..36a02d7
--- /dev/null
@@ -0,0 +1,9 @@
+module CommAwareRefineLB {
+
+extern module CentralLB;
+initnode void lbinit(void);
+group [migratable] CommAwareRefineLB : CentralLB {
+  entry void CommAwareRefineLB(const CkLBOptions &);  
+};
+
+};
diff --git a/src/ck-ldb/CommAwareRefineLB.h b/src/ck-ldb/CommAwareRefineLB.h
new file mode 100644 (file)
index 0000000..86c15ab
--- /dev/null
@@ -0,0 +1,43 @@
+/**
+ * \addtogroup CkLdb
+*/
+/*@{*/
+
+#ifndef _COMMAWARELB_H_
+#define _COMMAWARELB_H_
+
+#include "CentralLB.h"
+#include "CommAwareRefineLB.decl.h"
+
+void CreateCommAwareRefineLB();
+BaseLB * AllocateCommAwareRefineLB();
+
+class CommAwareRefineLB : public CentralLB {
+
+public:
+  struct HeapData {
+    double load;
+    int    pe;
+    int    id;
+
+  };
+
+  CommAwareRefineLB(const CkLBOptions &);
+  CommAwareRefineLB(CkMigrateMessage *m):CentralLB(m) {
+    lbname = "CommAwareRefineLB";
+  }
+  void work(LDStats* stats);
+private:
+       enum           HeapCmp {GT = '>', LT = '<'};
+       void           Heapify(HeapData*, int, int, HeapCmp);
+       void           HeapSort(HeapData*, int, HeapCmp);
+       void           BuildHeap(HeapData*, int, HeapCmp);
+       CmiBool        Compare(double, double, HeapCmp);
+       HeapData*      BuildCpuArray(BaseLB::LDStats*, int, int *);  
+       HeapData*      BuildObjectArray(BaseLB::LDStats*, int, int *);      
+       CmiBool        QueryBalanceNow(int step);
+};
+
+#endif /* _COMMAWARELB_H_ */
+
+/*@}*/
index dc9bd6e7f86a66300967575e2b8098c43af14c17..f7c80eb30f9dd3808a2b3b87f18a6199d3ae5d98 100644 (file)
@@ -17,6 +17,8 @@ module CommonLBs {
   extern module RefineCommLB;
   extern module RotateLB;
   extern module TreeMatchLB;
+  extern module RefineSwapLB;
+  extern module CommAwareRefineLB;
 
   initnode void initCommonLBs(void);
 };
index 5bb718348fed287f03a0418db732dbebb45f31f4..11eda6e8bb34d3175c7855c8620054837f0bf66f 100644 (file)
@@ -17,6 +17,8 @@ module EveryLB {
   extern module RefineCommLB;
   extern module RotateLB;
   extern module TreeMatchLB;
+  extern module RefineSwapLB;
+  extern module CommAwareRefineLB;
   extern module ComboCentLB;
   extern module GraphPartLB;
   extern module GraphBFTLB;
index 8269e5737df706dc0a811078af2dd46137f97ddf..74131d577282d9332990994f2ea628e5cd9560a2 100644 (file)
@@ -16,6 +16,8 @@ ALL_LDBS=\
    $(L)/libmoduleRefineCommLB.a \
    $(L)/libmoduleRotateLB.a \
    $(L)/libmoduleTreeMatchLB.a \
+   $(L)/libmoduleRefineSwapLB.a \
+   $(L)/libmoduleCommAwareRefineLB.a \
    $(L)/libmoduleComboCentLB.a \
    $(L)/libmoduleGraphPartLB.a \
    $(L)/libmoduleGraphBFTLB.a \
@@ -129,6 +131,18 @@ $(L)/libmoduleTreeMatchLB.a: TreeMatchLB.o tm_tree.o tm_bucket.o tm_timings.o tm
 LBHEADERS += TreeMatchLB.h TreeMatchLB.decl.h
 
 
+$(L)/libmoduleRefineSwapLB.a: RefineSwapLB.o 
+       $(CHARMC) -o $(L)/libmoduleRefineSwapLB.a RefineSwapLB.o 
+       
+LBHEADERS += RefineSwapLB.h RefineSwapLB.decl.h
+
+
+$(L)/libmoduleCommAwareRefineLB.a: CommAwareRefineLB.o 
+       $(CHARMC) -o $(L)/libmoduleCommAwareRefineLB.a CommAwareRefineLB.o 
+       
+LBHEADERS += CommAwareRefineLB.h CommAwareRefineLB.decl.h
+
+
 $(L)/libmoduleComboCentLB.a: ComboCentLB.o 
        $(CHARMC) -o $(L)/libmoduleComboCentLB.a ComboCentLB.o 
        
@@ -261,6 +275,8 @@ ALL_LB_OBJS=EveryLB.o \
     RefineCommLB.o \
     RotateLB.o \
     TreeMatchLB.o \
+    RefineSwapLB.o \
+    CommAwareRefineLB.o \
     ComboCentLB.o \
     GraphPartLB.o \
     GraphBFTLB.o \
@@ -303,6 +319,8 @@ EVERYLB_DEPS=EveryLB.o \
     RefineCommLB.o \
     RotateLB.o \
     TreeMatchLB.o \
+    RefineSwapLB.o \
+    CommAwareRefineLB.o \
     ComboCentLB.o \
     GraphPartLB.o \
     GraphBFTLB.o \
@@ -340,6 +358,8 @@ COMMONLBS_DEPS=CommonLBs.o \
     RefineCommLB.o \
     RotateLB.o \
     TreeMatchLB.o \
+    RefineSwapLB.o \
+    CommAwareRefineLB.o \
     manager.o \
     tm_tree.o  \
     tm_timings.o  \
index e59e977e3ad320fa56b009d910974c9e12cab3b6..0fdfd88139f14508c6274e3e04c112b2158cfe16 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/sh
 UNCOMMON_LDBS="TempAwareGreedyLB MetisLB ScotchLB TeamLB WSLB"
-COMMON_LDBS="BlockLB CommLB DummyLB GreedyAgentLB GreedyCommLB GreedyLB NeighborCommLB NeighborLB OrbLB PhasebyArrayLB RandCentLB RecBipartLB RefineLB RefineCommLB RotateLB TreeMatchLB"
+COMMON_LDBS="BlockLB CommLB DummyLB GreedyAgentLB GreedyCommLB GreedyLB NeighborCommLB NeighborLB OrbLB PhasebyArrayLB RandCentLB RecBipartLB RefineLB RefineCommLB RotateLB TreeMatchLB RefineSwapLB CommAwareRefineLB"
 OTHER_LDBS="ComboCentLB GraphPartLB GraphBFTLB GridCommLB GridCommRefineLB GridHybridLB GridHybridSeedLB GridMetisLB HbmLB HybridLB RefineKLB RefineTopoLB TopoCentLB TopoLB"
 ALL_LDBS="$COMMON_LDBS $OTHER_LDBS"
 
index eb57e86a47193e9d156ea5b800b450a9929e85f6..9c7b7338e23877dfd1b307f7d1ee979102f8d8e4 100644 (file)
@@ -175,6 +175,8 @@ void MetisLB::work(LDStats* stats)
 
   /** ============================== CLEANUP ================================ */
   ogr->convertDecisions(stats);
+  delete parr;
+  delete ogr;
 }
 
 #include "MetisLB.def.h"
index 8a5b86da1ef2c99b008b4bb3ec314e36baf72171..2ece1ba73230018dbacd6e12b1bb74f49b5605cf 100644 (file)
@@ -1,7 +1,6 @@
 /** \file RefineSwapLB.C
  *
- *  Written by Gengbin Zheng
- *  Updated by Abhinav Bhatele, 2010-12-09 to use ckgraph
+ *  Written by Harshitha Menon 
  *
  *  Status:
  *    -- Does not support pe_speed's currently
@@ -21,7 +20,8 @@
 using std::cout;
 using std::endl;
 
-CreateLBFunc_Def(RefineSwapLB, "always assign the heaviest obj onto lightest loaded processor.")
+CreateLBFunc_Def(RefineSwapLB,
+    "always assign the heaviest obj onto lightest loaded processor.")
 
 RefineSwapLB::RefineSwapLB(const CkLBOptions &opt): CentralLB(opt)
 {
@@ -73,16 +73,15 @@ inline void addObjToProc(ProcArray* parr, ObjGraph* ogr, std::vector<int>*
   pe_obj[pe_index].push_back(obj_index);
 
   // Update load
-  parr->procs[pe_index].setTotalLoad(parr->procs[pe_index].getTotalLoad() +
-      ogr->vertices[obj_index].getVertexLoad());
+  parr->procs[pe_index].totalLoad() += ogr->vertices[obj_index].getVertexLoad();
 }
 
 inline void removeObjFromProc(ProcArray* parr, ObjGraph* ogr, std::vector<int>*
     pe_obj, int pe_index, int arr_index) {
 
   // Update load
-  parr->procs[pe_index].setTotalLoad(parr->procs[pe_index].getTotalLoad() -
-      ogr->vertices[pe_obj[pe_index][arr_index]].getVertexLoad());
+  parr->procs[pe_index].totalLoad() -=
+      ogr->vertices[pe_obj[pe_index][arr_index]].getVertexLoad();
 
   // Remove from pe_obj
   pe_obj[pe_index].erase(pe_obj[pe_index].begin() + arr_index);
@@ -101,7 +100,6 @@ bool refine(ProcArray* parr, ObjGraph* ogr, std::vector<int>& max_pe_heap,
     std::vector<int>& min_pe_heap, std::vector<int>* pe_obj, int max_pe,
     double avg_load, double threshold) {
 
-  //cout << "Picked max pe " << max_pe << " (" << parr->procs[max_pe].getTotalLoad() << ")" << endl;
   int best_p, best_p_iter, arr_index;
   bool allocated = false;
   int pe_considered;
@@ -117,7 +115,8 @@ bool refine(ProcArray* parr, ObjGraph* ogr, std::vector<int>& max_pe_heap,
       obj_considered = pe_obj[max_pe][i];
       pe_considered = min_pe_heap[j];
    
-      if (parr->procs[pe_considered].getTotalLoad() + ogr->vertices[obj_considered].getVertexLoad() < (avg_load + threshold)) {
+      if (parr->procs[pe_considered].getTotalLoad() +
+          ogr->vertices[obj_considered].getVertexLoad() < (avg_load + threshold)) {
     //    if (ogr->vertices[obj_considered].getVertexLoad() > best_size) {
           best_size = ogr->vertices[obj_considered].getVertexLoad();
           best_p = pe_considered;
@@ -136,11 +135,6 @@ bool refine(ProcArray* parr, ObjGraph* ogr, std::vector<int>& max_pe_heap,
     addObjToProc(parr, ogr, pe_obj, best_p, best_obj);
     removeObjFromProc(parr, ogr, pe_obj, max_pe, arr_index);
 
-   // std::cout << " Moving obj " << best_obj << " (" <<
-   //   ogr->vertices[best_obj].getVertexLoad() << ") from " << max_pe << " to " <<
-   //   best_p << " New load " << max_pe << ":" << parr->procs[max_pe].getTotalLoad()
-   //   << " " << best_p << ":" << parr->procs[best_p].getTotalLoad()<< std::endl; 
-
     // Update the max heap and min list
     if (parr->procs[max_pe].getTotalLoad() > (avg_load + threshold)) {
       // Reinsert
@@ -252,7 +246,6 @@ void RefineSwapLB::work(LDStats* stats)
 
 
   /** ============================= STRATEGY ================================ */
-  //parr->resetTotalLoad();
 
   if (_lb_args.debug()>1) 
     CkPrintf("[%d] In RefineSwapLB strategy\n",CkMyPe());
@@ -271,7 +264,6 @@ void RefineSwapLB::work(LDStats* stats)
 
 
   // Create a datastructure to store the objects in a processor
-//  CkPrintf("Object load\n");
   for (int i = 0; i < ogr->vertices.size(); i++) {
     pe_obj[ogr->vertices[i].getCurrentPe()].push_back(i);
 //    CkPrintf("%d pe %d: %lf\n", i, ogr->vertices[i].getCurrentPe(), ogr->vertices[i].getVertexLoad());
@@ -310,22 +302,6 @@ void RefineSwapLB::work(LDStats* stats)
     }
   }
 
-  //std::cout << "Overloaded Processor load " << avg_load << endl;
-  for (int p_index = 0; p_index < max_pe_heap.size(); p_index++) {
-    //std::cout << max_pe_heap[p_index] << ": " << parr->procs[max_pe_heap[p_index]].getTotalLoad() << std::endl;
-  }
-
-  //std::cout << "Underloaded Processor load"<< endl;
-  for (int p_index = 0; p_index < min_pe_heap.size(); p_index++) {
-    //std::cout << min_pe_heap[p_index] << ": " << parr->procs[min_pe_heap[p_index]].getTotalLoad() << std::endl;
-  }
-
-
-  //std::cout << "Processor load"<< endl;
-  for (int i = 0; i < parr->procs.size(); i++) {
-    //CkPrintf("%d : %lf\n", i, parr->procs[i].getTotalLoad());
-  }
-
   /** ============================== CLEANUP ================================ */
   ogr->convertDecisions(stats);         // Send decisions back to LDStats
   delete[] pe_obj;
index a0d1d736ec25f3f6760ddac718d4d958fc64dc7d..f68f5f290a2023df87207122410a2440ef45800c 100644 (file)
@@ -150,6 +150,7 @@ void ScotchLB::work(LDStats *stats) {
   free(pemap);
   /** ============================== CLEANUP ================================ */
   ogr->convertDecisions(stats);
+  delete parr;
   delete ogr;
 }
 
index e762bdf8d2ae5702445aa71991bc8842e31d7e9a..de52407430f67ed93f3f07afa0701f769da9d6ce 100644 (file)
@@ -164,6 +164,7 @@ void ScotchRefineLB::work(LDStats *stats) {
   free(oldpemap);
   /** ============================== CLEANUP ================================ */
   ogr->convertDecisions(stats);
+  delete parr;
   delete ogr;
 }
 
index 81627b0e64e0572f59b0c78a4fb89cdf38988703..63217d182ca28a00046d5ba8b502b6cd5adad123 100644 (file)
@@ -15,6 +15,7 @@ CkMemCheckpoint.decl.h CkMemCheckpoint.def.h: ckmemcheckpoint.ci.stamp
 CkReduction.decl.h CkReduction.def.h: ckreduction.ci.stamp
 ComboCentLB.decl.h ComboCentLB.def.h: ComboCentLB.ci.stamp
 comlib.decl.h comlib.def.h: ComlibManager.ci.stamp
+CommAwareRefineLB.decl.h CommAwareRefineLB.def.h: CommAwareRefineLB.ci.stamp
 CommLB.decl.h CommLB.def.h: CommLB.ci.stamp
 CommonLBs.decl.h CommonLBs.def.h: CommonLBs.ci.stamp
 ControlPoints.decl.h ControlPoints.def.h: controlPoints.ci.stamp
index b9215c2cf6aa23856f44ba41901c3424bdd4e2be..031fa625ae48f2209831ba186d4de68bb6f72190 100644 (file)
@@ -1479,12 +1479,12 @@ EveryLB.o: EveryLB.C LBDatabase.h lbdb.h converse.h conv-config.h \
  NeighborCommLB.decl.h NborBaseLB.decl.h NeighborLBMsg.h \
  NeighborLB.decl.h OrbLB.decl.h PhasebyArrayLB.decl.h RandCentLB.decl.h \
  RecBipartLB.decl.h RefineLB.decl.h RefineCommLB.decl.h RotateLB.decl.h \
- TreeMatchLB.decl.h ComboCentLB.decl.h GraphPartLB.decl.h \
GraphBFTLB.decl.h GridCommLB.decl.h GridCommRefineLB.decl.h \
- GridHybridLB.decl.h GridHybridSeedLB.decl.h GridMetisLB.decl.h \
HbmLB.decl.h HybridLBMsg.h HybridLB.decl.h HybridBaseLB.decl.h \
RefineKLB.decl.h RefineTopoLB.decl.h TopoCentLB.decl.h TopoLB.decl.h \
- EveryLB.def.h
+ TreeMatchLB.decl.h RefineSwapLB.decl.h CommAwareRefineLB.decl.h \
ComboCentLB.decl.h GraphPartLB.decl.h GraphBFTLB.decl.h \
+ GridCommLB.decl.h GridCommRefineLB.decl.h GridHybridLB.decl.h \
GridHybridSeedLB.decl.h GridMetisLB.decl.h HbmLB.decl.h HybridLBMsg.h \
HybridLB.decl.h HybridBaseLB.decl.h RefineKLB.decl.h RefineTopoLB.decl.h \
TopoCentLB.decl.h TopoLB.decl.h EveryLB.def.h
        $(CHARMC) -c -I. EveryLB.C
 
 CommonLBs.o: CommonLBs.C LBDatabase.h lbdb.h converse.h conv-config.h \
@@ -1508,7 +1508,8 @@ CommonLBs.o: CommonLBs.C LBDatabase.h lbdb.h converse.h conv-config.h \
  NeighborCommLB.decl.h NborBaseLB.decl.h NeighborLBMsg.h \
  NeighborLB.decl.h OrbLB.decl.h PhasebyArrayLB.decl.h RandCentLB.decl.h \
  RecBipartLB.decl.h RefineLB.decl.h RefineCommLB.decl.h RotateLB.decl.h \
- TreeMatchLB.decl.h CommonLBs.def.h
+ TreeMatchLB.decl.h RefineSwapLB.decl.h CommAwareRefineLB.decl.h \
+ CommonLBs.def.h
        $(CHARMC) -c -I. CommonLBs.C
 
 BlockLB.o: BlockLB.C BlockLB.decl.h charm++.h charm.h converse.h \
@@ -1822,6 +1823,45 @@ TreeMatchLB.o: TreeMatchLB.C charm++.h charm.h converse.h conv-config.h \
  ckgraph.h TreeMatchLB.def.h
        $(CHARMC) -c -I. TreeMatchLB.C
 
+RefineSwapLB.o: RefineSwapLB.C RefineSwapLB.h CentralLB.h BaseLB.h \
+ LBDatabase.h lbdb.h converse.h conv-config.h conv-autoconfig.h \
+ conv-common.h conv-mach.h conv-mach-opt.h pup_c.h queueing.h conv-cpm.h \
+ conv-cpath.h conv-qd.h conv-random.h conv-lists.h conv-trace.h \
+ persistent.h debug-conv.h charm.h pup.h middle.h middle-conv.h \
+ LBDBManager.h cklists.h LBObj.h LBOM.h LBComm.h LBMachineUtil.h lbdb++.h \
+ LBDatabase.decl.h charm++.h ckbitvector.h ckstream.h init.h \
+ ckhashtable.h debug-charm.h debug-conv++.h simd.h CkMarshall.decl.h \
+ ckarrayindex.h cksection.h ckcallback.h conv-ccs.h sockRoutines.h \
+ ccs-server.h ckobjQ.h ckreduction.h CkReduction.decl.h \
+ CkArrayReductionMgr.decl.h ckmemcheckpoint.h CkMemCheckpoint.decl.h \
+ readonly.h ckarray.h cklocation.h CkLocation.decl.h CkArray.decl.h \
+ ckfutures.h CkFutures.decl.h charisma.h charisma.decl.h tempo.h \
+ tempo.decl.h waitqd.h waitqd.decl.h sdag.h ckcheckpoint.h \
+ CkCheckpoint.decl.h ckevacuation.h ckarrayreductionmgr.h trace.h \
+ trace-bluegene.h envelope.h NullLB.decl.h BaseLB.decl.h CentralLB.decl.h \
+ CentralLBMsg.h RefineSwapLB.decl.h ckgraph.h RefineSwapLB.def.h
+       $(CHARMC) -c -I. RefineSwapLB.C
+
+CommAwareRefineLB.o: CommAwareRefineLB.C CommAwareRefineLB.h CentralLB.h \
+ BaseLB.h LBDatabase.h lbdb.h converse.h conv-config.h conv-autoconfig.h \
+ conv-common.h conv-mach.h conv-mach-opt.h pup_c.h queueing.h conv-cpm.h \
+ conv-cpath.h conv-qd.h conv-random.h conv-lists.h conv-trace.h \
+ persistent.h debug-conv.h charm.h pup.h middle.h middle-conv.h \
+ LBDBManager.h cklists.h LBObj.h LBOM.h LBComm.h LBMachineUtil.h lbdb++.h \
+ LBDatabase.decl.h charm++.h ckbitvector.h ckstream.h init.h \
+ ckhashtable.h debug-charm.h debug-conv++.h simd.h CkMarshall.decl.h \
+ ckarrayindex.h cksection.h ckcallback.h conv-ccs.h sockRoutines.h \
+ ccs-server.h ckobjQ.h ckreduction.h CkReduction.decl.h \
+ CkArrayReductionMgr.decl.h ckmemcheckpoint.h CkMemCheckpoint.decl.h \
+ readonly.h ckarray.h cklocation.h CkLocation.decl.h CkArray.decl.h \
+ ckfutures.h CkFutures.decl.h charisma.h charisma.decl.h tempo.h \
+ tempo.decl.h waitqd.h waitqd.decl.h sdag.h ckcheckpoint.h \
+ CkCheckpoint.decl.h ckevacuation.h ckarrayreductionmgr.h trace.h \
+ trace-bluegene.h envelope.h NullLB.decl.h BaseLB.decl.h CentralLB.decl.h \
+ CentralLBMsg.h CommAwareRefineLB.decl.h ckgraph.h \
+ CommAwareRefineLB.def.h
+       $(CHARMC) -c -I. CommAwareRefineLB.C
+
 ComboCentLB.o: ComboCentLB.C ComboCentLB.h CentralLB.h BaseLB.h \
  LBDatabase.h lbdb.h converse.h conv-config.h conv-autoconfig.h \
  conv-common.h conv-mach.h conv-mach-opt.h pup_c.h queueing.h conv-cpm.h \