Incorporating communication aware refinement vs scratch strategy
author Harshitha <gplkrsh2@illinois.edu>
Mon, 2 Apr 2012 17:34:44 +0000 (12:34 -0500)
committer Harshitha <gplkrsh2@illinois.edu>
Mon, 2 Apr 2012 17:34:44 +0000 (12:34 -0500)
src/ck-ldb/AdaptiveLB.C
src/ck-ldb/BaseLB.h
src/ck-ldb/CentralLB.h
src/ck-ldb/LBDatabase.C
src/ck-ldb/LBDatabase.h

index ab85ed909ea9819fdcb0112f592d7558043d6fab..ea92b2f610da85d6db888e739db24dc96cb987ca 100644 (file)
@@ -84,24 +84,42 @@ void AdaptiveLB::work(LDStats* stats)
   CkPrintf("AdaptiveLB> Total Bytes %ld\n", totalBytes);
   CkPrintf("AdaptiveLB> Total Comm Overhead %E Total Load %E\n", commOverhead, totalLoad);
 
+  double tmp;
   double refine_max_avg_ratio, lb_max_avg_ratio;
-  int lb_type;
-  GetPrevLBData(lb_type, lb_max_avg_ratio);
-  GetLBDataForLB(1, refine_max_avg_ratio);
   double greedy_max_avg_ratio;
-  GetLBDataForLB(0, greedy_max_avg_ratio);
+  int lb_type;
+  double comm_ratio, comm_refine_ratio;
+
+  GetPrevLBData(lb_type, lb_max_avg_ratio, tmp);
+  GetLBDataForLB(1, refine_max_avg_ratio, tmp);
+  GetLBDataForLB(0, greedy_max_avg_ratio, tmp);
+  GetLBDataForLB(2, tmp, comm_ratio);
+  GetLBDataForLB(3, tmp, comm_refine_ratio);
 
   CkPrintf("AdaptiveLB> Previous LB %d\n", lb_type);
 
-  metisLB->work(stats);
-  return;
   // Choose the right LB
   //
   // If communication overhead is 10% computation, then choose Scotch LB
   if (isComm || (commOverhead > (totalLoad * percent_overhead / 100))) {
-    metisLB->work(stats);
-    lb_type = 2;
-    CkPrintf("---METIS LB\n");
+    if(lb_type == -1) {
+      lb_type = 2;
+      metisLB->work(stats);
+      CkPrintf("---METIS LB\n");
+    } else if (comm_refine_ratio <= 1.01) {
+      lb_type = 3;
+      //commRefineLB->work(stats);
+      CkPrintf("---CommAwareRefineLB\n");
+    } else if (comm_ratio <= 1.01) {
+      lb_type = 2;
+      metisLB->work(stats);
+      CkPrintf("---METIS LB\n");
+    } else {
+      lb_type = 3;
+      //commRefineLB->work(stats);
+      CkPrintf("---CommAwareRefineLB\n");
+    }
+
   } else {
     if (lb_type == -1) {
       lb_type = 0;
@@ -121,7 +139,8 @@ void AdaptiveLB::work(LDStats* stats)
       CkPrintf("---REFINE LB\n");
     }
   }
-  UpdateLBDBWithData(lb_type, stats->after_lb_max, stats->after_lb_avg);
+  UpdateLBDBWithData(lb_type, stats->after_lb_max, stats->after_lb_avg,
+      stats->local_comm, stats->remote_comm);
 
   delete parr;
   delete ogr;
index 08159134fad53e74be3225eef29239ba0c1864a8..e2bc817fea7f961bf76aa00f50f4c0e0add0e883 100644 (file)
@@ -105,6 +105,8 @@ public:
     int is_prev_lb_refine;
     double after_lb_max;
     double after_lb_avg;
+    double local_comm;
+    double remote_comm;
 
     LDStats(int c=0, int complete_flag=1);
     /// the functions below should be used to obtain the number of processors
index ec7ced4f1c481b22bf7db38edb8f2532f87af0aa..232b2c181df73f2f4c356141f239012b3d2bbd21 100644 (file)
@@ -240,16 +240,20 @@ protected:
   void removeNonMigratable(LDStats* statsDataList, int count);
 
   virtual void UpdateLBDBWithData(int is_prev_lb_refine, double lb_max,
-      double lb_avg) {
-    theLbdb->UpdateAfterLBData(is_prev_lb_refine, lb_max, lb_avg);
+      double lb_avg, double local_comm, double remote_comm) {
+    theLbdb->UpdateAfterLBData(is_prev_lb_refine, lb_max, lb_avg, local_comm,
+        remote_comm);
   }
 
-  virtual void GetPrevLBData(int& is_prev_lb_refine, double& lb_max_avg_ratio) {
-    theLbdb->GetPrevLBData(is_prev_lb_refine, lb_max_avg_ratio);
+  virtual void GetPrevLBData(int& is_prev_lb_refine, double& lb_max_avg_ratio,
+      double& local_remote_comm_ratio) {
+    theLbdb->GetPrevLBData(is_prev_lb_refine, lb_max_avg_ratio,
+        local_remote_comm_ratio);
   }
 
-  virtual void GetLBDataForLB(int prev_lb, double& lb_max_avg_ratio) {
-    theLbdb->GetLBDataForLB(prev_lb, lb_max_avg_ratio);
+  virtual void GetLBDataForLB(int prev_lb, double& lb_max_avg_ratio, double&
+      local_remote_comm_ratio) {
+    theLbdb->GetLBDataForLB(prev_lb, lb_max_avg_ratio, local_remote_comm_ratio);
   }
 
 
index 4f6c1fec1fcadf93baec1270293d7d096bf1a71c..0db4e75d6fcfbde3b40989bf31edc3a2773db063 100644 (file)
@@ -35,6 +35,7 @@ struct AdaptiveLBDatabase {
 
 struct AdaptiveLBInfo {
   double max_avg_ratio;
+  double local_remote_ratio;
 };
 
 struct AdaptiveLBStructure {
@@ -54,7 +55,8 @@ struct AdaptiveLBStructure {
   int last_lb_type;
   AdaptiveLBInfo greedy_info;
   AdaptiveLBInfo refine_info;
-  AdaptiveLBInfo metis_info;
+  AdaptiveLBInfo comm_info;
+  AdaptiveLBInfo comm_refine_info;
 } adaptive_struct;
 
 
@@ -680,8 +682,8 @@ void LBDatabase::ReceiveMinStats(CkReductionMsg *msg) {
   // step immediately after load balancing, carry out load balancing
   //if (max/avg >= 1.1 && adaptive_lbdb.history_data.size() > 4) {
   int tmp1;
-  double tmp2;
-  GetPrevLBData(tmp1, tmp2);
+  double tmp2, tmp3;
+  GetPrevLBData(tmp1, tmp2, tmp3);
   double tolerate_imb = IMB_TOLERANCE * tmp2;
 
   if ((max_idle_load_ratio >= IDLE_LOAD_TOLERANCE || max/avg >= tolerate_imb) && adaptive_lbdb.history_data.size() > 4) {
@@ -765,8 +767,8 @@ bool LBDatabase::generatePlan(int& period) {
   // average. Hence we pass 1, else pass in some other value which would be the
   // new max_load after load balancing.
   int tmp1;
-  double tmp2;
-  GetPrevLBData(tmp1, tmp2);
+  double tmp2, tmp3;
+  GetPrevLBData(tmp1, tmp2, tmp3);
   
   double tolerate_imb = tmp2;
   if (max/avg < tolerate_imb) {
@@ -979,30 +981,40 @@ void LBDatabase::SetStrategyCost(double lb_strategy_cost) {
   adaptive_struct.lb_strategy_cost = lb_strategy_cost;
 }
 
-void LBDatabase::UpdateAfterLBData(int lb, double lb_max, double lb_avg) {
+void LBDatabase::UpdateAfterLBData(int lb, double lb_max, double lb_avg, double
+    local_comm, double remote_comm) {
   adaptive_struct.last_lb_type = lb;
   if (lb == 0) {
     adaptive_struct.greedy_info.max_avg_ratio = lb_max/lb_avg;
   } else if (lb == 1) {
     adaptive_struct.refine_info.max_avg_ratio = lb_max/lb_avg;
+  } else if (lb == 2) {
+    adaptive_struct.comm_info.local_remote_ratio = local_comm/remote_comm;
+  } else if (lb == 3) {
+    adaptive_struct.comm_refine_info.local_remote_ratio =
+    local_comm/remote_comm;
   }
 }
 
-void LBDatabase::GetPrevLBData(int& lb_type, double& lb_max_avg_ratio) {
+void LBDatabase::GetPrevLBData(int& lb_type, double& lb_max_avg_ratio, double&
+    local_remote_comm_ratio) {
   lb_type = adaptive_struct.last_lb_type;
   lb_max_avg_ratio = 1;
-  if (lb_type == 0) {
-    lb_max_avg_ratio = adaptive_struct.greedy_info.max_avg_ratio;
-  } else if (lb_type == 1) {
-    lb_max_avg_ratio = adaptive_struct.refine_info.max_avg_ratio;
-  }
+  local_remote_comm_ratio = 1;
+  GetLBDataForLB(lb_type, lb_max_avg_ratio, local_remote_comm_ratio);
 }
 
-void LBDatabase::GetLBDataForLB(int lb_type, double& lb_max_avg_ratio) {
+void LBDatabase::GetLBDataForLB(int lb_type, double& lb_max_avg_ratio, double&
+    local_remote_comm_ratio) {
   if (lb_type == 0) {
     lb_max_avg_ratio = adaptive_struct.greedy_info.max_avg_ratio;
   } else if (lb_type == 1) {
     lb_max_avg_ratio = adaptive_struct.refine_info.max_avg_ratio;
+  } else if (lb_type == 2) {
+    local_remote_comm_ratio = adaptive_struct.comm_info.local_remote_ratio;
+  } else if (lb_type == 3) {
+    local_remote_comm_ratio =
+       adaptive_struct.comm_refine_info.local_remote_ratio;
   }
 }
 
index a37583a1c279e8be5c65f77e1c7262ca7319ee69..9c7f048fd6a64086c5a9a3fdcc3322170c9bbe43 100644 (file)
@@ -370,9 +370,12 @@ public:
 
   bool isStrategyComm();
 
-  void UpdateAfterLBData(int is_lb_refine, double lb_max, double lb_avg);
-  void GetPrevLBData(int& lb_type, double& lb_max_avg_ratio);
-  void GetLBDataForLB(int lb_type, double& lb_max_avg_ratio);
+  void UpdateAfterLBData(int is_lb_refine, double lb_max, double lb_avg, double
+      local_comm, double remote_comm);
+  void GetPrevLBData(int& lb_type, double& lb_max_avg_ratio, double&
+      local_remote_comm_ratio);
+  void GetLBDataForLB(int lb_type, double& lb_max_avg_ratio, double&
+      local_remote_comm_ratio);
 
   void SetMigrationCost(double lb_migration_cost);
   void SetStrategyCost(double lb_strategy_cost);