Ignoring idle time if it isn't communication intensive
authorHarshitha <gplkrsh2@illinois.edu>
Fri, 13 Apr 2012 04:43:25 +0000 (23:43 -0500)
committerHarshitha <gplkrsh2@illinois.edu>
Fri, 13 Apr 2012 04:43:25 +0000 (23:43 -0500)
src/ck-ldb/BaseLB.C
src/ck-ldb/BaseLB.h
src/ck-ldb/CentralLB.C
src/ck-ldb/LBDatabase.C
src/ck-ldb/LBDatabase.h

index 162a9ff6f3bd0958d81bb5acc08e42330b372aa4..4fbed6d5a8652226836b6373ed9a5972723d22d1 100644 (file)
@@ -224,6 +224,77 @@ void BaseLB::LDStats::clearCommHash() {
   }
 }
 
+// Get the nonlocal communication: accumulate the message and byte counts of
+// every comm record whose sender PE differs from its receiver PE.
+void BaseLB::LDStats::computeComm(long &nmsgs, long long &nbytes)
+{
+  // Zero the outputs before the #if so they are defined even without CMK_LBDB_ON.
+  nmsgs = 0;
+  nbytes = 0;
+#if CMK_LBDB_ON
+  makeCommHash();
+
+  for (int cidx = 0; cidx < n_comm; cidx++) {
+    LDCommData &cdata = commData[cidx];
+    int senderPE, receiverPE;
+    if (cdata.from_proc())
+      senderPE = cdata.src_proc;
+    else {
+      int idx = getHash(cdata.sender);
+      if (idx == -1) continue;    // sender has just migrated?
+      senderPE = to_proc[idx];
+      CmiAssert(senderPE != -1);
+    }
+    CmiAssert(senderPE < nprocs() && senderPE >= 0);
+
+    // find receiver: point-to-point and multicast two cases
+    int receiver_type = cdata.receiver.get_type();
+    if (receiver_type == LD_PROC_MSG || receiver_type == LD_OBJ_MSG) {
+      if (receiver_type == LD_PROC_MSG)
+        receiverPE = cdata.receiver.proc();
+      else {  // LD_OBJ_MSG
+        int idx = getHash(cdata.receiver.get_destObj());
+        if (idx == -1) {                // receiver outside this domain
+          if (complete_flag) continue;
+          else receiverPE = -1;         // unknown PE: compares unequal to a valid senderPE
+        }
+        else {
+          receiverPE = to_proc[idx];
+          CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
+        }
+      }
+      if (senderPE != receiverPE)
+      {
+        nmsgs += cdata.messages;
+        nbytes += cdata.bytes;
+      }
+    }
+    else if (receiver_type == LD_OBJLIST_MSG) {
+      int nobjs;
+      LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
+      // PEs already seen for this multicast; each distinct receiver PE is handled once
+      CkVec<int> pes;
+      for (int i = 0; i < nobjs; i++) {
+        int idx = getHash(objs[i]);
+        CmiAssert(idx != -1);
+        if (idx == -1) continue;    // receiver has just been removed?
+        receiverPE = to_proc[idx];
+        CmiAssert(receiverPE < nprocs() && receiverPE >= 0);
+        int exist = 0;
+        for (int p = 0; p < pes.size(); p++)
+          if (receiverPE == pes[p]) { exist = 1; break; }
+        if (exist) continue;
+        pes.push_back(receiverPE);
+        if (senderPE != receiverPE)
+        {
+          nmsgs += cdata.messages;
+          nbytes += cdata.bytes;
+        }
+      }
+    }
+  }   // end of for
+#endif
+}
 void BaseLB::LDStats::computeNonlocalComm(int &nmsgs, int &nbytes)
 {
 #if CMK_LBDB_ON
index e2bc817fea7f961bf76aa00f50f4c0e0add0e883..f7261c31e15113ac067995bacfe1fa7902f08386 100644 (file)
@@ -137,6 +137,7 @@ public:
       for (int i=0; i<nprocs(); i++) procs[i].clearBgLoad();
     }
     void computeNonlocalComm(int &nmsgs, int &nbytes);
+    void computeComm(long &nmsgs, long long &nbytes);
     double computeAverageLoad();
     void normalize_speed();
     void print();
index d084d51448467b30e2a8141e5b701ce4c301967a..5801800fb9f627ab1fbceeda891ef559586ff7e1 100644 (file)
 //#include "limits.h"
 #include <vector>
 
+#define alpha 4.0e-6  // multiplied by the message count in the alpha-beta cost; presumably seconds per message -- TODO confirm
+#define beta 2.67e-9  // multiplied by the byte count in the alpha-beta cost; presumably seconds per byte -- TODO confirm
+#define percent_overhead 10  // NOTE(review): not referenced in the visible changes -- confirm it is used
+
+
 #define  DEBUGF(x)       // CmiPrintf x;
 #define  DEBUG(x)        // x;
 #define  DEBAD(x)        // CmiPrintf x
@@ -1496,18 +1501,27 @@ LBMigrateMsg* CentralLB::Strategy(LDStats* stats)
   int clients = CkNumPes();
   LBInfo info(clients);
   getPredictedLoadWithMsg(stats, clients, msg, info, 0);
-  LBRealType mLoad, mCpuLoad, totalLoad;
+  LBRealType mLoad, mCpuLoad, totalLoad, totalLoadWComm;
   info.getSummary(mLoad, mCpuLoad, totalLoad);
   CkPrintf("CharmLB> Max load w/o comm %lf Max cpu load %lf Avg load %lf\n", mLoad, mCpuLoad, totalLoad/clients);
   theLbdb->UpdateAfterLBData(mLoad, mCpuLoad, totalLoad/clients);
+
   getPredictedLoadWithMsg(stats, clients, msg, info,1);
-  info.getSummary(mLoad, mCpuLoad, totalLoad);
+  info.getSummary(mLoad, mCpuLoad, totalLoadWComm);  // separate variable keeps totalLoad (w/o comm) intact for later use
-  CkPrintf("CharmLB> Max load with comm %lf Max cpu load %lf Avg load %lf\n", mLoad, mCpuLoad, totalLoad/clients);
+  CkPrintf("CharmLB> Max load with comm %lf Max cpu load %lf Avg load %lf\n", mLoad, mCpuLoad, totalLoadWComm/clients);
   int nmsgs, nbytes;
   stats->computeNonlocalComm(nmsgs, nbytes);
   CkPrintf("CharmLB> Non local communication %d msg and %d bytes\n", nmsgs, nbytes);
 
 
+  long msg_n = 0;         // message count filled in by computeComm
+  long long bytes_n = 0;  // byte count filled in by computeComm
+  stats->computeComm(msg_n, bytes_n);
+  CkPrintf("CharmLB> Total communication %ld msg and %lld bytes\n", msg_n, bytes_n);
+
+  double alpha_beta_cost = (msg_n * alpha) + (bytes_n * beta);  // per-message cost plus per-byte cost
+  theLbdb->UpdateAfterLBComm(alpha_beta_cost/totalLoad);  // ratio of modeled comm cost to the total load w/o comm
+
   if (_lb_args.debug()) {
     double strat_end_time = CkWallTimer();
     envelope *env = UsrToEnv(msg);
index 33d3335500e4733608b8fffaec21f235457a2e71..0bebe7b96ee3013ffa515871d6456bda808de4fa 100644 (file)
@@ -441,6 +441,7 @@ void LBDatabase::init(void)
   total_contrib_vec.resize(VEC_SIZE, 0.0);
   max_iteration = -1;
   prev_idle = 0.0;
+  alpha_beta_cost_to_load = 1.0; // Placeholder, fix me! Any value >= 0.1 keeps ReceiveMinStats on the default IDLE_LOAD_TOLERANCE.
 
   // If metabalancer enabled, initialize the variables
   adaptive_struct.tentative_period =  INT_MAX;
@@ -714,6 +715,15 @@ void LBDatabase::ReceiveMinStats(CkReductionMsg *msg) {
     return;
   }
 
+  double idle_load_tolerance = IDLE_LOAD_TOLERANCE;  // threshold that max_idle_load_ratio is compared against below
+  if (alpha_beta_cost_to_load < 0.1) {  // stored comm-cost-to-load ratio is under 10%
+    // Ignore the idle-time effect by raising the tolerance; 1024.0 is presumably far above any real ratio -- TODO confirm
+    CkPrintf("Changing the idle load tolerance coz this isn't communication intensive benchmark\n");
+    idle_load_tolerance = 1024.0;
+  }
+
+
+
 //  if (adaptive_struct.lb_period_informed) {
 //    return;
 //  }
@@ -745,7 +755,7 @@ void LBDatabase::ReceiveMinStats(CkReductionMsg *msg) {
 
 
 
-    if ((max_idle_load_ratio >= IDLE_LOAD_TOLERANCE || max/avg >= tolerate_imb) && adaptive_lbdb.history_data.size() > 6) {
+    if ((max_idle_load_ratio >= idle_load_tolerance || max/avg >= tolerate_imb) && adaptive_lbdb.history_data.size() > 6) {
       CkPrintf("Carry out load balancing step at iter max/avg(%lf) and max_idle_load_ratio ratio (%lf)\n", max/avg, max_idle_load_ratio);
 
       // If the previously calculated_period (not the final decision) is greater
@@ -797,7 +807,7 @@ void LBDatabase::ReceiveMinStats(CkReductionMsg *msg) {
   CkPrintf("Prev LB Data Type %d, max/avg %lf, local/remote %lf\n", tmp1, tmp2, tmp3);
 
 
-  if ((max_idle_load_ratio >= IDLE_LOAD_TOLERANCE || max/avg >= tolerate_imb) && adaptive_lbdb.history_data.size() > 4) {
+  if ((max_idle_load_ratio >= idle_load_tolerance || max/avg >= tolerate_imb) && adaptive_lbdb.history_data.size() > 4) {
     CkPrintf("Carry out load balancing step at iter max/avg(%lf) and max_idle_load_ratio ratio (%lf)\n", max/avg, max_idle_load_ratio);
 //    if (!adaptive_struct.lb_period_informed) {
 //      // Just for testing
@@ -1191,6 +1201,10 @@ avg_load) {
   }
 }
 
+// Store the ratio of modeled communication cost to load; ReceiveMinStats reads it to pick the idle load tolerance.
+void LBDatabase::UpdateAfterLBComm(double alpha_beta_cost_to_load) {
+  this->alpha_beta_cost_to_load = alpha_beta_cost_to_load;  // parameter shadows the member, so qualify with this->
+}
 
 void LBDatabase::GetPrevLBData(int& lb_type, double& lb_max_avg_ratio, double&
     remote_local_comm_ratio) {
index 168dc8cfc1d07f590e3c861cd897cbd9d235cca9..dc0504ea86d842bdbb18e072e598f7b289e4aee1 100644 (file)
@@ -380,6 +380,7 @@ public:
       local_comm, double remote_comm);
 
   void UpdateAfterLBData(double max_load, double max_cpu, double avg_load);
+  void UpdateAfterLBComm(double alpha_beta_cost);
   void GetPrevLBData(int& lb_type, double& lb_max_avg_ratio, double&
       local_remote_comm_ratio);
   void GetLBDataForLB(int lb_type, double& lb_max_avg_ratio, double&
@@ -408,6 +409,7 @@ private:
   double after_lb_max;
   double after_lb_avg;
   double prev_idle;
+  double alpha_beta_cost_to_load;
   int is_prev_lb_refine;
 
 public: