doc: Add serial to list of ci file reserved words
[charm.git] / src / ck-ldb / TopoCentLB.C
index ede47556828a3fa1a5feb465041399def69ae381..59210fdfccbe2d7b03f0d752522d14910e57e114 100644 (file)
@@ -8,12 +8,7 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "charm++.h"
-#include "cklists.h"
-#include "CentralLB.h"
-
 #include "TopoCentLB.decl.h"
-
 #include "TopoCentLB.h"
 
 #define alpha PER_MESSAGE_SEND_OVERHEAD_DEFAULT  /*Startup time per message, seconds*/
@@ -24,7 +19,7 @@
 //#define RAND_COMM
 #define make_mapping 0
 
-CreateLBFunc_Def(TopoCentLB,"Balance objects based on the network topology");
+CreateLBFunc_Def(TopoCentLB,"Balance objects based on the network topology")
 
 
 /*static void lbinit (void)
@@ -51,11 +46,11 @@ CmiBool TopoCentLB::QueryBalanceNow (int _step)
 }
 
 TopoCentLB::~TopoCentLB(){
-       if(partgraph)   delete partgraph;
        if(topo) delete topo;
 }
 
-
+/*This routine partitions the task graph minimizing the communication and balancing the object load on all partitions*/
+/*It uses METIS library to accomplish that*/
 void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newmap)
 {
        
@@ -74,6 +69,7 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
     origmap[i] = 0;
   }
 
+  //Prepare compute loads for METIS library
   for (i=0; i<stats->n_objs; i++) {
     LDObjData &odata = stats->objData[i];
     if (!odata.migratable) 
@@ -100,8 +96,6 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
                        totalwt+=objwt[i];
   }
        
-       //CkPrintf("\nmax obj wt :%d \n totalwt before :%d \n\n",maxobj,totalwt);
-       
   int **comm = new int*[numobjs];
   for (i=0; i<numobjs; i++) {
     comm[i] = new int[numobjs];
@@ -110,6 +104,7 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
     }
   }
 
+  //Prepare communication for METIS library
   const int csz = stats->n_comm;
   for(i=0; i<csz; i++) {
       LDCommData &cdata = stats->commData[i];
@@ -120,12 +115,8 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
        int recverID = stats->getHash(cdata.receiver.get_destObj());
        CmiAssert(senderID < numobjs);
        CmiAssert(recverID < numobjs);
-       //comm[senderID][recverID] += (int)(cdata.messages*alpha + cdata.bytes*beta);
-       //comm[recverID][senderID] += (int)(cdata.messages*alpha + cdata.bytes*beta);
-               //CkPrintf("in compute partitions...%d\n",comm[senderID][recverID]);
                                comm[senderID][recverID] += cdata.messages;
        comm[recverID][senderID] += cdata.messages;
-
                                //Use bytes or messages -- do i include messages for objlist too...??
                        }
                        else if (cdata.receiver.get_type() == LD_OBJLIST_MSG) {
@@ -172,18 +163,12 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
     xadj[i+1] = count4all;
   }
 
-  /*if (_lb_args.debug() >= 2) {
-  CkPrintf("Pre-LDB Statistics step %d\n", step());
-  printStats(count, numobjs, objtime, comm, origmap);
-  }*/
-
+  //Call METIS routine
   int wgtflag = 3; // Weights both on vertices and edges
   int numflag = 0; // C Style numbering
   int options[5];
   int edgecut;
-  int sameMapFlag = 1;
-
-       options[0] = 0;
+  options[0] = 0;
 
   if (count < 1) {
     CkPrintf("error: Number of Pe less than 1!");
@@ -191,10 +176,8 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
   else if (count == 1) {
        for(m=0;m<numobjs;m++) 
                        newmap[i] = origmap[i];
-       sameMapFlag = 1;
   }
   else {
-       sameMapFlag = 0;
                /*
        if (count > 8)
                        METIS_PartGraphKway(&numobjs, xadj, adjncy, objwt, edgewt, 
@@ -205,18 +188,28 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
                                 &wgtflag, &numflag, &count, options, 
                                 &edgecut, newmap);
                */
-   //CkPrintf("before calling metis.\n");
                METIS_PartGraphRecursive(&numobjs, xadj, adjncy, objwt, edgewt,
                                  &wgtflag, &numflag, &count, options,
                                  &edgecut, newmap);
-        //CkPrintf("after calling Metis function.\n");
   }
         
-  /*if (_lb_args.debug() >= 2) {
-       CkPrintf("Post-LDB Statistics step %d\n", step());
-       printStats(count, numobjs, objtime, comm, newmap);
-  }*/
+  //Debugging code: Checking load on each partition
+  if(_lb_args.debug() >=2){
+         int total=0;
+         int *chkwt = new int[count];
+         for(i=0;i<count;i++)
+                 chkwt[i]=0;
+         for(i=0;i<numobjs;i++){
+               chkwt[newmap[i]] += objwt[i];
+                 total += objwt[i];
+         }
+         for(i=0;i<count;i++)
+                 CkPrintf("%d -- %d\n",i,chkwt[i]);
+         CkPrintf("Totalwt of all partitions after call to METIS:%d, Avg is %d\n",total,total/count);
+  }
 
+  //Clean up all the variables allocated in this routine
   for(i=0;i<numobjs;i++)
     delete[] comm[i];
   delete[] comm;
@@ -226,38 +219,6 @@ void TopoCentLB::computePartitions(CentralLB::LDStats *stats,int count,int *newm
   delete[] objwt;
   delete[] edgewt;
        delete[] handles;
-
-       //CkPrintf("chking wts on each partition...\n");
-
-       /*int avg=0;
-       int *chkwt = new int[count];
-       for(i=0;i<count;i++)
-               chkwt[i]=0;
-       //totalwt=0;
-       for(i=0;i<numobjs;i++){
-               chkwt[newmap[i]] += objwt[i];
-               avg += objwt[i];
-               
-       }
-       
-       
-       for(i=0;i<count;i++)
-               CkPrintf("%d -- %d\n",i,chkwt[i]);
-       
-       //CkPrintf("totalwt after:%d avg is %d\n",avg,avg/count);
-*/
-  /*if(!sameMapFlag) {
-    for(i=0; i<numobjs; i++) {
-      if(origmap[i] != newmap[i]) {
-                               CmiAssert(stats->from_proc[i] == origmap[i]);
-                               ///Dont assign....wait........
-                               stats->to_proc[i] =  newmap[i];
-                               if (_lb_args.debug() >= 3)
-          CkPrintf("[%d] Obj %d migrating from %d to %d\n", CkMyPe(),i,stats->from_proc[i],stats->to_proc[i]);
-      }
-    }
-  }*/
-
   delete[] origmap;
 
 }
@@ -289,12 +250,11 @@ void TopoCentLB::Heapify(HeapNode *heap, int node, int heapSize)
   int right = 2*node+2;
   int xchange;
        
-  //heap[left].load > heap[node].load)
   if (left < heapSize && (heap[left].key > heap[node].key))
     xchange = left;
   else 
                xchange = node;
-  //heap[right].load > heap[xchange].load) 
+  
   if (right < heapSize && (heap[right].key > heap[xchange].key))
     xchange = right;
 
@@ -329,10 +289,8 @@ void TopoCentLB::BuildHeap(HeapNode *heap,int heapSize){
 }
 
 void TopoCentLB :: increaseKey(HeapNode *heap,int i,double wt){
-       int val;
        if(wt != -1.00){
                #ifdef MAX_EDGE
-                       //CkPrintf("me here...\n");
                        if(wt>heap[i].key)
                                heap[i].key = wt;
                #else
@@ -353,398 +311,378 @@ void TopoCentLB :: increaseKey(HeapNode *heap,int i,double wt){
        }
 }
 
-void TopoCentLB :: calculateMST(PartGraph *partgraph,LBTopology *topo,int *proc_mapping,int max_comm_part){
+/*This routine implements the algorithm used to produce the partition-processor mapping*/
+/*The algorithm uses an idea similar to the standard MST algorithm*/
+void TopoCentLB :: calculateMST(PartGraph *partgraph,LBTopology *topo,int *proc_mapping,int max_comm_part) {
 
-       //Edge **markedEdges;
-       //int *parent;
-       int *inHeap;
-       double *keys;
-       int count = partgraph->n_nodes;
-       int i=0,j=0;
-       
-       //parent = new int[partgraph->n_nodes];
-       inHeap = new int[partgraph->n_nodes];
-       keys = new double[partgraph->n_nodes];
-
-       //int **hopCount;
-       //int **topoGraph;
-       //int *topoDegree;
-       int *assigned_procs = new int[count];
-
-       hopCount = new double*[count];
-       for(i=0;i<count;i++){
-               proc_mapping[i]=-1;
-               assigned_procs[i]=0;
-               hopCount[i] = new double[count];
-               for(j=0;j<count;j++)
-                       hopCount[i][j] = 0;
-       }
+  int *inHeap;
+  double *keys;
+  int count = partgraph->n_nodes;
+  int i=0,j=0;
+
+  //Arrays needed for keeping information
+  inHeap = new int[partgraph->n_nodes];
+  keys = new double[partgraph->n_nodes];
+
+  int *assigned_procs = new int[count];
+
+  hopCount = new double*[count];
+  for(i=0;i<count;i++){
+    proc_mapping[i]=-1;
+    assigned_procs[i]=0;
+    hopCount[i] = new double[count];
+    for(j=0;j<count;j++)
+      hopCount[i][j] = 0;
+  }
 
-       //CkPrintf("getting hopcount\n");
-       //double start=CmiWallTimer();
-       topo->get_pairwise_hop_count(hopCount);
-       //CkPrintf("after getting hopcount:%lf\n",(CmiWallTimer()-start));
-       int max_neighbors = topo->max_neighbors();
+  //Call a topology routine to fill up hopCount
+  topo->get_pairwise_hop_count(hopCount);
        
-       HeapNode *heap = new HeapNode[partgraph->n_nodes];
-       heapMapping = new int[partgraph->n_nodes];
+  int max_neighbors = topo->max_neighbors();
        
-       int heapSize = 0;
-
-       for(i=0;i<partgraph->n_nodes;i++){
-               heap[i].key = 0.00;
-               heap[i].node = i;
-               keys[i] = 0.00;
-               //parent[i] = -1;
-               inHeap[i] = 1;
-               heapMapping[i]=i;
-               /*if(i==0){ //Take 0 as root
-                       heap[0].key = 1.00;
-                       keys[0] = 0;
-               }*/
-       }
+  HeapNode *heap = new HeapNode[partgraph->n_nodes];
+  heapMapping = new int[partgraph->n_nodes];
+       
+  int heapSize = 0;
+
+  for(i=0;i<partgraph->n_nodes;i++){
+    heap[i].key = 0.00;
+    heap[i].node = i;
+    keys[i] = 0.00;
+    inHeap[i] = 1;
+    heapMapping[i]=i;
+  }
 
-       //Assign the max comm partition first
-       heap[max_comm_part].key = 1.00;
+  //Assign the max comm partition first
+  heap[max_comm_part].key = 1.00;
        
-       heapSize = partgraph->n_nodes;
-       BuildHeap(heap,heapSize);
+  heapSize = partgraph->n_nodes;
+  BuildHeap(heap,heapSize);
 
-       int k=0,comm_cnt=0,m=0,n=0;
-       int *commParts = new int[partgraph->n_nodes];
+  int k=0,comm_cnt=0,m=0;
+  int *commParts = new int[partgraph->n_nodes];
        
-       //srand(count);
-
-       while(heapSize > 0){
-               HeapNode max = extractMax(heap,&heapSize);
-               inHeap[max.node] = 0;
-
-               //for(j=0;j<heapSize;j++)
-                       //heapMapping[heap[j].node]=j;
-
-               for(i=0;i<partgraph->n_nodes;i++){
-                       commParts[i]=-1;
-                       PartGraph::Edge wt = partgraph->edges[max.node][i];
-                       if(wt == 0)
-                               continue;
-                       if(inHeap[i]){
-                               //parent[i] = max.node;
-                       #ifdef MAX_EDGE
-                               if(wt>keys[i])
-                                       keys[i]=wt;
-                       #else
-                               keys[i] += wt;
-                       #endif
-                               //Update in the heap too
-                               //First, find where this node is..in the heap
-                               /*for(j=0;j<heapSize;j++)
-                                       if(heap[j].node == i)
-                                               break;
-                               if(j==heapSize)
-                                       CmiAbort("Some error in heap...\n");*/
-                               increaseKey(heap,heapMapping[i],wt);
-                       }
-               }
+  //srand(count);
+
+  while(heapSize > 0){
+
+    /****Phase1: Extracting appropriate partition from heap****/
+
+    HeapNode max = extractMax(heap,&heapSize);
+    inHeap[max.node] = 0;
+
+    for(i=0;i<partgraph->n_nodes;i++){
+      commParts[i]=-1;
+      PartGraph::Edge wt = partgraph->edges[max.node][i];
+      if(wt == 0)
+       continue;
+      if(inHeap[i]){
+#ifdef MAX_EDGE
+       if(wt>keys[i])
+         keys[i]=wt;
+#else
+       keys[i] += wt;
+#endif
+        /*This part has been COMMENTED out for optimizing the code: we handle the updation using heapMapping*/
+        /*array instead of searching for node in the heap everytime*/
+
+       //Update in the heap too
+       //First, find where this node is..in the heap
+       /*for(j=0;j<heapSize;j++)
+         if(heap[j].node == i)
+         break;
+         if(j==heapSize)
+         CmiAbort("Some error in heap...\n");*/
+       increaseKey(heap,heapMapping[i],wt);
+      }
+    }
+                
+    /*Phase2: ASSIGNING partition to processor*/
                
-               if(heapSize == partgraph->n_nodes-1){ //For now, assign max comm partition to 0th proc in the topology
-                       proc_mapping[max.node]=0;
-                       assigned_procs[0]=1;
-                       continue;
-               }
+    //Special case
+    if(heapSize == partgraph->n_nodes-1){ //Assign max comm partition to 0th proc in the topology
+      proc_mapping[max.node]=0;
+      assigned_procs[0]=1;
+      continue;
+    }
                
-               m=0;
-               n=0;
-               comm_cnt=0;
-
-               int pot_proc=-1;
-               double min_cost=-1;
-               int min_cost_index=-1;
-               double cost=0;
-               int p=0;
-               //int q=0;
-
-               for(k=0;k<partgraph->n_nodes;k++){
-                       if(!inHeap[k] && partgraph->edges[k][max.node]){
-                               commParts[comm_cnt]=k;
-                               comm_cnt++;
-                       }
-               }
+    m=0;
 
-               for(m=0;m<count;m++){
-                       if(!assigned_procs[m]){
-                               cost=0;
-                               for(p=0;p<comm_cnt;p++){
-                                       //if(!hopCount[proc_mapping[commParts[p]]][m])
-                                               //hopCount[proc_mapping[commParts[p]]][m]=topo->get_hop_count(proc_mapping[commParts[p]],m);
-                                       cost += hopCount[proc_mapping[commParts[p]]][m]*partgraph->edges[commParts[p]][max.node];
-                                       //CkPrintf("hop count:%d\n",hopCount[proc_mapping[commParts[p]]][pot_proc]);
-                               }
-                               if(min_cost==-1 || cost<min_cost){
-                                       min_cost=cost;
-                                       min_cost_index=m;
-                               }
-                       }
-               }
+    comm_cnt=0;
+
+    double min_cost=-1;
+    int min_cost_index=-1;
+    double cost=0;
+    int p=0;
+    //int q=0;
 
-               proc_mapping[max.node]=min_cost_index;
-               assigned_procs[min_cost_index]=1;
+    for(k=0;k<partgraph->n_nodes;k++){
+      if(!inHeap[k] && partgraph->edges[k][max.node]){
+       commParts[comm_cnt]=k;
+       comm_cnt++;
+      }
+    }
+
+    //Optimized the loop by commenting out the get_hop_count code and getting all the hop counts initially
+    for(m=0;m<count;m++){
+      if(!assigned_procs[m]){
+       cost=0;
+       for(p=0;p<comm_cnt;p++){
+         //if(!hopCount[proc_mapping[commParts[p]]][m])
+         //hopCount[proc_mapping[commParts[p]]][m]=topo->get_hop_count(proc_mapping[commParts[p]],m);
+         cost += hopCount[proc_mapping[commParts[p]]][m]*partgraph->edges[commParts[p]][max.node];
+       }
+       if(min_cost==-1 || cost<min_cost){
+         min_cost=cost;
+         min_cost_index=m;
        }
+      }
+    }
+
+    proc_mapping[max.node]=min_cost_index;
+    assigned_procs[min_cost_index]=1;
+  }
 
-       //clear up memory
-       delete[] inHeap;
-       delete[] keys;
-       delete[] assigned_procs;
-       delete[] heap;
-       delete[] commParts;
+  //clear up memory
+  delete[] inHeap;
+  delete[] keys;
+  delete[] assigned_procs;
+  delete[] heap;
+  delete[] commParts;
 }
 
 
-void TopoCentLB :: work(CentralLB::LDStats *stats,int count)
+void TopoCentLB :: work(LDStats *stats)
 {
   int proc;
-  int obj;
-  int dest;
   int i,j;
-       LDObjData *odata;
+  int n_pes = stats->nprocs();
        
-       //if (_lb_args.debug() >= 2) {
+  if (_lb_args.debug() >= 2) {
     CkPrintf("In TopoCentLB Strategy...\n");
-  //}
-       //double start = CmiWallTimer();
+  }
+  
   // Make sure that there is at least one available processor.
-  for (proc = 0; proc < count; proc++) {
+  for (proc = 0; proc < n_pes; proc++) {
     if (stats->procs[proc].available) {
       break;
     }
   }
-       if (proc == count) {
+
+  if (proc == n_pes) {
     CmiAbort ("TopoCentLB: no available processors!");
   }
 
-  removeNonMigratable(stats,count);
-
-       int *newmap = new int[stats->n_objs];
+  
+  removeNonMigratable(stats, n_pes);
+  int *newmap = new int[stats->n_objs];
 
-       //CkPrintf("before computing partitions...\n");
 
-       if(make_mapping)
-               computePartitions(stats,count,newmap);
-       else{
-               //mapping taken from previous algo
-               for(i=0;i<stats->n_objs;i++){
+  if(make_mapping)
+    computePartitions(stats, n_pes, newmap);
+  else {
+    //mapping taken from previous algo
+    for(i=0;i<stats->n_objs;i++) {
       newmap[i]=stats->from_proc[i];
     }
-       }
-       
-       /*CkPrintf("obj-proc mapping\n");
-       for(i=0;i<stats->n_objs;i++)
-               CkPrintf(" %d,%d ",i,newmap[i]);
-       */
-       int max_objs = findMaxObjs(newmap,stats->n_objs,count);
-       
-       //Check to see if edgecut contains the reqd. no. of edges
-       
-       partgraph = new PartGraph(count,max_objs);
+  }
 
-       //Fill up the partition graph - first fill the nodes and then, the edges
+  //Debugging Code
+  if(_lb_args.debug() >=2){
+    CkPrintf("Map obtained from partitioning:\n");
+    for(i=0;i<stats->n_objs;i++)
+      CkPrintf(" %d,%d ",i,newmap[i]);
+  }
 
+  int max_objs = findMaxObjs(newmap,stats->n_objs, n_pes);
+       
+  partgraph = new PartGraph(n_pes, max_objs);
 
-       for(i=0;i<stats->n_objs;i++)
-       {
-               PartGraph::Node* n = &partgraph->nodes[newmap[i]];
-               n->obj_list[n->num_objs]=i;
-               n->num_objs++;
-       }
+  //Fill up the partition graph - first fill the nodes and then, the edges
 
-       int *addedComm=new int[count];
+  for(i=0;i<stats->n_objs;i++)
+    {
+      PartGraph::Node* n = &partgraph->nodes[newmap[i]];
+      n->obj_list[n->num_objs]=i;
+      n->num_objs++;
+    }
 
-       int max_comm_part=-1;
-       
-       double max_comm=0;
+  int *addedComm=new int[n_pes];
+  
+  stats->makeCommHash();
+  
+  int max_comm_part=-1;
        
-       #ifdef RAND_COMM
-       for(i=0;i<count;i++){
-               for(j=i+1;j<count;j++){
-                       int val;
-                       if(rand()%5==0)
-                               val=0;
-                       else
-                               val= rand()%1000;
+  double max_comm=0;
+
+  //Try putting random amount of communication on the partition graph edges to see if things work fine
+  //This also checks the running time of the algorithm since number of edges is high than in a practical scenario
+#ifdef RAND_COMM
+  for(i = 0; i < n_pes; i++) {
+    for(j = i+1; j < n_pes; j++) {
+      int val;
+      if(rand()%5==0)
+       val=0;
+      else
+       val= rand()%1000;
                                
-                       partgraph->edges[i][j] = val;
-                       partgraph->edges[j][i] = val;
+      partgraph->edges[i][j] = val;
+      partgraph->edges[j][i] = val;
                        
-                       partgraph->nodes[i].comm += val;
-                       partgraph->nodes[j].comm += val;
+      partgraph->nodes[i].comm += val;
+      partgraph->nodes[j].comm += val;
                        
-                       if(partgraph->nodes[i].comm > max_comm){
-                               max_comm = partgraph->nodes[i].comm;
-                               max_comm_part = i;
-                       }
-                       if(partgraph->nodes[j].comm > max_comm){
-                               max_comm = partgraph->nodes[j].comm;
-                               max_comm_part = j;
-                       }
-               }
-       }
-       #else
-       for(i=0;i<stats->n_comm;i++)
-       {
-               //DO I consider other comm too....i.e. to or from a processor
-
-               LDCommData &cdata = stats->commData[i];
-               //if(cdata.from_proc() || cdata.receiver.get_type() != LD_OBJ_MSG)
-       //continue;
-               if(!cdata.from_proc() && cdata.receiver.get_type() == LD_OBJ_MSG){
+      if(partgraph->nodes[i].comm > max_comm){
+       max_comm = partgraph->nodes[i].comm;
+       max_comm_part = i;
+      }
+      if(partgraph->nodes[j].comm > max_comm){
+       max_comm = partgraph->nodes[j].comm;
+       max_comm_part = j;
+      }
+    }
+  }
+#else
+  //Adding communication to the partition graph edges
+  for(i=0;i<stats->n_comm;i++)
+    {
+      //DO I consider other comm too....i.e. to or from a processor
+      LDCommData &cdata = stats->commData[i];
+      if(!cdata.from_proc() && cdata.receiver.get_type() == LD_OBJ_MSG){
        int senderID = stats->getHash(cdata.sender);
        int recverID = stats->getHash(cdata.receiver.get_destObj());
-                       CmiAssert(senderID < stats->n_objs);
-               CmiAssert(recverID < stats->n_objs);
+       CmiAssert(senderID < stats->n_objs);
+       CmiAssert(recverID < stats->n_objs);
                
-                       if(newmap[senderID]==newmap[recverID])
-                               continue;
+       if(newmap[senderID]==newmap[recverID])
+         continue;
        
-                       if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
-                               partgraph->nodes[newmap[senderID]].degree++;
-                               partgraph->nodes[newmap[recverID]].degree++;
-                       }
+       if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
+         partgraph->nodes[newmap[senderID]].degree++;
+         partgraph->nodes[newmap[recverID]].degree++;
+       }
                
-                       partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
-                       partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;
+       partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
+       partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;
                        
-                       partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
-                       partgraph->nodes[newmap[recverID]].comm += cdata.bytes;
-                       
-                       if(partgraph->nodes[newmap[senderID]].comm > max_comm){
-                               max_comm = partgraph->nodes[newmap[senderID]].comm;
-                               max_comm_part = newmap[senderID];
-                       }
-                       if(partgraph->nodes[newmap[recverID]].comm > max_comm){
-                               max_comm = partgraph->nodes[newmap[recverID]].comm;
-                               max_comm_part = newmap[recverID];
-                       }
-
-                       //partgraph->edges[newmap[recverID]][newmap[senderID]] += 1000*(cdata.messages*alpha + cdata.bytes*beta);
-                       //bytesComm[newmap[senderID]][newmap[recverID]] += cdata.bytes;
-                       //bytesComm[newmap[recverID]][newmap[senderID]] += cdata.bytes;
+       partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
+       partgraph->nodes[newmap[recverID]].comm += cdata.bytes;
 
-               }
-               else if(cdata.receiver.get_type() == LD_OBJLIST_MSG) {
-       //CkPrintf("\nin obj list cal...\n\n");
-                       int nobjs;
+       //Keeping track of maximum communiacting partition
+       if(partgraph->nodes[newmap[senderID]].comm > max_comm){
+         max_comm = partgraph->nodes[newmap[senderID]].comm;
+         max_comm_part = newmap[senderID];
+       }
+       if(partgraph->nodes[newmap[recverID]].comm > max_comm){
+         max_comm = partgraph->nodes[newmap[recverID]].comm;
+         max_comm_part = newmap[recverID];
+       }
+      }
+      else if(cdata.receiver.get_type() == LD_OBJLIST_MSG) {
+       int nobjs;
        LDObjKey *objs = cdata.receiver.get_destObjs(nobjs);
-      int senderID = stats->getHash(cdata.sender);
-      //addedComm = new int[count];
-                       for(j=0;j<count;j++)
-                               addedComm[j]=0;
-                       //CkPrintf("in obj list loop...\n");
-                       for (j=0; j<nobjs; j++) {
-       int recverID = stats->getHash(objs[j]);
-        if((senderID == -1)||(recverID == -1))
-               if (_lb_args.migObjOnly()) continue;
-               else CkAbort("Error in search\n");
+       int senderID = stats->getHash(cdata.sender);
+       for(j = 0; j < n_pes; j++)
+         addedComm[j]=0;
+       for (j=0; j<nobjs; j++) {
+         int recverID = stats->getHash(objs[j]);
+         if((senderID == -1)||(recverID == -1))
+           if (_lb_args.migObjOnly()) continue;
+           else CkAbort("Error in search\n");
                                        
-                               if(newmap[senderID]==newmap[recverID])
-                                       continue;
+         if(newmap[senderID]==newmap[recverID])
+           continue;
        
-                               //CkPrintf("from %d to %d\n",newmap[senderID],newmap[recverID]);
-                               if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
-                                       //CkPrintf("sender:%d recver:%d\n",newmap[senderID],newmap[recverID]);
-                                       partgraph->nodes[newmap[senderID]].degree++;
-                                       partgraph->nodes[newmap[recverID]].degree++;
-                               }
-               
-                               if(!addedComm[newmap[recverID]]){
-                                       partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
-                                       partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;
+         if(partgraph->edges[newmap[senderID]][newmap[recverID]] == 0){
+           partgraph->nodes[newmap[senderID]].degree++;
+           partgraph->nodes[newmap[recverID]].degree++;
+         }
+
+         //Communication added only once for a message sent to many objects on a single processor
+         if(!addedComm[newmap[recverID]]){
+           partgraph->edges[newmap[senderID]][newmap[recverID]] += cdata.bytes;
+           partgraph->edges[newmap[recverID]][newmap[senderID]] += cdata.bytes;
        
-                                       partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
-                                       partgraph->nodes[newmap[recverID]].comm += cdata.bytes;
-
-                                       if(partgraph->nodes[newmap[senderID]].comm > max_comm){
-                                               max_comm = partgraph->nodes[newmap[senderID]].comm;
-                                               max_comm_part = newmap[senderID];
-                                       }
-                                       if(partgraph->nodes[newmap[recverID]].comm > max_comm){
-                                               max_comm = partgraph->nodes[newmap[recverID]].comm;
-                                               max_comm_part = newmap[recverID];
-                                       }
-
-                                       //bytesComm[newmap[senderID]][newmap[recverID]] += cdata.bytes;
-                                       //bytesComm[newmap[recverID]][newmap[senderID]] += cdata.bytes;
-                                       addedComm[newmap[recverID]]=1;
-                               }
+           partgraph->nodes[newmap[senderID]].comm += cdata.bytes;
+           partgraph->nodes[newmap[recverID]].comm += cdata.bytes;
+
+           if(partgraph->nodes[newmap[senderID]].comm > max_comm){
+             max_comm = partgraph->nodes[newmap[senderID]].comm;
+             max_comm_part = newmap[senderID];
+           }
+           if(partgraph->nodes[newmap[recverID]].comm > max_comm){
+             max_comm = partgraph->nodes[newmap[recverID]].comm;
+             max_comm_part = newmap[recverID];
+           }
+           //bytesComm[newmap[senderID]][newmap[recverID]] += cdata.bytes;
+           //bytesComm[newmap[recverID]][newmap[senderID]] += cdata.bytes;
+           addedComm[newmap[recverID]]=1;
+         }
+       }
       }
-                       //CkPrintf("outside loop..\n");
-       //delete [] addedComm;
-               }
 
-       }
-       #endif
+    }
+#endif
        
-       int *proc_mapping = new int[count];
+  int *proc_mapping = new int[n_pes];
        
-       delete [] addedComm;
+  delete [] addedComm;
                
-       LBtopoFn topofn;
-       int max_neighbors=0;
-       int *neigh;
-       int num_neigh=0;
-
-       char *lbcopy = strdup(_lbtopo);
-       char *ptr = strchr(lbcopy, ':');
-  if (ptr!=NULL){
-       ptr = strtok(lbcopy, ":");
-               //CkPrintf("token:%s\n",ptr);
-       }
-       else
-               ptr=lbcopy;
+  LBtopoFn topofn;
+
+  //Parsing the command line input for getting the processor topology
+  char *lbcopy = strdup(_lbtopo);
+  char *ptr = strchr(lbcopy, ':');
+  if (ptr!=NULL)
+    ptr = strtok(lbcopy, ":");
+  else
+    ptr=lbcopy;
 
-       topofn = LBTopoLookup(ptr);
+  topofn = LBTopoLookup(ptr);
   if (topofn == NULL) {
-       char str[1024];
+    char str[1024];
     CmiPrintf("TopoCentLB> Fatal error: Unknown topology: %s. Choose from:\n", ptr);
     printoutTopo();
     sprintf(str, "TopoCentLB> Fatal error: Unknown topology: %s", ptr);
     CmiAbort(str);
   }
   
-       topo = topofn(count);
+  topo = topofn(n_pes);
 
-       //double fourth = CmiWallTimer();
-       //CkPrintf("before calling algo...:%lf\n",(fourth-th));
-       calculateMST(partgraph,topo,proc_mapping,max_comm_part);
-       //CkPrintf("after calling algo...:%lf\n",(CmiWallTimer()-fourth));
-       //Returned partition graph is a Maximum Spanning Tree -- converted in above function itself
+  //Call the core routine to produce the partition processor mapping
+  calculateMST(partgraph,topo,proc_mapping,max_comm_part);
+  //Returned partition graph is a Maximum Spanning Tree -- converted in above function itself
 
-       //Construct the Topology Graph
-       
-       /*CkPrintf("part,proc mapping..\n");
-       for(i=0;i<count;i++)
-               CkPrintf("%d,%d len:%d\n",i,proc_mapping[i],(partgraph->nodes[i]).num_objs);
-               */
-       int pe;
-       PartGraph::Node* n;
-       for(i=0;i<count;i++){
-               pe = proc_mapping[i];
-               //CkPrintf("%d %d\n",i,pe);
-               n = &partgraph->nodes[i];
-               //CkPrintf("here..%d,first object:%d\n",n->num_objs,n->obj_list[0]);
-               for(j=0;j<n->num_objs;j++){
-                       stats->to_proc[n->obj_list[j]] = pe;
-                       if (_lb_args.debug()>1) 
+  //Debugging code: Result of mapping partition graph onto processor graph
+  if (_lb_args.debug()>1) {
+    CkPrintf("Resultant mapping..(partition,processor)\n");
+    for(i = 0; i < n_pes; i++)
+      CkPrintf("%d,%d\n",i,proc_mapping[i]);
+  }
+
+  //Store the result in the load balancing database
+  int pe;
+  PartGraph::Node* n;
+  for(i = 0; i < n_pes; i++){
+    pe = proc_mapping[i];
+    n = &partgraph->nodes[i];
+    for(j=0;j<n->num_objs;j++){
+      stats->to_proc[n->obj_list[j]] = pe;
+      if (_lb_args.debug()>1) 
         CkPrintf("[%d] Obj %d migrating from %d to %d\n", CkMyPe(),n->obj_list[j],stats->from_proc[n->obj_list[j]],pe);
-               }
-       }
+    }
+  }
 
-       delete[] newmap;
-       delete[] proc_mapping;
-       //Delete hopCount
-       for(i=0;i<count;i++)
-               delete[] hopCount[i];
+  delete[] newmap;
+  delete[] proc_mapping;
+  //Delete hopCount
+  for(i = 0; i < n_pes; i++)
+    delete[] hopCount[i];
 
-       delete[] hopCount;
-       delete[] heapMapping;
+  delete[] hopCount;
+  delete[] heapMapping;
+       
+  delete partgraph;
 }
 
 #include "TopoCentLB.def.h"