9675ec3dc5febcc42bc922d9991d400db0d3b21c
[charm.git] / src / ck-ldb / GreedyAgentLB.C
1 /*****************************************************************************
2  * $Source$
3  * $Author$
4  * $Date$
5  *****************************************************************************/
6
7 /**
8  * \addtogroup CkLdb
9 */
10 /*@{*/
11
12 /*
13  status:
14   * support processor avail bitvector
15   * support nonmigratable attrib
16 */
17
18 #include <charm++.h>
19 #include <LBSimulation.h>
20
21 #include "cklists.h"
22 #include "GreedyAgentLB.h"
23
24 #define LOAD_OFFSET 0.05
25
26 CreateLBFunc_Def(GreedyAgentLB,"always assign the heaviest obj onto lightest loaded processor taking into account the topology")
27
28 /*static void lbinit(void) {
29   LBRegisterBalancer("GreedyAgentLB", 
30                      CreateGreedyAgentLB, 
31                      AllocateGreedyAgentLB, 
32                      "always assign the heaviest obj onto lightest loaded processor.");
33 }
34 */
35 #include "GreedyAgentLB.def.h"
36
37 GreedyAgentLB::GreedyAgentLB(const CkLBOptions &opt): CentralLB(opt)
38 {
39   lbname = "GreedyAgentLB";
40   if (CkMyPe()==0)
41     CkPrintf("[%d] GreedyAgentLB created\n",CkMyPe());
42 }
43
44 CmiBool GreedyAgentLB::QueryBalanceNow(int _step)
45 {
46   //  CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),_step);
47   return CmiTrue;
48 }
49
50 CmiBool  GreedyAgentLB::Compare(double x, double y, HeapCmp cmp)
51 {
52   const int test =  ((cmp == GT) ? (x > y) : (x < y));
53
54   if (test) return CmiTrue; 
55   else return CmiFalse;
56 }
57
58
59 void GreedyAgentLB::Heapify(HeapData *heap, int node, int heapSize, HeapCmp cmp)
60 {
61   int left = 2*node+1;
62   int right = 2*node+2;
63   int xchange;
64
65   //heap[left].load > heap[node].load)
66   if (left <= heapSize &&  Compare(heap[left].load, heap[node].load, cmp))
67     xchange = left;
68   else xchange = node;
69   //heap[right].load > heap[xchange].load) 
70   if (right <= heapSize && Compare(heap[right].load, heap[xchange].load, cmp))
71     xchange = right;
72
73   if (xchange != node) {
74     HeapData obj;
75     obj = heap[node];
76     heap[node] = heap[xchange];
77     heap[xchange] = obj;
78     Heapify(heap, xchange, heapSize, cmp);
79   }    
80 }
81
82 void GreedyAgentLB::BuildHeap(HeapData *data, int heapSize, HeapCmp cmp)
83 {
84         int i;
85         for(i=heapSize/2; i >= 0; i--)
86                 Heapify(data, i, heapSize, cmp);
87 }
88
89 void GreedyAgentLB::HeapSort(HeapData *data, int heapSize, HeapCmp cmp)
90 {
91         int i;
92         HeapData key;
93
94         int origSize = heapSize;
95         BuildHeap(data, heapSize, cmp);
96         for (i=heapSize; i > 0; i--) {
97                key = data[0];
98                data[0] = data[i];
99                data[i] = key;
100                heapSize--;
101                Heapify(data, 0, heapSize, cmp);
102         }
103         // after HeapSort, the data are in reverse order
104         for (i=0; i<(origSize+1)/2; i++) {
105           key = data[i];
106           data[i] = data[origSize-i];
107           data[origSize-i] = key;
108         }
109 }
110
111 GreedyAgentLB::HeapData* 
112 GreedyAgentLB::BuildObjectArray(CentralLB::LDStats* stats, 
113                              int count, int *objCount)
114 {
115   HeapData *objData;
116   int obj;
117
118 //for (obj = 0; obj < stats[pe].n_objs; obj++)
119 //if (stats[pe].objData[obj].migratable == CmiTrue) (*objCount)++; 
120
121   objData  = new HeapData[stats->n_objs];
122   *objCount = 0; 
123   for(obj=0; obj < stats->n_objs; obj++) {
124     LDObjData &oData = stats->objData[obj];
125     int pe = stats->from_proc[obj];
126     if (!oData.migratable) {
127       if (!stats->procs[pe].available) 
128         CmiAbort("GreedyAgentLB cannot handle nonmigratable object on an unavial processor!\n");
129       continue;
130     }
131     objData[*objCount].load = oData.wallTime * stats->procs[pe].pe_speed;
132     objData[*objCount].pe = pe;
133     objData[*objCount].id = obj;
134     (*objCount)++;
135   }
136   
137   HeapSort(objData, *objCount-1, GT);
138 /*
139 for (int i=0; i<*objCount; i++)
140   CmiPrintf("%f ", objData[i].load);
141 CmiPrintf("\n");
142 */
143   return objData;
144 }
145
146 GreedyAgentLB::HeapData* 
147 GreedyAgentLB::BuildCpuArray(CentralLB::LDStats* stats, 
148                           int count, int *peCount)
149 {
150   int pe;
151
152   *peCount = 0;
153   for (pe = 0; pe < count; pe++)
154     if (stats->procs[pe].available) (*peCount)++;
155   HeapData *data = new HeapData[*peCount];
156   int *map = new int[count];
157   
158   *peCount = 0;
159   for (pe=0; pe < count; pe++) {
160     CentralLB::ProcStats &peData = stats->procs[pe];
161  
162     data[*peCount].load = 0.0;
163     map[pe] = -1;
164     if (peData.available) 
165     {
166       data[*peCount].load += peData.bg_walltime;
167       data[*peCount].pe = data[*peCount].id = pe;
168       map[pe] = *peCount;
169       (*peCount)++;
170     }
171   }
172
173   // take non migratbale object load as background load
174   for (int obj = 0; obj < stats->n_objs; obj++) 
175   { 
176       LDObjData &oData = stats->objData[obj];
177       if (!oData.migratable)  {
178         int pe = stats->from_proc[obj];
179         pe = map[pe];
180         if (pe==-1) 
181           CmiAbort("GreedyAgentLB: nonmigratable object on an unavail processor!\n");
182         data[pe].load += oData.wallTime;
183       }
184   }
185
186   // considering cpu speed
187   for (pe = 0; pe<*peCount; pe++)
188     data[pe].load *= stats->procs[data[pe].pe].pe_speed;
189
190   BuildHeap(data, *peCount-1, LT);     // minHeap
191   delete [] map;
192   return data;
193 }
194
195 void GreedyAgentLB::work(CentralLB::LDStats* stats, int count)
196 {
197   int  i, obj, heapSize, objCount;
198   int *pemap = new int [count];
199   HeapData *cpuData = BuildCpuArray(stats, count, &heapSize);
200   HeapData *objData = BuildObjectArray(stats, count, &objCount);
201         
202         int max_neighbors=0;
203  
204         //int simprocs = LBSimulation::simProcs;
205         //CkPrintf("\nnum of procs:%d\n",simprocs);
206         
207
208         CkPrintf("num procs in stats:%d\n",count);
209         topologyAgent = new TopologyAgent(stats,count);
210
211         max_neighbors = topologyAgent->topo->max_neighbors();
212         
213   if (_lb_args.debug()) CkPrintf("In GreedyAgentLB strategy\n",CkMyPe());
214
215   heapSize--;
216         
217         HeapData *minCpu = new HeapData[count];
218         double minLoad = 0.0;
219         double loadThreshold = 0.0;
220         int *trialpes = new int[count+1];
221         int *trialmap = new int[count];
222         int *existing_map = new int[objCount];
223         Agent::Elem *preferList;
224         
225         for(i=0;i<objCount;i++)
226                 existing_map[i]=-1;
227
228         int extractIndex=0;
229
230         //stats->makeCommHash();
231
232         CkPrintf("before assigning objects...objcount:%d\n",objCount);
233   for (obj=0; obj < objCount; obj++) {
234     //HeapData minCpu;  
235     // Operation of extracting the the least loaded processor
236     // from the heap
237     //int extractIndex=0;
238                 
239                         CkPrintf("obj count:%d\n",obj);
240                 for(i=0;i<=count;i++)
241                         trialpes[i]=-1;
242
243                 if(extractIndex==0)
244                         minLoad = cpuData[0].load;
245                 else
246                         minLoad = minCpu[0].load;
247
248                                 //if(minLoad < 0.0)
249                 //      loadThreshold = minLoad*(1-LOAD_OFFSET);
250                 //else
251                 loadThreshold = minLoad*(1+LOAD_OFFSET);
252                 
253                 //CkPrintf("minload :%lf , threshold:%lf , heapSize:%d\n",minLoad,loadThreshold,heapSize);
254                 //We can do better by extracting from the heap only the incremental load nodes
255                 //after we have assigned the preferred node in the previous step
256                 //....as the others are still with us..
257         
258                 //CkPrintf("heapsize before :%d\n",heapSize);
259                 /*CkPrintf("heap stats...\n");
260                 for(int t=0;t<=heapSize;t++)
261                         CkPrintf("..pe:%d,load:%f..",cpuData[t].pe,cpuData[t].load);
262                 */
263                 while(1){
264                         if(cpuData[0].load > loadThreshold)
265                                 break;
266                         minCpu[extractIndex]=cpuData[0];
267                         extractIndex++;
268         cpuData[0]=cpuData[heapSize];
269         heapSize--;
270                         if(heapSize==-1)
271                                 break;
272         Heapify(cpuData, 0, heapSize, LT);    
273                 }
274                 //CkPrintf("after extracting loop....extractindex:%d,heapsize:%d\n",extractIndex,heapSize);
275                 //CkPrintf("trialpes...\n");
276                 int trialLen = 0;
277                 if(obj!=0){
278                         trialLen = max_neighbors*max_neighbors;
279                         if(trialLen > extractIndex)
280                                 trialLen = extractIndex;
281                 }
282                 else
283                         trialLen = extractIndex;
284                         
285                 for(i=0;i<trialLen;i++){
286                         trialpes[i]=minCpu[i].pe;
287                         trialmap[minCpu[i].pe]=i;
288                 }
289                 preferList = topologyAgent->my_preferred_procs(existing_map,objData[obj].id,trialpes,1);
290     // Increment the time of the least loaded processor by the cpuTime of
291     // the `heaviest' object
292                 // Assign the object to first processor in the preferList...we may change this
293                 // and assign by comparing the object load with topology comm cost
294                 int minIndex = trialmap[preferList[0].pe];
295                 /*int s=0;
296                 for(s=0;s<trialLen;s++)
297                         if(minCpu[s].pe == preferList[0].pe){
298                                 minIndex = s;
299                                 break;
300                         }
301                 */
302                 //CkPrintf("first element of prefer list...%d,%d...\n",minIndex,minCpu[minIndex].pe);
303                 
304                 //if(s==extractIndex)
305                         //CmiAbort("Seems as if Agent has returned corrupt value");
306
307                 const int dest = minCpu[minIndex].pe;
308                 const int id   = objData[obj].id;
309
310                 //CkPrintf("chk before load updation\n");
311     minCpu[minIndex].load += objData[obj].load;
312                 //CkPrintf("chk within updation.\n");
313                 existing_map[id]=minCpu[minIndex].pe;
314                 
315     //Insert object into migration queue if necessary
316     //const int dest = minCpu[minIndex].pe;
317     const int pe   = objData[obj].pe;
318     //const int id   = objData[obj].id;
319     if (dest != pe) {
320       stats->to_proc[id] = dest;
321       if (_lb_args.debug()>1) 
322         CkPrintf("[%d] Obj %d migrating from %d to %d\n", CkMyPe(),obj,pe,dest);
323     }
324
325     //Insert all the extracted processors (one with load updated) back into the heap
326     /*int cnt=0;
327                 while(cnt<extractIndex){
328                         heapSize++;
329         int location = heapSize;
330         while (location>0 && cpuData[(location-1)/2].load > minCpu[cnt].load) {
331         cpuData[location] = cpuData[(location-1)/2];
332         location = (location-1)/2;
333         }
334         cpuData[location] = minCpu[cnt];
335                         cnt++;
336                 }*/
337                 
338                 heapSize++;
339     extractIndex--;
340                 int location = heapSize;
341     while (location>0 && cpuData[(location-1)/2].load > minCpu[minIndex].load) {
342         cpuData[location] = cpuData[(location-1)/2];
343         location = (location-1)/2;
344     }
345     cpuData[location] = minCpu[minIndex];
346
347                 for(int r=minIndex;r<extractIndex;r++)
348                         minCpu[r] = minCpu[r+1];
349         }
350
351   delete [] cpuData;
352   delete [] objData;
353         delete [] minCpu;
354 }
355
356
357
358 /*@}*/
359
360
361
362