ck-ldb: the API for the virtual work function changed
[charm.git] / src / ck-ldb / GreedyAgentLB.C
1 /*****************************************************************************
2  * $Source$
3  * $Author$
4  * $Date$
5  *****************************************************************************/
6
7 /**
8  * \addtogroup CkLdb
9 */
10 /*@{*/
11
12 /*
13  status:
14   * support processor avail bitvector
15   * support nonmigratable attrib
16 */
17
18 #include <charm++.h>
19 #include <LBSimulation.h>
20
21 #include "cklists.h"
22 #include "GreedyAgentLB.h"
23
24 #define LOAD_OFFSET 0.05
25
26 CreateLBFunc_Def(GreedyAgentLB,"always assign the heaviest obj onto lightest loaded processor taking into account the topology")
27
28 /*static void lbinit(void) {
29   LBRegisterBalancer("GreedyAgentLB", 
30                      CreateGreedyAgentLB, 
31                      AllocateGreedyAgentLB, 
32                      "always assign the heaviest obj onto lightest loaded processor.");
33 }
34 */
35 #include "GreedyAgentLB.def.h"
36
37 GreedyAgentLB::GreedyAgentLB(const CkLBOptions &opt): CentralLB(opt)
38 {
39   lbname = "GreedyAgentLB";
40   if (CkMyPe()==0)
41     CkPrintf("[%d] GreedyAgentLB created\n",CkMyPe());
42 }
43
44 CmiBool GreedyAgentLB::QueryBalanceNow(int _step)
45 {
46   //  CkPrintf("[%d] Balancing on step %d\n",CkMyPe(),_step);
47   return CmiTrue;
48 }
49
50 CmiBool  GreedyAgentLB::Compare(double x, double y, HeapCmp cmp)
51 {
52   const int test =  ((cmp == GT) ? (x > y) : (x < y));
53
54   if (test) return CmiTrue; 
55   else return CmiFalse;
56 }
57
58
59 void GreedyAgentLB::Heapify(HeapData *heap, int node, int heapSize, HeapCmp cmp)
60 {
61   int left = 2*node+1;
62   int right = 2*node+2;
63   int xchange;
64
65   //heap[left].load > heap[node].load)
66   if (left <= heapSize &&  Compare(heap[left].load, heap[node].load, cmp))
67     xchange = left;
68   else xchange = node;
69   //heap[right].load > heap[xchange].load) 
70   if (right <= heapSize && Compare(heap[right].load, heap[xchange].load, cmp))
71     xchange = right;
72
73   if (xchange != node) {
74     HeapData obj;
75     obj = heap[node];
76     heap[node] = heap[xchange];
77     heap[xchange] = obj;
78     Heapify(heap, xchange, heapSize, cmp);
79   }    
80 }
81
82 void GreedyAgentLB::BuildHeap(HeapData *data, int heapSize, HeapCmp cmp)
83 {
84         int i;
85         for(i=heapSize/2; i >= 0; i--)
86                 Heapify(data, i, heapSize, cmp);
87 }
88
89 void GreedyAgentLB::HeapSort(HeapData *data, int heapSize, HeapCmp cmp)
90 {
91         int i;
92         HeapData key;
93
94         int origSize = heapSize;
95         BuildHeap(data, heapSize, cmp);
96         for (i=heapSize; i > 0; i--) {
97                key = data[0];
98                data[0] = data[i];
99                data[i] = key;
100                heapSize--;
101                Heapify(data, 0, heapSize, cmp);
102         }
103         // after HeapSort, the data are in reverse order
104         for (i=0; i<(origSize+1)/2; i++) {
105           key = data[i];
106           data[i] = data[origSize-i];
107           data[origSize-i] = key;
108         }
109 }
110
111 GreedyAgentLB::HeapData* 
112 GreedyAgentLB::BuildObjectArray(CentralLB::LDStats* stats, 
113                              int count, int *objCount)
114 {
115   HeapData *objData;
116   int obj;
117
118 //for (obj = 0; obj < stats[pe].n_objs; obj++)
119 //if (stats[pe].objData[obj].migratable == CmiTrue) (*objCount)++; 
120
121   objData  = new HeapData[stats->n_objs];
122   *objCount = 0; 
123   for(obj=0; obj < stats->n_objs; obj++) {
124     LDObjData &oData = stats->objData[obj];
125     int pe = stats->from_proc[obj];
126     if (!oData.migratable) {
127       if (!stats->procs[pe].available) 
128         CmiAbort("GreedyAgentLB cannot handle nonmigratable object on an unavial processor!\n");
129       continue;
130     }
131     objData[*objCount].load = oData.wallTime * stats->procs[pe].pe_speed;
132     objData[*objCount].pe = pe;
133     objData[*objCount].id = obj;
134     (*objCount)++;
135   }
136   
137   HeapSort(objData, *objCount-1, GT);
138 /*
139 for (int i=0; i<*objCount; i++)
140   CmiPrintf("%f ", objData[i].load);
141 CmiPrintf("\n");
142 */
143   return objData;
144 }
145
146 GreedyAgentLB::HeapData* 
147 GreedyAgentLB::BuildCpuArray(CentralLB::LDStats* stats, 
148                           int count, int *peCount)
149 {
150   int pe;
151
152   *peCount = 0;
153   for (pe = 0; pe < count; pe++)
154     if (stats->procs[pe].available) (*peCount)++;
155   HeapData *data = new HeapData[*peCount];
156   int *map = new int[count];
157   
158   *peCount = 0;
159   for (pe=0; pe < count; pe++) {
160     CentralLB::ProcStats &peData = stats->procs[pe];
161  
162     data[*peCount].load = 0.0;
163     map[pe] = -1;
164     if (peData.available) 
165     {
166       data[*peCount].load += peData.bg_walltime;
167       data[*peCount].pe = data[*peCount].id = pe;
168       map[pe] = *peCount;
169       (*peCount)++;
170     }
171   }
172
173   // take non migratbale object load as background load
174   for (int obj = 0; obj < stats->n_objs; obj++) 
175   { 
176       LDObjData &oData = stats->objData[obj];
177       if (!oData.migratable)  {
178         int pe = stats->from_proc[obj];
179         pe = map[pe];
180         if (pe==-1) 
181           CmiAbort("GreedyAgentLB: nonmigratable object on an unavail processor!\n");
182         data[pe].load += oData.wallTime;
183       }
184   }
185
186   // considering cpu speed
187   for (pe = 0; pe<*peCount; pe++)
188     data[pe].load *= stats->procs[data[pe].pe].pe_speed;
189
190   BuildHeap(data, *peCount-1, LT);     // minHeap
191   delete [] map;
192   return data;
193 }
194
195 void GreedyAgentLB::work(LDStats* stats)
196 {
197   int  i, obj, heapSize, objCount;
198   int n_pes = stats->count;
199
200   int *pemap = new int [n_pes];
201   HeapData *cpuData = BuildCpuArray(stats, n_pes, &heapSize);
202   HeapData *objData = BuildObjectArray(stats, n_pes, &objCount);
203         
204         int max_neighbors=0;
205  
206         //int simprocs = LBSimulation::simProcs;
207         //CkPrintf("\nnum of procs:%d\n",simprocs);
208         
209
210         CkPrintf("num procs in stats:%d\n", n_pes);
211         topologyAgent = new TopologyAgent(stats, n_pes);
212
213         max_neighbors = topologyAgent->topo->max_neighbors();
214         
215   if (_lb_args.debug()) CkPrintf("In GreedyAgentLB strategy\n",CkMyPe());
216
217   heapSize--;
218         
219         HeapData *minCpu = new HeapData[n_pes];
220         double minLoad = 0.0;
221         double loadThreshold = 0.0;
222         int *trialpes = new int[n_pes + 1];
223         int *trialmap = new int[n_pes];
224         int *existing_map = new int[objCount];
225         Agent::Elem *preferList;
226         
227         for(i=0;i<objCount;i++)
228                 existing_map[i]=-1;
229
230         int extractIndex=0;
231
232         //stats->makeCommHash();
233
234         CkPrintf("before assigning objects...objcount:%d\n",objCount);
235   for (obj=0; obj < objCount; obj++) {
236     //HeapData minCpu;  
237     // Operation of extracting the the least loaded processor
238     // from the heap
239     //int extractIndex=0;
240                 
241                         CkPrintf("obj count:%d\n",obj);
242                 for(i = 0; i <= n_pes; i++)
243                         trialpes[i]=-1;
244
245                 if(extractIndex==0)
246                         minLoad = cpuData[0].load;
247                 else
248                         minLoad = minCpu[0].load;
249
250                                 //if(minLoad < 0.0)
251                 //      loadThreshold = minLoad*(1-LOAD_OFFSET);
252                 //else
253                 loadThreshold = minLoad*(1+LOAD_OFFSET);
254                 
255                 //CkPrintf("minload :%lf , threshold:%lf , heapSize:%d\n",minLoad,loadThreshold,heapSize);
256                 //We can do better by extracting from the heap only the incremental load nodes
257                 //after we have assigned the preferred node in the previous step
258                 //....as the others are still with us..
259         
260                 //CkPrintf("heapsize before :%d\n",heapSize);
261                 /*CkPrintf("heap stats...\n");
262                 for(int t=0;t<=heapSize;t++)
263                         CkPrintf("..pe:%d,load:%f..",cpuData[t].pe,cpuData[t].load);
264                 */
265                 while(1){
266                         if(cpuData[0].load > loadThreshold)
267                                 break;
268                         minCpu[extractIndex]=cpuData[0];
269                         extractIndex++;
270         cpuData[0]=cpuData[heapSize];
271         heapSize--;
272                         if(heapSize==-1)
273                                 break;
274         Heapify(cpuData, 0, heapSize, LT);    
275                 }
276                 //CkPrintf("after extracting loop....extractindex:%d,heapsize:%d\n",extractIndex,heapSize);
277                 //CkPrintf("trialpes...\n");
278                 int trialLen = 0;
279                 if(obj!=0){
280                         trialLen = max_neighbors*max_neighbors;
281                         if(trialLen > extractIndex)
282                                 trialLen = extractIndex;
283                 }
284                 else
285                         trialLen = extractIndex;
286                         
287                 for(i=0;i<trialLen;i++){
288                         trialpes[i]=minCpu[i].pe;
289                         trialmap[minCpu[i].pe]=i;
290                 }
291                 preferList = topologyAgent->my_preferred_procs(existing_map,objData[obj].id,trialpes,1);
292     // Increment the time of the least loaded processor by the cpuTime of
293     // the `heaviest' object
294                 // Assign the object to first processor in the preferList...we may change this
295                 // and assign by comparing the object load with topology comm cost
296                 int minIndex = trialmap[preferList[0].pe];
297                 /*int s=0;
298                 for(s=0;s<trialLen;s++)
299                         if(minCpu[s].pe == preferList[0].pe){
300                                 minIndex = s;
301                                 break;
302                         }
303                 */
304                 //CkPrintf("first element of prefer list...%d,%d...\n",minIndex,minCpu[minIndex].pe);
305                 
306                 //if(s==extractIndex)
307                         //CmiAbort("Seems as if Agent has returned corrupt value");
308
309                 const int dest = minCpu[minIndex].pe;
310                 const int id   = objData[obj].id;
311
312                 //CkPrintf("chk before load updation\n");
313     minCpu[minIndex].load += objData[obj].load;
314                 //CkPrintf("chk within updation.\n");
315                 existing_map[id]=minCpu[minIndex].pe;
316                 
317     //Insert object into migration queue if necessary
318     //const int dest = minCpu[minIndex].pe;
319     const int pe   = objData[obj].pe;
320     //const int id   = objData[obj].id;
321     if (dest != pe) {
322       stats->to_proc[id] = dest;
323       if (_lb_args.debug()>1) 
324         CkPrintf("[%d] Obj %d migrating from %d to %d\n", CkMyPe(),obj,pe,dest);
325     }
326
327     //Insert all the extracted processors (one with load updated) back into the heap
328     /*int cnt=0;
329                 while(cnt<extractIndex){
330                         heapSize++;
331         int location = heapSize;
332         while (location>0 && cpuData[(location-1)/2].load > minCpu[cnt].load) {
333         cpuData[location] = cpuData[(location-1)/2];
334         location = (location-1)/2;
335         }
336         cpuData[location] = minCpu[cnt];
337                         cnt++;
338                 }*/
339                 
340                 heapSize++;
341     extractIndex--;
342                 int location = heapSize;
343     while (location>0 && cpuData[(location-1)/2].load > minCpu[minIndex].load) {
344         cpuData[location] = cpuData[(location-1)/2];
345         location = (location-1)/2;
346     }
347     cpuData[location] = minCpu[minIndex];
348
349                 for(int r=minIndex;r<extractIndex;r++)
350                         minCpu[r] = minCpu[r+1];
351         }
352
353   delete [] cpuData;
354   delete [] objData;
355         delete [] minCpu;
356 }
357
358
359
360 /*@}*/
361
362
363
364