391f267c3bc1fcd1c11d9639e0cb55b142bf5ca3
[charm.git] / src / conv-ldb / topology.C
1
2 /*****************************************************************************
3  * $Source$
4  * $Author$
5  * $Date$
6  * $Revision$
7  *****************************************************************************/
8
9 /**
10  * \addtogroup CkLdb
11 */
12 /*@{*/
13
14 #include <math.h>
15
16 //#include "LBDatabase.h"
17 #include "charm.h"
18 #include "cklists.h"
19 #include "topology.h"
20
21 extern "C" char *_lbtopo;                       /* topology name string */
22
23 int LBTopology::get_hop_count(int src,int dest)
24 {
25         int npe;
26         int *visited_srcs;
27
28         if(src==dest)
29                 return 0;
30         
31         npe = max_neighbors();
32         visited_srcs = new int[npes];
33         
34         int count = rec_hop_count(src,dest,npe,1,visited_srcs,999999);
35         delete [] visited_srcs;
36
37         return count;
38 }
39
40 int LBTopology::rec_hop_count(int src,int dest,int max_neigh,int count,int *visited_srcs,int min_hop_cnt)
41 {
42         int *pes = new int[max_neigh];
43         //int min_hop_cnt=999999;
44         int ret_val=0;
45         int skip_neigh=0;
46         int neigh_cnt=0;
47         int i;
48         
49         neighbors(src,pes,neigh_cnt);
50         
51         visited_srcs[count-1]=src;
52         
53         for(i=0;i<neigh_cnt;i++)
54         {
55                 if(pes[i]==dest)
56                         return count;
57         }
58         for(i=0;i<neigh_cnt;i++)
59         {
60                 for(int j=0;j<count;j++)
61                         if(visited_srcs[j]==pes[i])
62                         {
63                                 skip_neigh=1;
64                                 break;
65                         }
66                 if(!skip_neigh)
67                 {
68                         if(min_hop_cnt > count+1){
69                                 ret_val=rec_hop_count(pes[i],dest,max_neigh,count+1,visited_srcs,min_hop_cnt);
70                                 if(ret_val < min_hop_cnt)
71                                         min_hop_cnt = ret_val;
72                         }
73                 }
74                 else
75                         skip_neigh=0;
76         }
77         delete [] pes;
78         return min_hop_cnt;
79 }
80
81 double LBTopology::per_hop_delay(int last_hop)
82 {
83         if(!last_hop)
84                 return (HOP_LINK_DELAY + HOP_PROC_DELAY);
85         else
86                 return HOP_LINK_DELAY;
87 }
88
89 void LBTopology::get_pairwise_hop_count(double  **distance)
90 {
91   struct queueNode
92   {
93     int index;
94     int dist;
95     queueNode *next;
96     queueNode(int i,int d): index(i), dist(d), next(NULL) {}
97   };
98   
99   bool *visited=new bool[npes];
100   int *neigh=new int[max_neighbors()];
101   int num_neighbors;
102
103   for(int i=0;i<npes;i++)
104   {
105     //Init data structures for BFS from i-th proc
106     for(int j=0;j<npes;j++)
107       visited[j]=false;
108
109     queueNode *q=new queueNode(i,0);
110     queueNode *last=q;
111     distance[i][i]=0;
112     visited[i]=true;
113
114     // Perform BFS until queue is empty
115     while(q)
116     { 
117       neighbors(q->index,neigh,num_neighbors);
118       for(int j=0;j<num_neighbors;j++)
119       {
120         if(!visited[neigh[j]])
121         {
122           visited[neigh[j]]=true;
123           distance[i][neigh[j]]=q->dist+1;
124           queueNode *qnew=new queueNode(neigh[j],q->dist+1);
125           last->next=qnew;
126           last=last->next;
127         }
128       }
129       queueNode *qtemp=q;
130       q=q->next;
131       delete qtemp;
132     }
133   }
134   delete[] visited;
135   delete[] neigh;
136 }
137
138 //smp - assume 1,2,3 or 4 processors per node
139
140 template <int ppn>
141 class LBTopo_smp_n: public LBTopology {
142 public:
143   LBTopo_smp_n(int p): LBTopology(p) {}
144   virtual int max_neighbors() { return npes - 1; }
145
146   virtual void neighbors(int mype, int* _n, int &nb){
147                 CkPrintf("neighbors:Nothing in here..\n");
148         }
149         
150         int get_hop_count(int src,int dest){
151                 
152                 //CkPrintf("in smp get_hop_count\n");
153                 int a = src/ppn;
154                 int b = dest/ppn;
155                 
156                 if(a!=b){
157                         //CkPrintf("2 returned\n");
158                         return 2;
159                 }
160                 else{
161                         //CkPrintf("1 returned\n");
162                         return 1;
163                 }
164         }
165 };
166
167 typedef LBTopo_smp_n<1> LBTopo_smp_n_1;
168 typedef LBTopo_smp_n<2> LBTopo_smp_n_2;
169 typedef LBTopo_smp_n<3> LBTopo_smp_n_3;
170 typedef LBTopo_smp_n<4> LBTopo_smp_n_4;
171
172 LBTOPO_MACRO(LBTopo_smp_n_1)
173 LBTOPO_MACRO(LBTopo_smp_n_2)
174 LBTOPO_MACRO(LBTopo_smp_n_3)
175 LBTOPO_MACRO(LBTopo_smp_n_4)
176
177
178 // ring
179
180 LBTOPO_MACRO(LBTopo_ring)
181
182 int LBTopo_ring::max_neighbors()
183 {
184   if (npes > 2) return 2;
185   else return (npes-1);
186 }
187
188 void LBTopo_ring::neighbors(int mype, int* _n, int &nb)
189 {
190   nb = 0;
191   if (npes>1) _n[nb++] = (mype + npes -1) % npes;
192   if (npes>2) _n[nb++] = (mype + 1) % npes;
193 }
194
195 int LBTopo_ring::get_hop_count(int src,int dest){
196         
197         int dist=src-dest;
198         if(dist<0) dist=-dist;
199         
200         if((npes-dist) < dist)
201                 return (npes-dist);
202         else
203                 return dist;
204 }
205
206 //  TORUS 2D
207
208 LBTOPO_MACRO(LBTopo_torus2d)
209
210 LBTopo_torus2d::LBTopo_torus2d(int p): LBTopology(p) 
211 {
212   width = (int)sqrt(p*1.0);
213   if (width * width < npes) width++;
214 }
215
216 int LBTopo_torus2d::max_neighbors()
217 {
218   return 4;
219 }
220
221 int LBTopo_torus2d::goodcoor(int x, int y)
222 {
223   if (x<0 || x>=width) return -1;
224   if (y<0 || y>=width) return -1;
225   int next = x*width + y;
226   if (next<npes && next>=0) return next;
227   return -1;
228 }
229
230 static int checkuniq(int *arr, int nb, int val) {
231   for (int i=0;i<nb;i++) if (arr[i]==val) return 0;
232   return 1;
233 }
234
235 void LBTopo_torus2d::neighbors(int mype, int* _n, int &nb)
236 {
237   int next;
238   int x = mype/width;
239   int y = mype%width;
240   nb=0;
241   for (int i=-1; i<=1; i+=2) {
242     int x1 = x+i;
243     if (x1 == -1) {
244       x1 = width-1;
245       while (goodcoor(x1, y)==-1) x1--;
246     }
247     else if (goodcoor(x1, y) == -1) x1=0;
248     next = goodcoor(x1, y);
249     CmiAssert(next != -1);
250     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
251
252     int y1 = y+i;
253     if (y1 == -1) {
254       y1 = width-1;
255       while (goodcoor(x, y1)==-1) y1--;
256     }
257     else if (goodcoor(x, y1) == -1) y1=0;
258     next = goodcoor(x, y1);
259     CmiAssert(next != -1);
260     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
261   }
262 }
263
264 int LBTopo_torus2d::get_hop_count(int src,int dest){
265         int xpos_src,xpos_dest;
266         int ypos_src,ypos_dest;
267         int xdist=0;
268         int ydist=0;
269         
270         int xchange;
271         if(src > dest){
272                 xchange = src;
273                 src = dest;
274                 dest = xchange;
275         }
276         
277         xpos_src=src%width;
278         ypos_src=src/width;
279
280         xpos_dest=dest%width;
281         ypos_dest=dest/width;
282
283         xdist = xpos_dest-xpos_src;
284         if(xdist<0) xdist=-xdist;
285         if((width-xdist) < xdist)
286                 xdist = width-xdist;
287         
288         ydist = ypos_dest-ypos_src;
289         if(ydist<0) ydist=-ydist;
290
291         int lastpos=(npes-1)%width;
292         int otherylen=0;
293
294         if(xpos_src<=lastpos && xpos_dest<=lastpos)
295                 otherylen=((npes-1)/width)+1-ydist;
296         else{
297                 if(ypos_dest==((npes-1)/width))
298                         otherylen=((npes-1)/width)+1-ydist;
299                 else    
300                         otherylen=((npes-1)/width)-ydist;
301         }
302         
303         if(otherylen < ydist)
304                 ydist=otherylen;
305         
306         //added later
307         int sdist=0,adist=0,bdist=0,cdist=0,ddist=0;
308         
309         if(xpos_src>lastpos && xpos_dest>lastpos){
310                 sdist = xpos_src;
311                 if((width-sdist) < sdist)
312                 sdist = width-sdist;
313
314                 adist = ((npes-1)/width)-ypos_src;
315                 if(adist<0) adist=-adist;
316                 if(ypos_src+1 < adist)
317                         adist = ypos_src+1;
318         
319                 bdist = 1;
320
321                 cdist = ((npes-1)/width)-ypos_dest;
322                 if(cdist<0) cdist=-cdist;
323                 if(ypos_dest+1 < cdist)
324                         cdist = ypos_dest+1;
325
326                 ddist = xpos_dest-lastpos;
327                 if(ddist<0) ddist=-ddist;
328                 if((width-ddist) < ddist)
329                         ddist = width-ddist;
330         }
331         else{
332                 if(xpos_src>lastpos){
333                         xchange = src;
334                         src = dest;
335                         dest = xchange;
336                         xpos_src=src%width;
337                         ypos_src=src/width;
338                         xpos_dest=dest%width;
339                         ypos_dest=dest/width;
340                 }
341                 adist = ((npes-1)/width)-ypos_src;
342                 if(adist<0) adist=-adist;
343                 if(ypos_src+1 < adist)
344                         adist = ypos_src+1;
345         
346                 if(xpos_dest<=lastpos){
347                         bdist = xpos_dest-xpos_src;
348                         if(bdist<0) bdist=-bdist;
349                         if((lastpos+1-bdist) < bdist)
350                                 bdist = lastpos+1-bdist;
351
352                         cdist = ((npes-1)/width)-ypos_dest;
353                         if(cdist<0) cdist=-cdist;
354                         if(ypos_dest+1 < cdist)
355                                 cdist = ypos_dest+1;
356                 
357                         ddist=0;
358                 }
359                 else{
360                         bdist = lastpos-xpos_src;
361                         if(bdist<0) bdist=-bdist;
362                         if((xpos_src+1) < bdist)
363                                 bdist = xpos_src+1;
364
365                         cdist = ((npes-1)/width)-ypos_dest;
366                         if(cdist<0) cdist=-cdist;
367                         if(ypos_dest+1 < cdist)
368                                 cdist = ypos_dest+1;
369
370                         ddist = xpos_dest-lastpos;
371                         if(ddist<0) ddist=-ddist;
372                         if((width-ddist) < ddist)
373                                 ddist = width-ddist;
374                 }
375         }
376         
377         if((sdist+adist+bdist+cdist+ddist) < (xdist+ydist))
378                 return (sdist+adist+bdist+cdist+ddist);
379         else
380                 return (xdist+ydist);
381
382 }
383
384 //  TORUS 3D
385
386 LBTOPO_MACRO(LBTopo_torus3d)
387
388 LBTopo_torus3d::LBTopo_torus3d(int p): LBTopology(p) 
389 {
390   width = 1;
391   while ( (width+1) * (width+1) * (width+1) <= npes) width++;
392   if (width * width * width < npes) width++;
393 }
394
395 int LBTopo_torus3d::max_neighbors()
396 {
397   return 6;
398 }
399
400 int LBTopo_torus3d::goodcoor(int x, int y, int z)
401 {
402   if (x<0 || x>=width) return -1;
403   if (y<0 || y>=width) return -1;
404   if (z<0 || z>=width) return -1;
405   int next = x*width*width + y*width + z;
406   if (next<npes && next>=0) return next;
407   return -1;
408 }
409
410 void LBTopo_torus3d::neighbors(int mype, int* _n, int &nb)
411 {
412
413   int x = mype/(width*width);
414   int k = mype%(width*width);
415   int y = k/width;
416   int z = k%width;
417   int next;
418   nb=0;
419   for (int i=-1; i<=1; i+=2) {
420     int x1 = x+i;
421     if (x1 == -1) {
422       x1 = width-1;
423       while (goodcoor(x1, y, z)==-1) x1--;
424     }
425     else if (goodcoor(x1, y, z) == -1) x1=0;
426     next = goodcoor(x1, y, z);
427     CmiAssert(next != -1);
428     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
429
430     int y1 = y+i;
431     if (y1 == -1) {
432       y1 = width-1;
433       while (goodcoor(x, y1, z)==-1) y1--;
434     }
435     else if (goodcoor(x, y1, z) == -1) y1=0;
436     next = goodcoor(x, y1, z);
437     CmiAssert(next != -1);
438     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
439
440     int z1 = z+i;
441     if (z1 == -1) {
442       z1 = width-1;
443       while (goodcoor(x, y, z1)==-1) z1--;
444     }
445     else if (goodcoor(x, y, z1) == -1) z1=0;
446     next = goodcoor(x, y, z1);
447     CmiAssert(next != -1);
448     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
449   }
450 }
451
452 /*
453 // Works only for perfect cube number of processor topologies
454 int LBTopo_torus3d::get_hop_count(int src,int dest){
455         
456         int x_src = src/(width*width);
457   int k_src = src%(width*width);
458   int y_src = k_src/width;
459   int z_src = k_src%width;
460
461         int x_dest = dest/(width*width);
462   int k_dest = dest%(width*width);
463   int y_dest = k_dest/width;
464   int z_dest = k_dest%width;
465
466         int xdist=0,ydist=0,zdist=0;
467         
468         //CkPrintf("just a chk........\n");
469         xdist = x_dest-x_src;
470         if(xdist<0) xdist=-xdist;
471         if((width-xdist) < xdist)
472                 xdist = width-xdist;
473
474         ydist = y_dest-y_src;
475         if(ydist<0) ydist=-ydist;
476         if((width-ydist) < ydist)
477                 ydist = width-ydist;
478
479         zdist = z_dest-z_src;
480         if(zdist<0) zdist=-zdist;
481         if((width-zdist) < zdist)
482                 zdist = width-zdist;
483
484         return (xdist+ydist+zdist);
485
486 }
487 */
488
489 //Mesh3D
490 LBTOPO_MACRO(LBTopo_mesh3d)
491
492 LBTopo_mesh3d::LBTopo_mesh3d(int p): LBTopology(p) 
493 {
494   width = 1;
495   while ( (width+1) * (width+1) * (width+1) <= npes) width++;
496   if (width * width * width < npes) width++;
497 }
498
499 int LBTopo_mesh3d::max_neighbors()
500 {
501   return 6;
502 }
503
504 int LBTopo_mesh3d::goodcoor(int x, int y, int z)
505 {
506   if (x<0 || x>=width) return -1;
507   if (y<0 || y>=width) return -1;
508   if (z<0 || z>=width) return -1;
509   int next = z*width*width + y*width + x;
510   if (next<npes && next>=0) return next;
511   return -1;
512 }
513
514 void LBTopo_mesh3d::neighbors(int mype, int* _n, int &nb)
515 {
516
517   int z = mype/(width*width);
518   int k = mype%(width*width);
519   int y = k/width;
520   int x = k%width;
521   int next;
522   int isNeigh=1;
523   nb=0;
524   for (int i=-1; i<=1; i+=2) {
525     isNeigh=1;
526     int x1 = x+i;
527     if (x1 == -1) {
528       //x1 = width-1;
529       x1=x;
530       //while (goodcoor(x1, y, z)==-1) x1--;
531       isNeigh=0;
532     }
533     else if (goodcoor(x1, y, z) == -1) { x1=0; isNeigh=0; }
534     next = goodcoor(x1, y, z);
535     CmiAssert(next != -1);
536     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
537
538     isNeigh=1;
539     int y1 = y+i;
540     if (y1 == -1) {
541       //y1 = width-1;
542       //while (goodcoor(x, y1, z)==-1) y1--;
543       y1=y;
544       isNeigh=0;
545     }
546     else if (goodcoor(x, y1, z) == -1) { y1=0; isNeigh=0; }
547     next = goodcoor(x, y1, z);
548     CmiAssert(next != -1);
549     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
550
551     isNeigh=1;
552     int z1 = z+i;
553     if (z1 == -1) {
554       //z1 = width-1;
555       //while (goodcoor(x, y, z1)==-1) z1--;
556       z1=z;
557       isNeigh=0;
558     }
559     else if (goodcoor(x, y, z1) == -1) { z1=0; isNeigh=0; }
560     next = goodcoor(x, y, z1);
561     CmiAssert(next != -1);
562     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
563   }
564 }
565
566 //  TORUS ND 
567 //  added by zshao1
568
569 template <int dimension>
570 class LBTopo_torus_nd: public LBTopology {
571 private:
572   // inherited int npes;
573   int* Cardinality;
574   int VirtualProcessorCount;
575   int* TempCo;
576 private:
577   int GetNeighborID(int ProcessorID, int number) {
578     CmiAssert(number>=0 && number<max_neighbors());
579     CmiAssert(ProcessorID>=0 && ProcessorID<npes);
580     get_processor_coordinates(ProcessorID, TempCo);
581
582     int index = number/2;
583     int displacement = (number%2)? -1: 1;
584     do{
585       TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
586       get_processor_id(TempCo, &ProcessorID);
587     } while (ProcessorID >= npes);
588     return ProcessorID;
589   }
590 public:
591   LBTopo_torus_nd(int p): LBTopology(p) /*inherited :npes(p) */ {
592     int i;
593     CmiAssert(dimension>=1 && dimension<=16);
594     CmiAssert(p>=1);
595
596     Cardinality = new int[dimension];
597     TempCo = new int[dimension];
598     double pp = p;
599     for(i=0;i<dimension;i++) {
600       Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
601       pp = pp / Cardinality[i];
602     }
603     VirtualProcessorCount = 1;
604     for(i=0;i<dimension;i++) {
605       VirtualProcessorCount *= Cardinality[i];
606     }
607   }
608   ~LBTopo_torus_nd() {
609     delete[] Cardinality;
610     delete[] TempCo;
611   }
612   virtual int max_neighbors() {
613     return dimension*2;
614   }
615   virtual void neighbors(int mype, int* _n, int &nb) {
616     nb = 0;
617     for(int i=0;i<dimension*2;i++) {
618       _n[nb] = GetNeighborID(mype, i);
619       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
620     }
621   }
622   virtual int get_dimension() {
623     return dimension;
624   }
625   virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
626     CmiAssert(processor_id>=0 && processor_id<VirtualProcessorCount);
627     CmiAssert( processor_coordinates != NULL );
628     for(int i=0;i<dimension;i++) {
629       processor_coordinates[i] = processor_id % Cardinality[i];
630       processor_id = processor_id / Cardinality[i];
631     }
632     return true;
633   }
634   virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
635     int i;
636     CmiAssert( processor_coordinates != NULL );
637     CmiAssert( processor_id != NULL );
638     for(i=dimension-1;i>=0;i--) 
639       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<Cardinality[i]);
640     (*processor_id) = 0;
641     for(i=dimension-1;i>=0;i--) {
642       (*processor_id) = (*processor_id)* Cardinality[i] + processor_coordinates[i];
643     }
644     return true;
645   }
646   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
647   virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) { 
648 //    CkPrintf("[%d] coordiate_difference begin\n", CkMyPe());
649     CmiAssert( my_coordinates != NULL);
650     CmiAssert( target_coordinates != NULL);
651     CmiAssert( difference != NULL);
652 //    CkPrintf("[%d] after assert\n", CkMyPe());
653     for(int i=0;i<dimension;i++) {
654 //      CkPrintf("[%d] coordiate_difference iteration %d\n", i);
655       difference[i] = target_coordinates[i] - my_coordinates[i];
656       if (abs(difference[i])*2 > Cardinality[i]) {
657         difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
658       } else if (abs(difference[i])*2 == Cardinality[i]) {
659         difference[i] = 0;
660       }
661     }
662 //    CkPrintf("[%d] coordiate_difference just before return\n");
663     return true;
664   }
665   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
666   virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) { 
667     CmiAssert( difference != NULL);
668     int my_coordinates[dimension];
669     int target_coordinates[dimension];
670     get_processor_coordinates(my_processor_id, my_coordinates);
671     get_processor_coordinates(target_processor_id, target_coordinates);
672     coordinate_difference(my_coordinates, target_coordinates, difference);
673     return true;
674   }
675 };
676
677 typedef LBTopo_torus_nd<1> LBTopo_torus_nd_1;
678 typedef LBTopo_torus_nd<2> LBTopo_torus_nd_2;
679 typedef LBTopo_torus_nd<3> LBTopo_torus_nd_3;
680 typedef LBTopo_torus_nd<4> LBTopo_torus_nd_4;
681 typedef LBTopo_torus_nd<5> LBTopo_torus_nd_5;
682 typedef LBTopo_torus_nd<6> LBTopo_torus_nd_6;
683 typedef LBTopo_torus_nd<7> LBTopo_torus_nd_7;
684
685 LBTOPO_MACRO(LBTopo_torus_nd_1)
686 LBTOPO_MACRO(LBTopo_torus_nd_2)
687 LBTOPO_MACRO(LBTopo_torus_nd_3)
688 LBTOPO_MACRO(LBTopo_torus_nd_4)
689 LBTOPO_MACRO(LBTopo_torus_nd_5)
690 LBTOPO_MACRO(LBTopo_torus_nd_6)
691 LBTOPO_MACRO(LBTopo_torus_nd_7)
692
693
694
695 //Torus ND with unequal number of processors in each dimension
696 /***************************************************************/
697 template <int dimension>
698 class LBTopo_itorus_nd: public LBTopology {
699 private:
700         int *dim;
701         int *tempCoor;
702         
703 public:
704         LBTopo_itorus_nd(int p): LBTopology(p) {
705         CkPrintf("Irregular torus created\n");
706         dim = new int[dimension];
707                 tempCoor = new int[dimension];
708
709                 int i=0;
710         char *lbcopy = strdup(_lbtopo);
711         char *ptr = strchr(lbcopy, ':');
712         if (ptr==NULL) return;
713         ptr = strtok(ptr+1, ",");
714         while (ptr) {
715                         dim[i]=atoi(ptr);
716                         i++;
717                         ptr = strtok(NULL, ",");
718         }
719                 CmiAssert(dimension==i);
720                 
721                 int procs=1;
722                 for(i=0;i<dimension;i++)
723                         procs*=dim[i];
724     CmiAssert(dimension>=1 && dimension<=16);
725     CmiAssert(p>=1);
726                 CmiAssert(procs==p);
727   }
728         
729   ~LBTopo_itorus_nd() {
730         delete[] dim;
731                 delete[] tempCoor;
732         }
733         
734   virtual int max_neighbors() {
735     return dimension*2;
736   }
737         
738         virtual void neighbors(int mype, int* _n, int &nb) {
739     nb = 0;
740     for(int i=0;i<dimension*2;i++) {
741       _n[nb] = GetNeighborID(mype, i);
742       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
743     }
744   }
745
746         int GetNeighborID(int ProcessorID, int number) {
747     CmiAssert(number>=0 && number<max_neighbors());
748     CmiAssert(ProcessorID>=0 && ProcessorID<npes);
749     get_processor_coordinates(ProcessorID, tempCoor);
750
751     int index = number/2;
752     int displacement = (number%2)? -1: 1;
753    // do{
754                 tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
755                 get_processor_id(tempCoor, &ProcessorID);
756     //} while (ProcessorID >= npes);
757     return ProcessorID;
758   }
759
760         virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
761     CmiAssert(processor_id>=0 && processor_id<npes);
762     CmiAssert(processor_coordinates != NULL );
763     for(int i=0;i<dimension;i++) {
764       processor_coordinates[i] = processor_id % dim[i];
765       processor_id = processor_id / dim[i];
766     }
767     return true;
768   }
769
770         virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
771     int i;
772     CmiAssert( processor_coordinates != NULL );
773     CmiAssert( processor_id != NULL );
774     for(i=dimension-1;i>=0;i--) 
775       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
776     (*processor_id) = 0;
777     for(i=dimension-1;i>=0;i--) {
778       (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
779     }
780     return true;
781   }
782 };
783
784
785 typedef LBTopo_itorus_nd<1> LBTopo_itorus_nd_1;
786 typedef LBTopo_itorus_nd<2> LBTopo_itorus_nd_2;
787 typedef LBTopo_itorus_nd<3> LBTopo_itorus_nd_3;
788 typedef LBTopo_itorus_nd<4> LBTopo_itorus_nd_4;
789 typedef LBTopo_itorus_nd<5> LBTopo_itorus_nd_5;
790 typedef LBTopo_itorus_nd<6> LBTopo_itorus_nd_6;
791 typedef LBTopo_itorus_nd<7> LBTopo_itorus_nd_7;
792
793 LBTOPO_MACRO(LBTopo_itorus_nd_1)
794 LBTOPO_MACRO(LBTopo_itorus_nd_2)
795 LBTOPO_MACRO(LBTopo_itorus_nd_3)
796 LBTOPO_MACRO(LBTopo_itorus_nd_4)
797 LBTOPO_MACRO(LBTopo_itorus_nd_5)
798 LBTOPO_MACRO(LBTopo_itorus_nd_6)
799 LBTOPO_MACRO(LBTopo_itorus_nd_7)
800
801
802 /******************************************************************/
803 //Mesh ND with unequal number of processors in each dimension
804 /***************************************************************/
805 template <int dimension>
806 class LBTopo_imesh_nd: public LBTopology {
807 private:
808         int *dim;
809         int *tempCoor;
810         
811 public:
812         LBTopo_imesh_nd(int p): LBTopology(p) {
813         CkPrintf("Irregular mesh created\n");
814         dim = new int[dimension];
815         tempCoor = new int[dimension];
816
817         int i=0;
818         char *lbcopy = strdup(_lbtopo);
819         char *ptr = strchr(lbcopy, ':');
820         if (ptr==NULL) return;
821         ptr = strtok(ptr+1, ",");
822         while (ptr) {
823           dim[i]=atoi(ptr);
824           i++;
825           ptr = strtok(NULL, ",");
826         }
827         CmiAssert(dimension==i);
828         
829         //char *ptr2=strchr(_lbtopo,':');
830         //*ptr2='\0';
831         int procs=1;
832         for(i=0;i<dimension;i++)
833           procs*=dim[i];
834           CmiAssert(dimension>=1 && dimension<=16);
835           CmiAssert(p>=1);
836           CmiAssert(procs==p);
837   }
838         
839   ~LBTopo_imesh_nd() {
840         delete[] dim;
841                 delete[] tempCoor;
842         }
843         
844   virtual int max_neighbors() {
845     return dimension*2;
846   }
847         
848   virtual void neighbors(int mype, int* _n, int &nb) {
849     nb = 0;
850     for(int i=0;i<dimension*2;i++) {
851       _n[nb] = GetNeighborID(mype, i);
852       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
853     }
854     /*CkPrintf("Nhs[%d]:",mype);
855     for(int i=0;i<nb;i++)
856       CkPrintf("%d ",_n[i]);
857     CkPrintf("\n");*/
858   }
859
860         int GetNeighborID(int ProcessorID, int number) {
861         CmiAssert(number>=0 && number<max_neighbors());
862         CmiAssert(ProcessorID>=0 && ProcessorID<npes);
863         get_processor_coordinates(ProcessorID, tempCoor);
864
865         int index = number/2;
866         int displacement = (number%2)? -1: 1;
867    // do{
868             if((tempCoor[index]==0 && displacement==-1) || (tempCoor[index]==dim[index]-1 && displacement==1)){
869         }
870         else{
871              tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
872              get_processor_id(tempCoor, &ProcessorID);
873             }
874     //} while (ProcessorID >= npes);
875     return ProcessorID;
876   }
877
878         virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
879     CmiAssert(processor_id>=0 && processor_id<npes);
880     CmiAssert(processor_coordinates != NULL );
881     for(int i=0;i<dimension;i++) {
882       processor_coordinates[i] = processor_id % dim[i];
883       processor_id = processor_id / dim[i];
884     }
885     return true;
886   }
887
888         virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
889     int i;
890     CmiAssert( processor_coordinates != NULL );
891     CmiAssert( processor_id != NULL );
892     for(i=dimension-1;i>=0;i--) 
893       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
894     (*processor_id) = 0;
895     for(i=dimension-1;i>=0;i--) {
896       (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
897     }
898     return true;
899   }
900 };
901
902
903 typedef LBTopo_imesh_nd<1> LBTopo_imesh_nd_1;
904 typedef LBTopo_imesh_nd<2> LBTopo_imesh_nd_2;
905 typedef LBTopo_imesh_nd<3> LBTopo_imesh_nd_3;
906 typedef LBTopo_imesh_nd<4> LBTopo_imesh_nd_4;
907 typedef LBTopo_imesh_nd<5> LBTopo_imesh_nd_5;
908 typedef LBTopo_imesh_nd<6> LBTopo_imesh_nd_6;
909 typedef LBTopo_imesh_nd<7> LBTopo_imesh_nd_7;
910
911 LBTOPO_MACRO(LBTopo_imesh_nd_1)
912 LBTOPO_MACRO(LBTopo_imesh_nd_2)
913 LBTOPO_MACRO(LBTopo_imesh_nd_3)
914 LBTOPO_MACRO(LBTopo_imesh_nd_4)
915 LBTOPO_MACRO(LBTopo_imesh_nd_5)
916 LBTOPO_MACRO(LBTopo_imesh_nd_6)
917 LBTOPO_MACRO(LBTopo_imesh_nd_7)
918
919
920 // dense graph
921
922 LBTOPO_MACRO(LBTopo_graph)
923
924 int LBTopo_graph::max_neighbors()
925 {
926   return (int)(sqrt(1.0*CmiNumPes())+0.5);
927 }
928
929 extern "C" void gengraph(int, int, int, int *, int *, int);
930
931 void LBTopo_graph::neighbors(int mype, int* na, int &nb)
932 {
933   gengraph(CmiNumPes(), (int)(sqrt(1.0*CmiNumPes())+0.5), 234, na, &nb, 0);
934 }
935
936 // complete graph
937
938 class LBTopo_complete: public LBTopology {
939 public:
940   LBTopo_complete(int p): LBTopology(p) {}
941   int max_neighbors() {
942     return npes - 1;
943   }
944   void neighbors(int mype, int* _n, int &nb) {
945     nb = 0;
946     for (int i=0; i<npes; i++)  if (mype != i) _n[nb++] = i;
947   }
948 };
949
950 LBTOPO_MACRO(LBTopo_complete)
951
952 //   k-ary tree
953
954 template <int k>
955 class LBTopo_karytree: public LBTopology {
956 public:
957   LBTopo_karytree(int p): LBTopology(p) {}
958   virtual int max_neighbors() {
959     return k+1;     // parent + children
960   }
961   virtual void neighbors(int mype, int* _n, int &nb) {
962     nb = 0;
963     if (mype!=0) _n[nb++] = (mype-1)/k;
964     int firstchild = mype*k+1;
965     for (int i=0; i<k; i++)
966       if (firstchild+i < npes) _n[nb++] = firstchild+i;
967   }
968 };
969
970 typedef LBTopo_karytree<2> LBTopo_2_arytree;
971 typedef LBTopo_karytree<3> LBTopo_3_arytree;
972 typedef LBTopo_karytree<3> LBTopo_4_arytree;
973
974 LBTOPO_MACRO(LBTopo_2_arytree)
975 LBTOPO_MACRO(LBTopo_3_arytree)
976 LBTOPO_MACRO(LBTopo_4_arytree)
977
978 //
979
980 class LBTopoMap {
981 public:
982   const char *name;
983   LBtopoFn fn;
984   LBTopoMap(const char *s, LBtopoFn f): name(s), fn(f) {}
985   LBTopoMap(const LBTopoMap &p);                // You don't want to copy
986   void operator=(const LBTopoMap &p);           // You don't want to copy
987 };
988
989 class LBTopoVec {
990   CkVec<LBTopoMap *> lbTopos;
991 public:
992   LBTopoVec() {
993     // register all topos
994     lbTopos.push_back(new LBTopoMap("ring", createLBTopo_ring));
995     lbTopos.push_back(new LBTopoMap("torus2d", createLBTopo_torus2d));
996     lbTopos.push_back(new LBTopoMap("torus3d", createLBTopo_torus3d));
997     lbTopos.push_back(new LBTopoMap("mesh3d", createLBTopo_mesh3d));
998     lbTopos.push_back(new LBTopoMap("torus_nd_1", createLBTopo_torus_nd_1));
999     lbTopos.push_back(new LBTopoMap("torus_nd_2", createLBTopo_torus_nd_2));
1000     lbTopos.push_back(new LBTopoMap("torus_nd_3", createLBTopo_torus_nd_3));
1001     lbTopos.push_back(new LBTopoMap("torus_nd_4", createLBTopo_torus_nd_4));
1002     lbTopos.push_back(new LBTopoMap("torus_nd_5", createLBTopo_torus_nd_5));
1003     lbTopos.push_back(new LBTopoMap("torus_nd_6", createLBTopo_torus_nd_6));
1004     lbTopos.push_back(new LBTopoMap("torus_nd_7", createLBTopo_torus_nd_7));
1005     lbTopos.push_back(new LBTopoMap("itorus_nd_1", createLBTopo_itorus_nd_1));
1006     lbTopos.push_back(new LBTopoMap("itorus_nd_2", createLBTopo_itorus_nd_2));
1007     lbTopos.push_back(new LBTopoMap("itorus_nd_3", createLBTopo_itorus_nd_3));
1008     lbTopos.push_back(new LBTopoMap("itorus_nd_4", createLBTopo_itorus_nd_4));
1009     lbTopos.push_back(new LBTopoMap("itorus_nd_5", createLBTopo_itorus_nd_5));
1010     lbTopos.push_back(new LBTopoMap("itorus_nd_6", createLBTopo_itorus_nd_6));
1011     lbTopos.push_back(new LBTopoMap("itorus_nd_7", createLBTopo_itorus_nd_7));
1012     lbTopos.push_back(new LBTopoMap("imesh_nd_1", createLBTopo_imesh_nd_1));
1013     lbTopos.push_back(new LBTopoMap("imesh_nd_2", createLBTopo_imesh_nd_2));
1014     lbTopos.push_back(new LBTopoMap("imesh_nd_3", createLBTopo_imesh_nd_3));
1015     lbTopos.push_back(new LBTopoMap("imesh_nd_4", createLBTopo_imesh_nd_4));
1016     lbTopos.push_back(new LBTopoMap("imesh_nd_5", createLBTopo_imesh_nd_5));
1017     lbTopos.push_back(new LBTopoMap("imesh_nd_6", createLBTopo_imesh_nd_6));
1018     lbTopos.push_back(new LBTopoMap("imesh_nd_7", createLBTopo_imesh_nd_7));
1019     lbTopos.push_back(new LBTopoMap("graph", createLBTopo_graph));
1020     lbTopos.push_back(new LBTopoMap("complete", createLBTopo_complete));
1021     lbTopos.push_back(new LBTopoMap("2_arytree", createLBTopo_2_arytree));
1022     lbTopos.push_back(new LBTopoMap("3_arytree", createLBTopo_3_arytree));
1023     lbTopos.push_back(new LBTopoMap("4_arytree", createLBTopo_4_arytree));
1024     lbTopos.push_back(new LBTopoMap("smp_n_1", createLBTopo_smp_n_1));
1025     lbTopos.push_back(new LBTopoMap("smp_n_2", createLBTopo_smp_n_2));
1026     lbTopos.push_back(new LBTopoMap("smp_n_3", createLBTopo_smp_n_3));
1027     lbTopos.push_back(new LBTopoMap("smp_n_4", createLBTopo_smp_n_4));
1028   }
1029   ~LBTopoVec() {
1030     for (int i=0; i<lbTopos.length(); i++)
1031       delete lbTopos[i];
1032   }
1033   void push_back(LBTopoMap *map) { lbTopos.push_back(map); }
1034   int length() { return lbTopos.length(); }
1035   LBTopoMap * operator[](size_t n) { return lbTopos[n]; }
1036   void print() {
1037     for (int i=0; i<lbTopos.length(); i++) {
1038       CmiPrintf("  %s\n", lbTopos[i]->name);
1039     }
1040   }
1041 };
1042
1043 static LBTopoVec lbTopoMap;
1044
1045 extern "C"
1046 LBtopoFn LBTopoLookup(char *name)
1047 {
1048   for (int i=0; i<lbTopoMap.length(); i++) {
1049     if (strcmp(name, lbTopoMap[i]->name)==0) return lbTopoMap[i]->fn;
1050   }
1051   return NULL;
1052 }
1053
1054 // C wrapper functions
1055 extern "C" void getTopoNeighbors(void *topo, int myid, int* narray, int *n)
1056 {
1057   ((LBTopology*)topo)->neighbors(myid, narray, *n);
1058 }
1059
1060 extern "C" int getTopoMaxNeighbors(void *topo)
1061 {
1062   return ((LBTopology*)topo)->max_neighbors();
1063 }
1064
1065 extern "C" void printoutTopo()
1066 {
1067   lbTopoMap.print();
1068 }
1069