build: fix travis MPI/SMP build
[charm.git] / src / conv-ldb / topology.C
1 /**
2  * \addtogroup CkLdb
3 */
4 /*@{*/
5
6 #include <math.h>
7
8 //#include "LBDatabase.h"
9 #include <vector>
10 #include "converse.h"
11 #include "topology.h"
12
13 extern char *_lbtopo;                   /* topology name string */
14
15 int LBTopology::get_hop_count(int src,int dest)
16 {
17         int npe;
18         int *visited_srcs;
19
20         if(src==dest)
21                 return 0;
22         
23         npe = max_neighbors();
24         visited_srcs = new int[npes];
25         
26         int count = rec_hop_count(src,dest,npe,1,visited_srcs,999999);
27         delete [] visited_srcs;
28
29         return count;
30 }
31
32 int LBTopology::rec_hop_count(int src,int dest,int max_neigh,int count,int *visited_srcs,int min_hop_cnt)
33 {
34         int *pes = new int[max_neigh];
35         //int min_hop_cnt=999999;
36         int ret_val=0;
37         int skip_neigh=0;
38         int neigh_cnt=0;
39         int i;
40         
41         neighbors(src,pes,neigh_cnt);
42         
43         visited_srcs[count-1]=src;
44         
45         for(i=0;i<neigh_cnt;i++)
46         {
47                 if(pes[i]==dest)
48                         return count;
49         }
50         for(i=0;i<neigh_cnt;i++)
51         {
52                 for(int j=0;j<count;j++)
53                         if(visited_srcs[j]==pes[i])
54                         {
55                                 skip_neigh=1;
56                                 break;
57                         }
58                 if(!skip_neigh)
59                 {
60                         if(min_hop_cnt > count+1){
61                                 ret_val=rec_hop_count(pes[i],dest,max_neigh,count+1,visited_srcs,min_hop_cnt);
62                                 if(ret_val < min_hop_cnt)
63                                         min_hop_cnt = ret_val;
64                         }
65                 }
66                 else
67                         skip_neigh=0;
68         }
69         delete [] pes;
70         return min_hop_cnt;
71 }
72
73 double LBTopology::per_hop_delay(int last_hop)
74 {
75         if(!last_hop)
76                 return (HOP_LINK_DELAY + HOP_PROC_DELAY);
77         else
78                 return HOP_LINK_DELAY;
79 }
80
81 void LBTopology::get_pairwise_hop_count(double  **distance)
82 {
83   struct queueNode
84   {
85     int index;
86     int dist;
87     queueNode *next;
88     queueNode(int i,int d): index(i), dist(d), next(NULL) {}
89   };
90   
91   bool *visited=new bool[npes];
92   int *neigh=new int[max_neighbors()];
93   int num_neighbors;
94
95   for(int i=0;i<npes;i++)
96   {
97     //Init data structures for BFS from i-th proc
98     for(int j=0;j<npes;j++)
99       visited[j]=false;
100
101     queueNode *q=new queueNode(i,0);
102     queueNode *last=q;
103     distance[i][i]=0;
104     visited[i]=true;
105
106     // Perform BFS until queue is empty
107     while(q)
108     { 
109       neighbors(q->index,neigh,num_neighbors);
110       for(int j=0;j<num_neighbors;j++)
111       {
112         if(!visited[neigh[j]])
113         {
114           visited[neigh[j]]=true;
115           distance[i][neigh[j]]=q->dist+1;
116           queueNode *qnew=new queueNode(neigh[j],q->dist+1);
117           last->next=qnew;
118           last=last->next;
119         }
120       }
121       queueNode *qtemp=q;
122       q=q->next;
123       delete qtemp;
124     }
125   }
126   delete[] visited;
127   delete[] neigh;
128 }
129
130 //smp - assume 1,2,3 or 4 processors per node
131
132 template <int ppn>
133 class LBTopo_smp_n: public LBTopology {
134 public:
135   LBTopo_smp_n(int p): LBTopology(p) {}
136   virtual int max_neighbors() { return npes - 1; }
137
138   virtual void neighbors(int mype, int* _n, int &nb){
139       nb = 0;
140       for(int i=1; i<=ppn; i++)
141       {
142           _n[nb++] = (mype+i)%npes;
143       }
144   }
145         
146         int get_hop_count(int src,int dest){
147                 
148                 //CmiPrintf("in smp get_hop_count\n");
149                 int a = src/ppn;
150                 int b = dest/ppn;
151                 
152                 if(a!=b){
153                         //CmiPrintf("2 returned\n");
154                         return 2;
155                 }
156                 else{
157                         //CmiPrintf("1 returned\n");
158                         return 1;
159                 }
160         }
161 };
162
163 typedef LBTopo_smp_n<1> LBTopo_smp_n_1;
164 typedef LBTopo_smp_n<2> LBTopo_smp_n_2;
165 typedef LBTopo_smp_n<3> LBTopo_smp_n_3;
166 typedef LBTopo_smp_n<4> LBTopo_smp_n_4;
167 typedef LBTopo_smp_n<5> LBTopo_smp_n_5;
168 typedef LBTopo_smp_n<6> LBTopo_smp_n_6;
169 typedef LBTopo_smp_n<7> LBTopo_smp_n_7;
170 typedef LBTopo_smp_n<8> LBTopo_smp_n_8;
171 typedef LBTopo_smp_n<9> LBTopo_smp_n_9;
172 typedef LBTopo_smp_n<10> LBTopo_smp_n_10;
173
174 LBTOPO_MACRO(LBTopo_smp_n_1)
175 LBTOPO_MACRO(LBTopo_smp_n_2)
176 LBTOPO_MACRO(LBTopo_smp_n_3)
177 LBTOPO_MACRO(LBTopo_smp_n_4)
178 LBTOPO_MACRO(LBTopo_smp_n_5)
179 LBTOPO_MACRO(LBTopo_smp_n_6)
180 LBTOPO_MACRO(LBTopo_smp_n_7)
181 LBTOPO_MACRO(LBTopo_smp_n_8)
182 LBTOPO_MACRO(LBTopo_smp_n_9)
183 LBTOPO_MACRO(LBTopo_smp_n_10)
184
185
186 // ring
187
188 LBTOPO_MACRO(LBTopo_ring)
189
190 int LBTopo_ring::max_neighbors()
191 {
192   if (npes > 2) return 2;
193   else return (npes-1);
194 }
195
196 void LBTopo_ring::neighbors(int mype, int* _n, int &nb)
197 {
198   nb = 0;
199   if (npes>1) _n[nb++] = (mype + npes -1) % npes;
200   if (npes>2) _n[nb++] = (mype + 1) % npes;
201 }
202
203 int LBTopo_ring::get_hop_count(int src,int dest){
204         
205         int dist=src-dest;
206         if(dist<0) dist=-dist;
207         
208         if((npes-dist) < dist)
209                 return (npes-dist);
210         else
211                 return dist;
212 }
213
214 //  TORUS 2D
215
216 LBTOPO_MACRO(LBTopo_torus2d)
217
218 LBTopo_torus2d::LBTopo_torus2d(int p): LBTopology(p) 
219 {
220   width = (int)sqrt(p*1.0);
221   if (width * width < npes) width++;
222 }
223
224 int LBTopo_torus2d::max_neighbors()
225 {
226   return 4;
227 }
228
229 int LBTopo_torus2d::goodcoor(int x, int y)
230 {
231   if (x<0 || x>=width) return -1;
232   if (y<0 || y>=width) return -1;
233   int next = x*width + y;
234   if (next<npes && next>=0) return next;
235   return -1;
236 }
237
238 static int checkuniq(int *arr, int nb, int val) {
239   for (int i=0;i<nb;i++) if (arr[i]==val) return 0;
240   return 1;
241 }
242
243 void LBTopo_torus2d::neighbors(int mype, int* _n, int &nb)
244 {
245   int next;
246   int x = mype/width;
247   int y = mype%width;
248   nb=0;
249   for (int i=-1; i<=1; i+=2) {
250     int x1 = x+i;
251     if (x1 == -1) {
252       x1 = width-1;
253       while (goodcoor(x1, y)==-1) x1--;
254     }
255     else if (goodcoor(x1, y) == -1) x1=0;
256     next = goodcoor(x1, y);
257     CmiAssert(next != -1);
258     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
259
260     int y1 = y+i;
261     if (y1 == -1) {
262       y1 = width-1;
263       while (goodcoor(x, y1)==-1) y1--;
264     }
265     else if (goodcoor(x, y1) == -1) y1=0;
266     next = goodcoor(x, y1);
267     CmiAssert(next != -1);
268     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
269   }
270 }
271
272 int LBTopo_torus2d::get_hop_count(int src,int dest){
273         int xpos_src,xpos_dest;
274         int ypos_src,ypos_dest;
275         int xdist=0;
276         int ydist=0;
277         
278         int xchange;
279         if(src > dest){
280                 xchange = src;
281                 src = dest;
282                 dest = xchange;
283         }
284         
285         xpos_src=src%width;
286         ypos_src=src/width;
287
288         xpos_dest=dest%width;
289         ypos_dest=dest/width;
290
291         xdist = xpos_dest-xpos_src;
292         if(xdist<0) xdist=-xdist;
293         if((width-xdist) < xdist)
294                 xdist = width-xdist;
295         
296         ydist = ypos_dest-ypos_src;
297         if(ydist<0) ydist=-ydist;
298
299         int lastpos=(npes-1)%width;
300         int otherylen=0;
301
302         if(xpos_src<=lastpos && xpos_dest<=lastpos)
303                 otherylen=((npes-1)/width)+1-ydist;
304         else{
305                 if(ypos_dest==((npes-1)/width))
306                         otherylen=((npes-1)/width)+1-ydist;
307                 else    
308                         otherylen=((npes-1)/width)-ydist;
309         }
310         
311         if(otherylen < ydist)
312                 ydist=otherylen;
313         
314         //added later
315         int sdist=0,adist=0,bdist=0,cdist=0,ddist=0;
316         
317         if(xpos_src>lastpos && xpos_dest>lastpos){
318                 sdist = xpos_src;
319                 if((width-sdist) < sdist)
320                 sdist = width-sdist;
321
322                 adist = ((npes-1)/width)-ypos_src;
323                 if(adist<0) adist=-adist;
324                 if(ypos_src+1 < adist)
325                         adist = ypos_src+1;
326         
327                 bdist = 1;
328
329                 cdist = ((npes-1)/width)-ypos_dest;
330                 if(cdist<0) cdist=-cdist;
331                 if(ypos_dest+1 < cdist)
332                         cdist = ypos_dest+1;
333
334                 ddist = xpos_dest-lastpos;
335                 if(ddist<0) ddist=-ddist;
336                 if((width-ddist) < ddist)
337                         ddist = width-ddist;
338         }
339         else{
340                 if(xpos_src>lastpos){
341                         xchange = src;
342                         src = dest;
343                         dest = xchange;
344                         xpos_src=src%width;
345                         ypos_src=src/width;
346                         xpos_dest=dest%width;
347                         ypos_dest=dest/width;
348                 }
349                 adist = ((npes-1)/width)-ypos_src;
350                 if(adist<0) adist=-adist;
351                 if(ypos_src+1 < adist)
352                         adist = ypos_src+1;
353         
354                 if(xpos_dest<=lastpos){
355                         bdist = xpos_dest-xpos_src;
356                         if(bdist<0) bdist=-bdist;
357                         if((lastpos+1-bdist) < bdist)
358                                 bdist = lastpos+1-bdist;
359
360                         cdist = ((npes-1)/width)-ypos_dest;
361                         if(cdist<0) cdist=-cdist;
362                         if(ypos_dest+1 < cdist)
363                                 cdist = ypos_dest+1;
364                 
365                         ddist=0;
366                 }
367                 else{
368                         bdist = lastpos-xpos_src;
369                         if(bdist<0) bdist=-bdist;
370                         if((xpos_src+1) < bdist)
371                                 bdist = xpos_src+1;
372
373                         cdist = ((npes-1)/width)-ypos_dest;
374                         if(cdist<0) cdist=-cdist;
375                         if(ypos_dest+1 < cdist)
376                                 cdist = ypos_dest+1;
377
378                         ddist = xpos_dest-lastpos;
379                         if(ddist<0) ddist=-ddist;
380                         if((width-ddist) < ddist)
381                                 ddist = width-ddist;
382                 }
383         }
384         
385         if((sdist+adist+bdist+cdist+ddist) < (xdist+ydist))
386                 return (sdist+adist+bdist+cdist+ddist);
387         else
388                 return (xdist+ydist);
389
390 }
391
392 //  TORUS 3D
393
394 LBTOPO_MACRO(LBTopo_torus3d)
395
396 LBTopo_torus3d::LBTopo_torus3d(int p): LBTopology(p) 
397 {
398   width = 1;
399   while ( (width+1) * (width+1) * (width+1) <= npes) width++;
400   if (width * width * width < npes) width++;
401 }
402
403 int LBTopo_torus3d::max_neighbors()
404 {
405   return 6;
406 }
407
408 int LBTopo_torus3d::goodcoor(int x, int y, int z)
409 {
410   if (x<0 || x>=width) return -1;
411   if (y<0 || y>=width) return -1;
412   if (z<0 || z>=width) return -1;
413   int next = x*width*width + y*width + z;
414   if (next<npes && next>=0) return next;
415   return -1;
416 }
417
418 void LBTopo_torus3d::neighbors(int mype, int* _n, int &nb)
419 {
420
421   int x = mype/(width*width);
422   int k = mype%(width*width);
423   int y = k/width;
424   int z = k%width;
425   int next;
426   nb=0;
427   for (int i=-1; i<=1; i+=2) {
428     int x1 = x+i;
429     if (x1 == -1) {
430       x1 = width-1;
431       while (goodcoor(x1, y, z)==-1) x1--;
432     }
433     else if (goodcoor(x1, y, z) == -1) x1=0;
434     next = goodcoor(x1, y, z);
435     CmiAssert(next != -1);
436     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
437
438     int y1 = y+i;
439     if (y1 == -1) {
440       y1 = width-1;
441       while (goodcoor(x, y1, z)==-1) y1--;
442     }
443     else if (goodcoor(x, y1, z) == -1) y1=0;
444     next = goodcoor(x, y1, z);
445     CmiAssert(next != -1);
446     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
447
448     int z1 = z+i;
449     if (z1 == -1) {
450       z1 = width-1;
451       while (goodcoor(x, y, z1)==-1) z1--;
452     }
453     else if (goodcoor(x, y, z1) == -1) z1=0;
454     next = goodcoor(x, y, z1);
455     CmiAssert(next != -1);
456     if (next != mype && checkuniq(_n, nb, next)) _n[nb++] = next;
457   }
458 }
459
460 /*
461 // Works only for perfect cube number of processor topologies
462 int LBTopo_torus3d::get_hop_count(int src,int dest){
463         
464         int x_src = src/(width*width);
465   int k_src = src%(width*width);
466   int y_src = k_src/width;
467   int z_src = k_src%width;
468
469         int x_dest = dest/(width*width);
470   int k_dest = dest%(width*width);
471   int y_dest = k_dest/width;
472   int z_dest = k_dest%width;
473
474         int xdist=0,ydist=0,zdist=0;
475         
476         //CmiPrintf("just a chk........\n");
477         xdist = x_dest-x_src;
478         if(xdist<0) xdist=-xdist;
479         if((width-xdist) < xdist)
480                 xdist = width-xdist;
481
482         ydist = y_dest-y_src;
483         if(ydist<0) ydist=-ydist;
484         if((width-ydist) < ydist)
485                 ydist = width-ydist;
486
487         zdist = z_dest-z_src;
488         if(zdist<0) zdist=-zdist;
489         if((width-zdist) < zdist)
490                 zdist = width-zdist;
491
492         return (xdist+ydist+zdist);
493
494 }
495 */
496
497 //Mesh3D
498 LBTOPO_MACRO(LBTopo_mesh3d)
499
500 LBTopo_mesh3d::LBTopo_mesh3d(int p): LBTopology(p) 
501 {
502   width = 1;
503   while ( (width+1) * (width+1) * (width+1) <= npes) width++;
504   if (width * width * width < npes) width++;
505 }
506
507 int LBTopo_mesh3d::max_neighbors()
508 {
509   return 6;
510 }
511
512 int LBTopo_mesh3d::goodcoor(int x, int y, int z)
513 {
514   if (x<0 || x>=width) return -1;
515   if (y<0 || y>=width) return -1;
516   if (z<0 || z>=width) return -1;
517   int next = z*width*width + y*width + x;
518   if (next<npes && next>=0) return next;
519   return -1;
520 }
521
522 void LBTopo_mesh3d::neighbors(int mype, int* _n, int &nb)
523 {
524
525   int z = mype/(width*width);
526   int k = mype%(width*width);
527   int y = k/width;
528   int x = k%width;
529   int next;
530   int isNeigh=1;
531   nb=0;
532   for (int i=-1; i<=1; i+=2) {
533     isNeigh=1;
534     int x1 = x+i;
535     if (x1 == -1) {
536       //x1 = width-1;
537       x1=x;
538       //while (goodcoor(x1, y, z)==-1) x1--;
539       isNeigh=0;
540     }
541     else if (goodcoor(x1, y, z) == -1) { x1=0; isNeigh=0; }
542     next = goodcoor(x1, y, z);
543     CmiAssert(next != -1);
544     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
545
546     isNeigh=1;
547     int y1 = y+i;
548     if (y1 == -1) {
549       //y1 = width-1;
550       //while (goodcoor(x, y1, z)==-1) y1--;
551       y1=y;
552       isNeigh=0;
553     }
554     else if (goodcoor(x, y1, z) == -1) { y1=0; isNeigh=0; }
555     next = goodcoor(x, y1, z);
556     CmiAssert(next != -1);
557     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
558
559     isNeigh=1;
560     int z1 = z+i;
561     if (z1 == -1) {
562       //z1 = width-1;
563       //while (goodcoor(x, y, z1)==-1) z1--;
564       z1=z;
565       isNeigh=0;
566     }
567     else if (goodcoor(x, y, z1) == -1) { z1=0; isNeigh=0; }
568     next = goodcoor(x, y, z1);
569     CmiAssert(next != -1);
570     if (next != mype && isNeigh && checkuniq(_n, nb, next)) _n[nb++] = next;
571   }
572 }
573
574 //  TORUS ND 
575 //  added by zshao1
576
577 template <int dimension>
578 class LBTopo_torus_nd: public LBTopology {
579 private:
580   // inherited int npes;
581   int* Cardinality;
582   int VirtualProcessorCount;
583   int* TempCo;
584 private:
585   int GetNeighborID(int ProcessorID, int number) {
586     CmiAssert(number>=0 && number<max_neighbors());
587     CmiAssert(ProcessorID>=0 && ProcessorID<npes);
588     get_processor_coordinates(ProcessorID, TempCo);
589
590     int index = number/2;
591     int displacement = (number%2)? -1: 1;
592     do{
593       TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
594       get_processor_id(TempCo, &ProcessorID);
595     } while (ProcessorID >= npes);
596     return ProcessorID;
597   }
598 public:
599   LBTopo_torus_nd(int p): LBTopology(p) /*inherited :npes(p) */ {
600     int i;
601     CmiAssert(dimension>=1 && dimension<=16);
602     CmiAssert(p>=1);
603
604     Cardinality = new int[dimension];
605     TempCo = new int[dimension];
606     double pp = p;
607     for(i=0;i<dimension;i++) {
608       Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
609       pp = pp / Cardinality[i];
610     }
611     VirtualProcessorCount = 1;
612     for(i=0;i<dimension;i++) {
613       VirtualProcessorCount *= Cardinality[i];
614     }
615   }
616   ~LBTopo_torus_nd() {
617     delete[] Cardinality;
618     delete[] TempCo;
619   }
620   virtual int max_neighbors() {
621     return dimension*2;
622   }
623   virtual void neighbors(int mype, int* _n, int &nb) {
624     nb = 0;
625     for(int i=0;i<dimension*2;i++) {
626       _n[nb] = GetNeighborID(mype, i);
627       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
628     }
629   }
630   virtual int get_dimension() {
631     return dimension;
632   }
633   virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
634     CmiAssert(processor_id>=0 && processor_id<VirtualProcessorCount);
635     CmiAssert( processor_coordinates != NULL );
636     for(int i=0;i<dimension;i++) {
637       processor_coordinates[i] = processor_id % Cardinality[i];
638       processor_id = processor_id / Cardinality[i];
639     }
640     return true;
641   }
642   virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
643     int i;
644     CmiAssert( processor_coordinates != NULL );
645     CmiAssert( processor_id != NULL );
646     for(i=dimension-1;i>=0;i--) 
647       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<Cardinality[i]);
648     (*processor_id) = 0;
649     for(i=dimension-1;i>=0;i--) {
650       (*processor_id) = (*processor_id)* Cardinality[i] + processor_coordinates[i];
651     }
652     return true;
653   }
654   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
655   virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) { 
656 //    CmiPrintf("[%d] coordiate_difference begin\n", CkMyPe());
657     CmiAssert( my_coordinates != NULL);
658     CmiAssert( target_coordinates != NULL);
659     CmiAssert( difference != NULL);
660 //    CmiPrintf("[%d] after assert\n", CkMyPe());
661     for(int i=0;i<dimension;i++) {
662 //      CmiPrintf("[%d] coordiate_difference iteration %d\n", i);
663       difference[i] = target_coordinates[i] - my_coordinates[i];
664       if (abs(difference[i])*2 > Cardinality[i]) {
665         difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
666       } else if (abs(difference[i])*2 == Cardinality[i]) {
667         difference[i] = 0;
668       }
669     }
670 //    CmiPrintf("[%d] coordiate_difference just before return\n");
671     return true;
672   }
673   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
674   virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) { 
675     CmiAssert( difference != NULL);
676     int my_coordinates[dimension];
677     int target_coordinates[dimension];
678     get_processor_coordinates(my_processor_id, my_coordinates);
679     get_processor_coordinates(target_processor_id, target_coordinates);
680     coordinate_difference(my_coordinates, target_coordinates, difference);
681     return true;
682   }
683 };
684
685 typedef LBTopo_torus_nd<1> LBTopo_torus_nd_1;
686 typedef LBTopo_torus_nd<2> LBTopo_torus_nd_2;
687 typedef LBTopo_torus_nd<3> LBTopo_torus_nd_3;
688 typedef LBTopo_torus_nd<4> LBTopo_torus_nd_4;
689 typedef LBTopo_torus_nd<5> LBTopo_torus_nd_5;
690 typedef LBTopo_torus_nd<6> LBTopo_torus_nd_6;
691 typedef LBTopo_torus_nd<7> LBTopo_torus_nd_7;
692 typedef LBTopo_torus_nd<8> LBTopo_torus_nd_8;
693 typedef LBTopo_torus_nd<9> LBTopo_torus_nd_9;
694 typedef LBTopo_torus_nd<10> LBTopo_torus_nd_10;
695
696 LBTOPO_MACRO(LBTopo_torus_nd_1)
697 LBTOPO_MACRO(LBTopo_torus_nd_2)
698 LBTOPO_MACRO(LBTopo_torus_nd_3)
699 LBTOPO_MACRO(LBTopo_torus_nd_4)
700 LBTOPO_MACRO(LBTopo_torus_nd_5)
701 LBTOPO_MACRO(LBTopo_torus_nd_6)
702 LBTOPO_MACRO(LBTopo_torus_nd_7)
703 LBTOPO_MACRO(LBTopo_torus_nd_8)
704 LBTOPO_MACRO(LBTopo_torus_nd_9)
705 LBTOPO_MACRO(LBTopo_torus_nd_10)
706
707 //  TORUS ND  and SMP Awareness
708 //  added by Yanhua Sun 
709
710 //#define YHDEBUG
711
712 template <int dimension>
713 class LBTopo_torus_nd_smp: public LBTopology {
714 private:
715   // inherited int npes;
716   int* Cardinality;
717   int  VirtualNodeCount;
718   int* TempCo;
719   int  ppn;
720   int  NumOfNodes;
721 private:
722   int GetNeighborID(int ProcessorID, int number) {
723     CmiAssert(number>=0 && number<max_neighbors());
724     CmiAssert(ProcessorID>=0 && ProcessorID<npes);
725     int neighborId; 
726     int nodeId = CmiPhysicalNodeID(ProcessorID);
727     
728     get_node_coordinates(nodeId, TempCo);
729
730     int index = number/2;
731     int displacement = (number%2)? -1: 1;
732     do{
733       TempCo[index] = (TempCo[index] + displacement + Cardinality[index]) % Cardinality[index];
734       get_node_id(TempCo, &nodeId);
735     } while (nodeId >= NumOfNodes);
736     neighborId = CmiGetFirstPeOnPhysicalNode(nodeId);
737     return neighborId;
738   }
739 public:
740   LBTopo_torus_nd_smp(int p): LBTopology(p) /*inherited :npes(p) */ {
741     int i;
742     CmiAssert(dimension>=1 && dimension<=32);
743     CmiAssert(p>=1);
744
745     ppn = CmiNumPesOnPhysicalNode(0);
746     NumOfNodes = CmiNumPhysicalNodes();
747
748     Cardinality = new int[dimension];
749     TempCo = new int[dimension];
750     double pp = NumOfNodes;
751     for(i=0;i<dimension;i++) {
752       Cardinality[i] = (int)ceil(pow(pp,1.0/(dimension-i))-1e-5);
753       pp = pp / Cardinality[i];
754     }
755     VirtualNodeCount = 1;
756     for(i=0;i<dimension;i++) {
757       VirtualNodeCount *= Cardinality[i];
758     }
759 #ifdef YHDEBUG
760     CmiPrintf(" ppn=%d, NumOfNodes=%d\n", ppn, NumOfNodes);
761 #endif
762   }
763   ~LBTopo_torus_nd_smp() {
764     delete[] Cardinality;
765     delete[] TempCo;
766   }
767   virtual int max_neighbors() {
768     return (dimension+ppn)*2;
769   }
770   virtual void neighbors(int mype, int* _n, int &nb) {
771     nb = 0;
772     int *nodePeList; 
773     int numpes;
774     int rank = CmiPhysicalRank(mype);
775     int node = CmiPhysicalNodeID(mype);
776     int _ppn_ = CmiNumPesOnPhysicalNode(node);
777     CmiGetPesOnPhysicalNode(node, &nodePeList, &numpes); 
778 #ifdef YHDEBUG
779     CmiPrintf(" PE[%d] ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", mype, _ppn_, NumOfNodes, rank, node, numpes);
780 #endif   
781     for(int i=0; i<numpes; i++)
782     {
783         int _pid = nodePeList[i];
784         if(_pid != mype)
785         {
786              _n[nb] = _pid;
787              nb++;
788         }
789     }
790
791     /* for inter-node communication */
792     if(mype == CmiGetFirstPeOnPhysicalNode(node))
793     {
794         for(int j=0; j<dimension*2; j++)
795         {
796             //_n[nb] = (mype+1)%npes;//GetNeighborID(mype, j);
797             _n[nb] = GetNeighborID(mype, j);
798             /* the first processors in other nodes */
799             if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
800         }
801     }
802
803 #ifdef YHDEBUG
804   CmiPrintf(" [%d] neighbor = %d ppn=%d, NumOfNodes=%d, rank=%d, node=%d, numpes=%d\n", mype, nb, _ppn_, NumOfNodes, rank, node, numpes);
805 #endif
806   }
807   virtual int get_dimension() {
808     return dimension;
809   }
810   virtual bool get_node_coordinates(int node_id, int* node_coordinates) {
811     CmiAssert(node_id>=0 && node_id<VirtualNodeCount);
812     CmiAssert( node_coordinates != NULL );
813     for(int i=0;i<dimension;i++) {
814       node_coordinates[i] = node_id % Cardinality[i];
815       node_id = node_id / Cardinality[i];
816     }
817     return true;
818   }
819   virtual bool get_node_id(const int* node_coordinates, int* node_id) {
820     int i;
821     CmiAssert( node_coordinates != NULL );
822     CmiAssert( node_id != NULL );
823     for(i=dimension-1;i>=0;i--) 
824       CmiAssert( 0<=node_coordinates[i] && node_coordinates[i]<Cardinality[i]);
825     (*node_id) = 0;
826     for(i=dimension-1;i>=0;i--) {
827       (*node_id) = (*node_id)* Cardinality[i] + node_coordinates[i];
828     }
829     return true;
830   }
831   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
832   virtual bool coordinate_difference(const int* my_coordinates, const int* target_coordinates, int* difference) { 
833     CmiAssert( my_coordinates != NULL);
834     CmiAssert( target_coordinates != NULL);
835     CmiAssert( difference != NULL);
836     for(int i=0;i<dimension;i++) {
837       difference[i] = target_coordinates[i] - my_coordinates[i];
838       if (abs(difference[i])*2 > Cardinality[i]) {
839         difference[i] += (difference[i]>0) ? -Cardinality[i] : Cardinality[i];
840       } else if (abs(difference[i])*2 == Cardinality[i]) {
841         difference[i] = 0;
842       }
843     }
844     return true;
845   }
846   //Note: if abs(difference)*2 = cardinality, the difference is set to zero
847   virtual bool coordinate_difference(int my_processor_id, int target_processor_id, int* difference) { 
848     CmiAssert( difference != NULL);
849     int my_coordinates[dimension];
850     int target_coordinates[dimension];
851     get_processor_coordinates(my_processor_id, my_coordinates);
852     get_processor_coordinates(target_processor_id, target_coordinates);
853     coordinate_difference(my_coordinates, target_coordinates, difference);
854     return true;
855   }
856 };
857
858
859 typedef LBTopo_torus_nd_smp<1> LBTopo_torus_nd_smp_1;
860 typedef LBTopo_torus_nd_smp<2> LBTopo_torus_nd_smp_2;
861 typedef LBTopo_torus_nd_smp<3> LBTopo_torus_nd_smp_3;
862 typedef LBTopo_torus_nd_smp<4> LBTopo_torus_nd_smp_4;
863 typedef LBTopo_torus_nd_smp<5> LBTopo_torus_nd_smp_5;
864 typedef LBTopo_torus_nd_smp<6> LBTopo_torus_nd_smp_6;
865 typedef LBTopo_torus_nd_smp<7> LBTopo_torus_nd_smp_7;
866 typedef LBTopo_torus_nd_smp<8> LBTopo_torus_nd_smp_8;
867 typedef LBTopo_torus_nd_smp<9> LBTopo_torus_nd_smp_9;
868 typedef LBTopo_torus_nd_smp<10> LBTopo_torus_nd_smp_10;
869
870 LBTOPO_MACRO(LBTopo_torus_nd_smp_1)
871 LBTOPO_MACRO(LBTopo_torus_nd_smp_2)
872 LBTOPO_MACRO(LBTopo_torus_nd_smp_3)
873 LBTOPO_MACRO(LBTopo_torus_nd_smp_4)
874 LBTOPO_MACRO(LBTopo_torus_nd_smp_5)
875 LBTOPO_MACRO(LBTopo_torus_nd_smp_6)
876 LBTOPO_MACRO(LBTopo_torus_nd_smp_7)
877 LBTOPO_MACRO(LBTopo_torus_nd_smp_8)
878 LBTOPO_MACRO(LBTopo_torus_nd_smp_9)
879 LBTOPO_MACRO(LBTopo_torus_nd_smp_10)
880
881
882 //Torus ND with unequal number of processors in each dimension
883 /***************************************************************/
884 template <int dimension>
885 class LBTopo_itorus_nd: public LBTopology {
886 private:
887         int *dim;
888         int *tempCoor;
889         
890 public:
891         LBTopo_itorus_nd(int p): LBTopology(p) {
892         CmiPrintf("Irregular torus created\n");
893         dim = new int[dimension];
894                 tempCoor = new int[dimension];
895
896                 int i=0;
897         char *lbcopy = strdup(_lbtopo);
898         char *ptr = strchr(lbcopy, ':');
899     if (ptr==NULL) {
900       free(lbcopy);
901       return;
902     }
903         ptr = strtok(ptr+1, ",");
904         while (ptr) {
905                         dim[i]=atoi(ptr);
906                         i++;
907                         ptr = strtok(NULL, ",");
908         }
909                 CmiAssert(dimension==i);
910                 
911                 int procs=1;
912                 for(i=0;i<dimension;i++)
913                         procs*=dim[i];
914     CmiAssert(dimension>=1 && dimension<=16);
915     CmiAssert(p>=1);
916                 CmiAssert(procs==p);
917     free(lbcopy);
918   }
919         
920   ~LBTopo_itorus_nd() {
921         delete[] dim;
922                 delete[] tempCoor;
923         }
924         
925   virtual int max_neighbors() {
926     return dimension*2;
927   }
928         
929         virtual void neighbors(int mype, int* _n, int &nb) {
930     nb = 0;
931     for(int i=0;i<dimension*2;i++) {
932       _n[nb] = GetNeighborID(mype, i);
933       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
934     }
935   }
936
937         int GetNeighborID(int ProcessorID, int number) {
938     CmiAssert(number>=0 && number<max_neighbors());
939     CmiAssert(ProcessorID>=0 && ProcessorID<npes);
940     get_processor_coordinates(ProcessorID, tempCoor);
941
942     int index = number/2;
943     int displacement = (number%2)? -1: 1;
944    // do{
945                 tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
946                 get_processor_id(tempCoor, &ProcessorID);
947     //} while (ProcessorID >= npes);
948     return ProcessorID;
949   }
950
951         virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
952     CmiAssert(processor_id>=0 && processor_id<npes);
953     CmiAssert(processor_coordinates != NULL );
954     for(int i=0;i<dimension;i++) {
955       processor_coordinates[i] = processor_id % dim[i];
956       processor_id = processor_id / dim[i];
957     }
958     return true;
959   }
960
961         virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
962     int i;
963     CmiAssert( processor_coordinates != NULL );
964     CmiAssert( processor_id != NULL );
965     for(i=dimension-1;i>=0;i--) 
966       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
967     (*processor_id) = 0;
968     for(i=dimension-1;i>=0;i--) {
969       (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
970     }
971     return true;
972   }
973 };
974
975
976 typedef LBTopo_itorus_nd<1> LBTopo_itorus_nd_1;
977 typedef LBTopo_itorus_nd<2> LBTopo_itorus_nd_2;
978 typedef LBTopo_itorus_nd<3> LBTopo_itorus_nd_3;
979 typedef LBTopo_itorus_nd<4> LBTopo_itorus_nd_4;
980 typedef LBTopo_itorus_nd<5> LBTopo_itorus_nd_5;
981 typedef LBTopo_itorus_nd<6> LBTopo_itorus_nd_6;
982 typedef LBTopo_itorus_nd<7> LBTopo_itorus_nd_7;
983
984 LBTOPO_MACRO(LBTopo_itorus_nd_1)
985 LBTOPO_MACRO(LBTopo_itorus_nd_2)
986 LBTOPO_MACRO(LBTopo_itorus_nd_3)
987 LBTOPO_MACRO(LBTopo_itorus_nd_4)
988 LBTOPO_MACRO(LBTopo_itorus_nd_5)
989 LBTOPO_MACRO(LBTopo_itorus_nd_6)
990 LBTOPO_MACRO(LBTopo_itorus_nd_7)
991
992
993 /******************************************************************/
994 //Mesh ND with unequal number of processors in each dimension
995 /***************************************************************/
996 template <int dimension>
997 class LBTopo_imesh_nd: public LBTopology {
998 private:
999         int *dim;
1000         int *tempCoor;
1001         
1002 public:
1003         LBTopo_imesh_nd(int p): LBTopology(p) {
1004         CmiPrintf("Irregular mesh created\n");
1005         dim = new int[dimension];
1006         tempCoor = new int[dimension];
1007
1008         int i=0;
1009         char *lbcopy = strdup(_lbtopo);
1010         char *ptr = strchr(lbcopy, ':');
1011         if (ptr==NULL) 
1012           {
1013             delete [] dim;
1014             delete [] tempCoor;
1015             free(lbcopy);
1016             return;
1017           }
1018         ptr = strtok(ptr+1, ",");
1019         while (ptr) {
1020           dim[i]=atoi(ptr);
1021           i++;
1022           ptr = strtok(NULL, ",");
1023         }
1024         CmiAssert(dimension==i);
1025         
1026         //char *ptr2=strchr(_lbtopo,':');
1027         //*ptr2='\0';
1028         int procs=1;
1029         for(i=0;i<dimension;i++)
1030           procs*=dim[i];
1031           CmiAssert(dimension>=1 && dimension<=16);
1032           CmiAssert(p>=1);
1033           CmiAssert(procs==p);
1034           free(lbcopy);
1035   }
1036         
1037   ~LBTopo_imesh_nd() {
1038         delete[] dim;
1039                 delete[] tempCoor;
1040         }
1041         
1042   virtual int max_neighbors() {
1043     return dimension*2;
1044   }
1045         
1046   virtual void neighbors(int mype, int* _n, int &nb) {
1047     nb = 0;
1048     for(int i=0;i<dimension*2;i++) {
1049       _n[nb] = GetNeighborID(mype, i);
1050       if (_n[nb]!=mype && (nb==0 || _n[nb-1]!=_n[nb]) ) nb++;
1051     }
1052     /*CmiPrintf("Nhs[%d]:",mype);
1053     for(int i=0;i<nb;i++)
1054       CmiPrintf("%d ",_n[i]);
1055     CmiPrintf("\n");*/
1056   }
1057
1058         int GetNeighborID(int ProcessorID, int number) {
1059         CmiAssert(number>=0 && number<max_neighbors());
1060         CmiAssert(ProcessorID>=0 && ProcessorID<npes);
1061         get_processor_coordinates(ProcessorID, tempCoor);
1062
1063         int index = number/2;
1064         int displacement = (number%2)? -1: 1;
1065    // do{
1066             if((tempCoor[index]==0 && displacement==-1) || (tempCoor[index]==dim[index]-1 && displacement==1)){
1067         }
1068         else{
1069              tempCoor[index] = (tempCoor[index] + displacement + dim[index]) % dim[index];
1070              get_processor_id(tempCoor, &ProcessorID);
1071             }
1072     //} while (ProcessorID >= npes);
1073     return ProcessorID;
1074   }
1075
1076         virtual bool get_processor_coordinates(int processor_id, int* processor_coordinates) {
1077     CmiAssert(processor_id>=0 && processor_id<npes);
1078     CmiAssert(processor_coordinates != NULL );
1079     for(int i=0;i<dimension;i++) {
1080       processor_coordinates[i] = processor_id % dim[i];
1081       processor_id = processor_id / dim[i];
1082     }
1083     return true;
1084   }
1085
1086         virtual bool get_processor_id(const int* processor_coordinates, int* processor_id) {
1087     int i;
1088     CmiAssert( processor_coordinates != NULL );
1089     CmiAssert( processor_id != NULL );
1090     for(i=dimension-1;i>=0;i--) 
1091       CmiAssert( 0<=processor_coordinates[i] && processor_coordinates[i]<dim[i]);
1092     (*processor_id) = 0;
1093     for(i=dimension-1;i>=0;i--) {
1094       (*processor_id) = (*processor_id)* dim[i] + processor_coordinates[i];
1095     }
1096     return true;
1097   }
1098 };
1099
1100
1101 typedef LBTopo_imesh_nd<1> LBTopo_imesh_nd_1;
1102 typedef LBTopo_imesh_nd<2> LBTopo_imesh_nd_2;
1103 typedef LBTopo_imesh_nd<3> LBTopo_imesh_nd_3;
1104 typedef LBTopo_imesh_nd<4> LBTopo_imesh_nd_4;
1105 typedef LBTopo_imesh_nd<5> LBTopo_imesh_nd_5;
1106 typedef LBTopo_imesh_nd<6> LBTopo_imesh_nd_6;
1107 typedef LBTopo_imesh_nd<7> LBTopo_imesh_nd_7;
1108
1109 LBTOPO_MACRO(LBTopo_imesh_nd_1)
1110 LBTOPO_MACRO(LBTopo_imesh_nd_2)
1111 LBTOPO_MACRO(LBTopo_imesh_nd_3)
1112 LBTOPO_MACRO(LBTopo_imesh_nd_4)
1113 LBTOPO_MACRO(LBTopo_imesh_nd_5)
1114 LBTOPO_MACRO(LBTopo_imesh_nd_6)
1115 LBTOPO_MACRO(LBTopo_imesh_nd_7)
1116
1117
1118 // dense graph with connectivity of square root processor number
1119
1120 LBTOPO_MACRO(LBTopo_graph)
1121
1122 int LBTopo_graph::max_neighbors()
1123 {
1124   return (int)(sqrt(1.0*CmiNumPes())+0.5);
1125 }
1126
1127 extern "C" void gengraph(int, int, int, int *, int *, int);
1128
1129 void LBTopo_graph::neighbors(int mype, int* na, int &nb)
1130 {
1131   gengraph(CmiNumPes(), (int)(sqrt(1.0*CmiNumPes())+0.5), 234, na, &nb, 0);
1132 }
1133
1134 /* add by Yanhua Aug-2010*/
1135 template <int dimension>
1136 class LBTopo_graph_nc: public LBTopology {
1137
1138 public:
1139     LBTopo_graph_nc(int p): LBTopology(p) {}
1140     int max_neighbors()
1141     {
1142         return dimension + 1; 
1143     }
1144
1145     void neighbors(int mype, int* na, int &nb)
1146     {
1147 #if CMK_NODE_QUEUE_AVAILABLE
1148         gengraph(CmiNumNodes(), dimension, 234, na, &nb, 0);
1149 #else
1150         gengraph(CmiNumPes(), dimension, 234, na, &nb, 0);
1151 #endif
1152     }
1153
1154 };
1155 typedef LBTopo_graph_nc<2> LBTopo_graph_nc_2;
1156 typedef LBTopo_graph_nc<3> LBTopo_graph_nc_3;
1157 typedef LBTopo_graph_nc<4> LBTopo_graph_nc_4;
1158 typedef LBTopo_graph_nc<5> LBTopo_graph_nc_5;
1159 typedef LBTopo_graph_nc<6> LBTopo_graph_nc_6;
1160 typedef LBTopo_graph_nc<7> LBTopo_graph_nc_7;
1161 typedef LBTopo_graph_nc<8> LBTopo_graph_nc_8;
1162 typedef LBTopo_graph_nc<9> LBTopo_graph_nc_9;
1163 typedef LBTopo_graph_nc<10> LBTopo_graph_nc_10;
1164 typedef LBTopo_graph_nc<20> LBTopo_graph_nc_20;
1165
1166 LBTOPO_MACRO(LBTopo_graph_nc_2)
1167 LBTOPO_MACRO(LBTopo_graph_nc_3)
1168 LBTOPO_MACRO(LBTopo_graph_nc_4)
1169 LBTOPO_MACRO(LBTopo_graph_nc_5)
1170 LBTOPO_MACRO(LBTopo_graph_nc_6)
1171 LBTOPO_MACRO(LBTopo_graph_nc_7)
1172 LBTOPO_MACRO(LBTopo_graph_nc_8)
1173 LBTOPO_MACRO(LBTopo_graph_nc_9)
1174 LBTOPO_MACRO(LBTopo_graph_nc_10)
1175 LBTOPO_MACRO(LBTopo_graph_nc_20)
1176
1177
1178 /* Centralized  balancer, one processor has the neighbors of all other processors, while the other ones only have one neighbor, the centralized processor */
1179
1180  
1181
1182 // complete graph
1183
1184 class LBTopo_complete: public LBTopology {
1185 public:
1186   LBTopo_complete(int p): LBTopology(p) {}
1187   int max_neighbors() {
1188     return npes - 1;
1189   }
1190   void neighbors(int mype, int* _n, int &nb) {
1191     nb = 0;
1192     for (int i=0; i<npes; i++)  if (mype != i) _n[nb++] = i;
1193   }
1194 };
1195
1196 LBTOPO_MACRO(LBTopo_complete)
1197
1198 //   k-ary tree
1199
1200 template <int k>
1201 class LBTopo_karytree: public LBTopology {
1202 public:
1203   LBTopo_karytree(int p): LBTopology(p) {}
1204   virtual int max_neighbors() {
1205     return k+1;     // parent + children
1206   }
1207   virtual void neighbors(int mype, int* _n, int &nb) {
1208     nb = 0;
1209     if (mype!=0) _n[nb++] = (mype-1)/k;
1210     int firstchild = mype*k+1;
1211     for (int i=0; i<k; i++)
1212       if (firstchild+i < npes) _n[nb++] = firstchild+i;
1213   }
1214 };
1215
1216 typedef LBTopo_karytree<2> LBTopo_2_arytree;
1217 typedef LBTopo_karytree<3> LBTopo_3_arytree;
1218 typedef LBTopo_karytree<4> LBTopo_4_arytree;
1219 typedef LBTopo_karytree<128> LBTopo_128_arytree;
1220 typedef LBTopo_karytree<512> LBTopo_512_arytree;
1221
1222 LBTOPO_MACRO(LBTopo_2_arytree)
1223 LBTOPO_MACRO(LBTopo_3_arytree)
1224 LBTOPO_MACRO(LBTopo_4_arytree)
1225 LBTOPO_MACRO(LBTopo_128_arytree)
1226 LBTOPO_MACRO(LBTopo_512_arytree)
1227
1228 //
1229
1230 class LBTopoMap {
1231 public:
1232   const char *name;
1233   LBtopoFn fn;
1234   LBTopoMap(const char *s, LBtopoFn f): name(s), fn(f) {}
1235   LBTopoMap(const LBTopoMap &p);                // You don't want to copy
1236   void operator=(const LBTopoMap &p);           // You don't want to copy
1237 };
1238
1239 class LBTopoVec {
1240     std::vector<LBTopoMap *> lbTopos;
1241 public:
1242   LBTopoVec() {
1243     // register all topos
1244     lbTopos.reserve(64);
1245     lbTopos.push_back(new LBTopoMap("ring", createLBTopo_ring));
1246     lbTopos.push_back(new LBTopoMap("torus2d", createLBTopo_torus2d));
1247     lbTopos.push_back(new LBTopoMap("torus3d", createLBTopo_torus3d));
1248     lbTopos.push_back(new LBTopoMap("mesh3d", createLBTopo_mesh3d));
1249     lbTopos.push_back(new LBTopoMap("torus_nd_1", createLBTopo_torus_nd_1));
1250     lbTopos.push_back(new LBTopoMap("torus_nd_2", createLBTopo_torus_nd_2));
1251     lbTopos.push_back(new LBTopoMap("torus_nd_3", createLBTopo_torus_nd_3));
1252     lbTopos.push_back(new LBTopoMap("torus_nd_4", createLBTopo_torus_nd_4));
1253     lbTopos.push_back(new LBTopoMap("torus_nd_5", createLBTopo_torus_nd_5));
1254     lbTopos.push_back(new LBTopoMap("torus_nd_6", createLBTopo_torus_nd_6));
1255     lbTopos.push_back(new LBTopoMap("torus_nd_7", createLBTopo_torus_nd_7));
1256     lbTopos.push_back(new LBTopoMap("torus_nd_8", createLBTopo_torus_nd_8));
1257     lbTopos.push_back(new LBTopoMap("torus_nd_9", createLBTopo_torus_nd_9));
1258     lbTopos.push_back(new LBTopoMap("torus_nd_10", createLBTopo_torus_nd_10));
1259     lbTopos.push_back(new LBTopoMap("torus_nd_smp_1", createLBTopo_torus_nd_smp_1));
1260     lbTopos.push_back(new LBTopoMap("torus_nd_smp_2", createLBTopo_torus_nd_smp_2));
1261     lbTopos.push_back(new LBTopoMap("torus_nd_smp_3", createLBTopo_torus_nd_smp_3));
1262     lbTopos.push_back(new LBTopoMap("torus_nd_smp_4", createLBTopo_torus_nd_smp_4));
1263     lbTopos.push_back(new LBTopoMap("torus_nd_smp_5", createLBTopo_torus_nd_smp_5));
1264     lbTopos.push_back(new LBTopoMap("torus_nd_smp_6", createLBTopo_torus_nd_smp_6));
1265     lbTopos.push_back(new LBTopoMap("torus_nd_smp_7", createLBTopo_torus_nd_smp_7));
1266     lbTopos.push_back(new LBTopoMap("torus_nd_smp_8", createLBTopo_torus_nd_smp_8));
1267     lbTopos.push_back(new LBTopoMap("torus_nd_smp_9", createLBTopo_torus_nd_smp_9));
1268     lbTopos.push_back(new LBTopoMap("torus_nd_smp_10", createLBTopo_torus_nd_smp_10));
1269     lbTopos.push_back(new LBTopoMap("itorus_nd_1", createLBTopo_itorus_nd_1));
1270     lbTopos.push_back(new LBTopoMap("itorus_nd_2", createLBTopo_itorus_nd_2));
1271     lbTopos.push_back(new LBTopoMap("itorus_nd_3", createLBTopo_itorus_nd_3));
1272     lbTopos.push_back(new LBTopoMap("itorus_nd_4", createLBTopo_itorus_nd_4));
1273     lbTopos.push_back(new LBTopoMap("itorus_nd_5", createLBTopo_itorus_nd_5));
1274     lbTopos.push_back(new LBTopoMap("itorus_nd_6", createLBTopo_itorus_nd_6));
1275     lbTopos.push_back(new LBTopoMap("itorus_nd_7", createLBTopo_itorus_nd_7));
1276     lbTopos.push_back(new LBTopoMap("imesh_nd_1", createLBTopo_imesh_nd_1));
1277     lbTopos.push_back(new LBTopoMap("imesh_nd_2", createLBTopo_imesh_nd_2));
1278     lbTopos.push_back(new LBTopoMap("imesh_nd_3", createLBTopo_imesh_nd_3));
1279     lbTopos.push_back(new LBTopoMap("imesh_nd_4", createLBTopo_imesh_nd_4));
1280     lbTopos.push_back(new LBTopoMap("imesh_nd_5", createLBTopo_imesh_nd_5));
1281     lbTopos.push_back(new LBTopoMap("imesh_nd_6", createLBTopo_imesh_nd_6));
1282     lbTopos.push_back(new LBTopoMap("imesh_nd_7", createLBTopo_imesh_nd_7));
1283     lbTopos.push_back(new LBTopoMap("graph", createLBTopo_graph));
1284     lbTopos.push_back(new LBTopoMap("graph_nc_2", createLBTopo_graph_nc_2));
1285     lbTopos.push_back(new LBTopoMap("graph_nc_3", createLBTopo_graph_nc_3));
1286     lbTopos.push_back(new LBTopoMap("graph_nc_4", createLBTopo_graph_nc_4));
1287     lbTopos.push_back(new LBTopoMap("graph_nc_5", createLBTopo_graph_nc_5));
1288     lbTopos.push_back(new LBTopoMap("graph_nc_6", createLBTopo_graph_nc_6));
1289     lbTopos.push_back(new LBTopoMap("graph_nc_7", createLBTopo_graph_nc_7));
1290     lbTopos.push_back(new LBTopoMap("graph_nc_8", createLBTopo_graph_nc_8));
1291     lbTopos.push_back(new LBTopoMap("graph_nc_9", createLBTopo_graph_nc_9));
1292     lbTopos.push_back(new LBTopoMap("graph_nc_10", createLBTopo_graph_nc_10));
1293     lbTopos.push_back(new LBTopoMap("graph_nc_20", createLBTopo_graph_nc_20));
1294     lbTopos.push_back(new LBTopoMap("complete", createLBTopo_complete));
1295     lbTopos.push_back(new LBTopoMap("2_arytree", createLBTopo_2_arytree));
1296     lbTopos.push_back(new LBTopoMap("3_arytree", createLBTopo_3_arytree));
1297     lbTopos.push_back(new LBTopoMap("4_arytree", createLBTopo_4_arytree));
1298     lbTopos.push_back(new LBTopoMap("128_arytree", createLBTopo_128_arytree));
1299     lbTopos.push_back(new LBTopoMap("512_arytree", createLBTopo_512_arytree));
1300     lbTopos.push_back(new LBTopoMap("smp_n_1", createLBTopo_smp_n_1));
1301     lbTopos.push_back(new LBTopoMap("smp_n_2", createLBTopo_smp_n_2));
1302     lbTopos.push_back(new LBTopoMap("smp_n_3", createLBTopo_smp_n_3));
1303     lbTopos.push_back(new LBTopoMap("smp_n_4", createLBTopo_smp_n_4));
1304     lbTopos.push_back(new LBTopoMap("smp_n_5", createLBTopo_smp_n_5));
1305     lbTopos.push_back(new LBTopoMap("smp_n_6", createLBTopo_smp_n_6));
1306     lbTopos.push_back(new LBTopoMap("smp_n_7", createLBTopo_smp_n_7));
1307     lbTopos.push_back(new LBTopoMap("smp_n_8", createLBTopo_smp_n_8));
1308     lbTopos.push_back(new LBTopoMap("smp_n_9", createLBTopo_smp_n_9));
1309     lbTopos.push_back(new LBTopoMap("smp_n_10", createLBTopo_smp_n_10));
1310   }
1311   ~LBTopoVec() {
1312     for (auto i : lbTopos)
1313       delete i;
1314   }
1315   void push_back(LBTopoMap *map) { lbTopos.push_back(map); }
1316   int length() { return lbTopos.size(); }
1317   LBTopoMap * operator[](size_t n) { return lbTopos[n]; }
1318   void print() {
1319     for (auto i : lbTopos) {
1320       CmiPrintf("  %s\n", i->name);
1321     }
1322   }
1323 };
1324
1325 static LBTopoVec* lbTopoMap;
1326 static CmiNodeLock lbTopoMapInitLock;
1327 static bool lbTopoInitialized = false;
1328
1329 void LBTopoInit() {
1330   CmiAssert(!lbTopoInitialized);
1331   lbTopoMapInitLock = CmiCreateLock();
1332   lbTopoInitialized = true;
1333 }
1334
1335 extern "C"
1336 LBtopoFn LBTopoLookup(const char *name)
1337 {
1338   // This routine is called from an LB's constructor. LBs are Groups, so their construction happens
1339   // inline on PE 0 but via the scheduler on other PEs, so we use a lock instead of a barrier here.
1340   CmiLock(lbTopoMapInitLock);
1341   if (!lbTopoMap)
1342     lbTopoMap = new LBTopoVec();
1343   CmiUnlock(lbTopoMapInitLock);
1344
1345   for (int i=0; i<lbTopoMap->length(); i++) {
1346     if (strcmp(name, (*lbTopoMap)[i]->name)==0) return (*lbTopoMap)[i]->fn;
1347   }
1348   return NULL;
1349 }
1350
1351 // C wrapper functions
1352 extern "C" void getTopoNeighbors(void *topo, int myid, int* narray, int *n)
1353 {
1354   ((LBTopology*)topo)->neighbors(myid, narray, *n);
1355 }
1356
1357 extern "C" int getTopoMaxNeighbors(void *topo)
1358 {
1359   return ((LBTopology*)topo)->max_neighbors();
1360 }
1361
1362 extern "C" void printoutTopo()
1363 {
1364   lbTopoMap->print();
1365 }
1366