correct name
[charm.git] / tests / charm++ / jacobi3d-gausssiedel / jacobi3d.C
1 /*****************************************************************************
2  * $Source$
3  * $Author$
4  * $Date$
5  * $Revision$
6  *****************************************************************************/
7
8 /** \file jacobi3d.C
9  *  Author: Abhinav S Bhatele
10  *  Date Created: June 01st, 2009
11  *
12  *  This does a topological placement for a 3d jacobi.
13  *
14  *      
15  *            *****************
16  *         *               *  *
17  *   ^  *****************     *
18  *   |  *               *     *
19  *   |  *               *     *
20  *   |  *               *     *
21  *   Y  *               *     *
22  *   |  *               *     *
23  *   |  *               *     *
24  *   |  *               *  * 
25  *   ~  *****************    Z
26  *      <------ X ------> 
27  *
28  *   X: left, right --> wrap_x
29  *   Y: top, bottom --> wrap_y
30  *   Z: front, back --> wrap_z
31  */
32
33 #include "jacobi3d.decl.h"
34 #include "TopoManager.h"
35 #ifdef JACOBI_OPENMP
36 #include <omp.h>
37 #endif
38
39 #define THRESHOLD 0.01
40 // See README for documentation
41
42 /*readonly*/ CProxy_Main mainProxy;
43 /*readonly*/ int arrayDimX;
44 /*readonly*/ int arrayDimY;
45 /*readonly*/ int arrayDimZ;
46 /*readonly*/ int blockDimX;
47 /*readonly*/ int blockDimY;
48 /*readonly*/ int blockDimZ;
49
50 // specify the number of worker chares in each dimension
51 /*readonly*/ int num_chare_x;
52 /*readonly*/ int num_chare_y;
53 /*readonly*/ int num_chare_z;
54
55 /*readonly*/ int globalBarrier;
56
57 static unsigned long next = 1;
58
59 int myrand(int numpes) {
60   next = next * 1103515245 + 12345;
61   return((unsigned)(next/65536) % numpes);
62 }
63
64 // We want to wrap entries around, and because mod operator % 
65 // sometimes misbehaves on negative values. -1 maps to the highest value.
66 #define wrap_x(a)       (((a)+num_chare_x)%num_chare_x)
67 #define wrap_y(a)       (((a)+num_chare_y)%num_chare_y)
68 #define wrap_z(a)       (((a)+num_chare_z)%num_chare_z)
69
70 #define index(a, b, c)  ( (a)*(blockDimY+2)*(blockDimZ+2) + (b)*(blockDimZ+2) + (c) )
71
72 #define PRINT_FREQ      100
73 #define CKP_FREQ                100
74 #define MAX_ITER         400    
75 #define WARM_ITER               5
76 #define LEFT                    1
77 #define RIGHT                   2
78 #define TOP                     3
79 #define BOTTOM                  4
80 #define FRONT                   5
81 #define BACK                    6
82 #define DIVIDEBY7               0.14285714285714285714
83
84 double startTime;
85 double endTime;
86
87 /** \class ghostMsg
88  *
89  */
90 class ghostMsg: public CMessage_ghostMsg {
91   public:
92     int dir;
93     int height;
94     int width;
95     double* gh;
96
97     ghostMsg(int _d, int _h, int _w) : dir(_d), height(_h), width(_w) {
98     }
99 };
100
101 /** \class Main
102  *
103  */
104 class Main : public CBase_Main {
105   public:
106     CProxy_Jacobi array;
107     int iterations;
108
109     Main(CkArgMsg* m) {
110       if ( (m->argc != 3) && (m->argc != 7) ) {
111         CkPrintf("%s [array_size] [block_size]\n", m->argv[0]);
112         CkPrintf("OR %s [array_size_X] [array_size_Y] [array_size_Z] [block_size_X] [block_size_Y] [block_size_Z]\n", m->argv[0]);
113         CkAbort("Abort");
114       }
115
116       // set iteration counter to zero
117       iterations = 0;
118
119       // store the main proxy
120       mainProxy = thisProxy;
121         
122       if(m->argc == 3) {
123           arrayDimX = arrayDimY = arrayDimZ = atoi(m->argv[1]);
124           blockDimX = blockDimY = blockDimZ = atoi(m->argv[2]); 
125       }
126       else if (m->argc == 7) {
127           arrayDimX = atoi(m->argv[1]);
128           arrayDimY = atoi(m->argv[2]);
129           arrayDimZ = atoi(m->argv[3]);
130           blockDimX = atoi(m->argv[4]); 
131           blockDimY = atoi(m->argv[5]); 
132           blockDimZ = atoi(m->argv[6]);
133       }
134
135       if (arrayDimX < blockDimX || arrayDimX % blockDimX != 0)
136         CkAbort("array_size_X % block_size_X != 0!");
137       if (arrayDimY < blockDimY || arrayDimY % blockDimY != 0)
138         CkAbort("array_size_Y % block_size_Y != 0!");
139       if (arrayDimZ < blockDimZ || arrayDimZ % blockDimZ != 0)
140         CkAbort("array_size_Z % block_size_Z != 0!");
141
142       num_chare_x = arrayDimX / blockDimX;
143       num_chare_y = arrayDimY / blockDimY;
144       num_chare_z = arrayDimZ / blockDimZ;
145
146       // print info
147       CkPrintf("\nSTENCIL COMPUTATION WITH NO BARRIERS\n");
148       CkPrintf("Running Jacobi on %d processors with (%d, %d, %d) chares\n", CkNumPes(), num_chare_x, num_chare_y, num_chare_z);
149       CkPrintf("Array Dimensions: %d %d %d\n", arrayDimX, arrayDimY, arrayDimZ);
150       CkPrintf("Block Dimensions: %d %d %d\n", blockDimX, blockDimY, blockDimZ);
151
152       // Create new array of worker chares
153 #if USE_TOPOMAP
154       CProxy_JacobiMap map = CProxy_JacobiMap::ckNew(num_chare_x, num_chare_y, num_chare_z);
155       CkPrintf("Topology Mapping is being done ... \n");
156       CkArrayOptions opts(num_chare_x, num_chare_y, num_chare_z);
157       opts.setMap(map);
158       array = CProxy_Jacobi::ckNew(opts);
159 #else
160       array = CProxy_Jacobi::ckNew(num_chare_x, num_chare_y, num_chare_z);
161 #endif
162
163       TopoManager tmgr;
164       CkArray *jarr = array.ckLocalBranch();
165       int jmap[num_chare_x][num_chare_y][num_chare_z];
166
167       int hops=0, p;
168       for(int i=0; i<num_chare_x; i++)
169         for(int j=0; j<num_chare_y; j++)
170           for(int k=0; k<num_chare_z; k++) {
171             jmap[i][j][k] = jarr->procNum(CkArrayIndex3D(i, j, k));
172           }
173
174       for(int i=0; i<num_chare_x; i++)
175         for(int j=0; j<num_chare_y; j++)
176           for(int k=0; k<num_chare_z; k++) {
177             p = jmap[i][j][k];
178             hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i+1)][j][k]);
179             hops += tmgr.getHopsBetweenRanks(p, jmap[wrap_x(i-1)][j][k]);
180             hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j+1)][k]);
181             hops += tmgr.getHopsBetweenRanks(p, jmap[i][wrap_y(j-1)][k]);
182             hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k+1)]);
183             hops += tmgr.getHopsBetweenRanks(p, jmap[i][j][wrap_z(k-1)]);
184           }
185       CkPrintf("Total Hops: %d\n", hops);
186         
187 #ifdef JACOBI_OPENMP 
188       CProxy_OmpInitializer ompInit = 
189         CProxy_OmpInitializer::ckNew(4);
190 #else    
191       //Start the computation
192       start();
193 #endif
194     }
195
196     void start() {
197       startTime = CmiWallTimer();                
198       array.doStep();
199     }
200
201     void converge(double maxDiff)
202     {
203         iterations++;
204         if(maxDiff <= THRESHOLD)
205         {
206             CkPrintf("Completed %d iterations with diff %lf\n", iterations, maxDiff);
207             endTime = CmiWallTimer();
208             CkPrintf("Time elapsed per iteration: %lf  total time:%lf\n", (endTime - startTime)/(MAX_ITER-1-WARM_ITER), endTime - startTime);
209             CkExit();
210         }else
211         {
212             if(iterations % PRINT_FREQ== 0)
213                 CkPrintf("iteration %d  diff=%f\n", iterations, maxDiff); 
214                         array.doStep();
215         }
216     }
217     // Each worker reports back to here when it completes an iteration
218         void report() {
219         iterations += CKP_FREQ;
220         if (iterations < MAX_ITER) {
221 #ifdef CMK_MEM_CHECKPOINT
222             CkCallback cb (CkIndex_Jacobi::doStep(), array);
223             CkStartMemCheckpoint(cb);
224 #else
225                         array.doStep();
226 #endif
227         } else {
228             CkPrintf("Completed %d iterations\n", MAX_ITER-1);
229             endTime = CmiWallTimer();
230             CkPrintf("Time elapsed per iteration: %f\n", (endTime - startTime)/(MAX_ITER-1-WARM_ITER));
231             CkExit();
232         }
233     }
234
235 };
236
237 /** \class Jacobi
238  *
239  */
240
241 class Jacobi: public CBase_Jacobi {
242   Jacobi_SDAG_CODE
243
244   public:
245     int iterations;
246     int imsg;
247
248     double *temperature;
249         double timing,average;
250     int neighbors;
251     // Constructor, initialize values
252     Jacobi() {
253       // This call is an anachronism - the need to call __sdag_init() has been
254       // removed. We still call it here to test backward compatibility.
255       __sdag_init();
256
257       int i, j, k;
258       // allocate a three dimensional array
259       temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
260
261       for(i=0; i<blockDimX+2; ++i) {
262           for(j=0; j<blockDimY+2; ++j) {
263               for(k=0; k<blockDimZ+2; ++k) {
264                   temperature[index(i, j, k)] = 0.0;
265               }
266           } 
267       }
268
269       iterations = 0;
270       imsg = 0;
271       if(thisIndex.x==0 && thisIndex.y==0 && thisIndex.z ==0)
272           constrainBC();
273
274       usesAtSync = CmiTrue;
275       neighbors = 6;
276       if(thisIndex.x == 0) 
277           neighbors--;
278       if( thisIndex.x== num_chare_x-1)
279           neighbors--;
280       if(thisIndex.y == 0) 
281           neighbors--;
282       if( thisIndex.y== num_chare_y-1)
283           neighbors--;
284       if(thisIndex.z == 0) 
285           neighbors--;
286       if( thisIndex.z== num_chare_z-1)
287           neighbors--;
288        // CkPrintf("neighbor = %d \n", neighbors);
289     }
290
291     Jacobi(CkMigrateMessage* m): CBase_Jacobi(m) { }
292
293     ~Jacobi() { 
294         delete [] temperature; 
295     }
296
297         // Pupping function for migration and fault tolerance
298         // Condition: assuming the 3D Chare Arrays are NOT used
299         void pup(PUP::er &p){
300                 
301                 // calling parent's pup
302                 CBase_Jacobi::pup(p);
303         
304                 __sdag_pup(p);
305         
306                 // pupping properties of this class
307                 p | iterations;
308                 p | imsg;
309
310                 // if unpacking, allocate the memory space
311                 if(p.isUnpacking()){
312                         temperature = new double[(blockDimX+2) * (blockDimY+2) * (blockDimZ+2)];
313         
314                 }
315
316                 // pupping the arrays
317                 p((char *)temperature, (blockDimX+2) * (blockDimY+2) * (blockDimZ+2) * sizeof(double));
318
319         }
320
321
322     // Send ghost faces to the six neighbors
323     void begin_iteration(void) {
324         /*  
325       if (thisIndex.x == 0 && thisIndex.y == 0 && thisIndex.z == 0) {
326 //          CkPrintf("Start of iteration %d\n", iterations);
327                         if(iterations % PRINT_FREQ == 0){
328                                 average = timing;
329                                 timing = CmiWallTimer();
330                                 average = (timing - average)/(double)PRINT_FREQ;
331                                 CkPrintf("time=%.2f it=%d avg=%.4f\n",timing,iterations,average);
332                         }
333       }*/
334       iterations++;
335
336       // Copy different faces into messages
337        ghostMsg *leftMsg ;
338        ghostMsg *rightMsg; 
339        ghostMsg *topMsg ;
340        ghostMsg *bottomMsg;
341        ghostMsg *frontMsg ;
342        ghostMsg *backMsg ;
343      
344        
345       if(thisIndex.x-1 >= 0)
346       {
347           leftMsg = new (blockDimY*blockDimZ) ghostMsg(RIGHT, blockDimY, blockDimZ);
348           for(int j=0; j<blockDimY; ++j) 
349               for(int k=0; k<blockDimZ; ++k) {
350                   leftMsg->gh[k*blockDimY+j] = temperature[index(1, j+1, k+1)];
351               }
352           CkSetRefNum(leftMsg, iterations);
353           thisProxy(wrap_x(thisIndex.x-1), thisIndex.y, thisIndex.z).receiveGhosts(leftMsg);
354       }
355
356       if(thisIndex.x+1 <num_chare_x) 
357       {
358           rightMsg = new (blockDimY*blockDimZ) ghostMsg(LEFT, blockDimY, blockDimZ);
359           for(int j=0; j<blockDimY; ++j) 
360               for(int k=0; k<blockDimZ; ++k) {
361                   rightMsg->gh[k*blockDimY+j] = temperature[index(blockDimX, j+1, k+1)];}
362           CkSetRefNum(rightMsg, iterations);
363           thisProxy(wrap_x(thisIndex.x+1), thisIndex.y, thisIndex.z).receiveGhosts(rightMsg);
364       }
365
366       if(thisIndex.y-1 >= 0)
367       {
368           topMsg = new (blockDimX*blockDimZ) ghostMsg(BOTTOM, blockDimX, blockDimZ);
369           for(int i=0; i<blockDimX; ++i) 
370               for(int k=0; k<blockDimZ; ++k) {
371                   topMsg->gh[k*blockDimX+i] = temperature[index(i+1, 1, k+1)];
372               }
373
374           CkSetRefNum(topMsg, iterations);
375           thisProxy(thisIndex.x, wrap_y(thisIndex.y-1), thisIndex.z).receiveGhosts(topMsg);
376       }
377
378       if(thisIndex.y+1 <num_chare_y)
379       {
380           bottomMsg = new (blockDimX*blockDimZ) ghostMsg(TOP, blockDimX, blockDimZ);
381           for(int i=0; i<blockDimX; ++i) 
382           for(int k=0; k<blockDimZ; ++k) {
383               bottomMsg->gh[k*blockDimX+i] = temperature[index(i+1, blockDimY, k+1)];
384           }
385           CkSetRefNum(bottomMsg, iterations);
386           thisProxy(thisIndex.x, wrap_y(thisIndex.y+1), thisIndex.z).receiveGhosts(bottomMsg);
387       }
388
389       if(thisIndex.z-1 >= 0)
390       {
391           frontMsg = new (blockDimX*blockDimY) ghostMsg(BACK, blockDimX, blockDimY);
392           for(int i=0; i<blockDimX; ++i) 
393               for(int j=0; j<blockDimY; ++j) {
394                   frontMsg->gh[j*blockDimX+i] = temperature[index(i+1, j+1, 1)];
395               }
396           CkSetRefNum(frontMsg, iterations);
397           thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z-1)).receiveGhosts(frontMsg);
398       }
399       
400       if(thisIndex.z+1 <num_chare_z)
401       {
402           backMsg = new (blockDimX*blockDimY) ghostMsg(FRONT, blockDimX, blockDimY);
403           for(int i=0; i<blockDimX; ++i) 
404               for(int j=0; j<blockDimY; ++j) {
405                   backMsg->gh[j*blockDimX+i] = temperature[index(i+1, j+1, blockDimZ)];
406               }
407           CkSetRefNum(backMsg, iterations);
408           thisProxy(thisIndex.x, thisIndex.y, wrap_z(thisIndex.z+1)).receiveGhosts(backMsg);
409       }
410
411
412
413     }
414
415     void processGhosts(ghostMsg *gmsg) {
416       int height = gmsg->height;
417       int width = gmsg->width;
418
419       /*CkPrintf("[%d,%d,%d] received %d msg from %d ", thisIndex.x, thisIndex.y, thisIndex.z, height*width, gmsg->dir);
420       for(int j=0; j<height; ++j) 
421           for(int k=0; k<width; ++k) {
422           CkPrintf(" %f ", gmsg->gh[k*height+j]);
423           }
424       CkPrintf("\n\n");
425       */
426       switch(gmsg->dir) {
427       case LEFT:
428           for(int j=0; j<height; ++j) 
429               for(int k=0; k<width; ++k) {
430                   temperature[index(0, j+1, k+1)] = gmsg->gh[k*height+j];
431               }
432           break;
433       case RIGHT:
434           for(int j=0; j<height; ++j) 
435               for(int k=0; k<width; ++k) {
436                   temperature[index(blockDimX+1, j+1, k+1)] = gmsg->gh[k*height+j];
437               }
438           break;
439       case TOP:
440           for(int i=0; i<height; ++i) 
441               for(int k=0; k<width; ++k) {
442                   temperature[index(i+1, 0, k+1)] = gmsg->gh[k*height+i];
443               }
444           break;
445       case BOTTOM:
446           for(int i=0; i<height; ++i) 
447               for(int k=0; k<width; ++k) {
448                   temperature[index(i+1, blockDimY+1, k+1)] = gmsg->gh[k*height+i];
449               }
450           break;
451       case FRONT:
452           for(int i=0; i<height; ++i) 
453               for(int j=0; j<width; ++j) {
454                   temperature[index(i+1, j+1,0)] = gmsg->gh[j*height+i];
455               }
456           break;
457       case BACK:
458           for(int i=0; i<height; ++i) 
459               for(int j=0; j<width; ++j) {
460                   temperature[index(i+1, j+1, blockDimZ+1)] = gmsg->gh[j*height+i];
461               }
462           break;
463       default:
464           CkAbort("ERROR\n");
465       }
466
467       delete gmsg;
468     }
469
470
471         void check_and_compute() {
472                 double maxDiff = compute_kernel();
473
474                 // calculate error
475                 // not being done right now since we are doing a fixed no. of iterations
476
477                 //constrainBC();
478         //CkPrintf("[%d,%d,%d] max = %f\n", thisIndex.x, thisIndex.y, thisIndex.z, maxDiff);
479                 if (iterations % CKP_FREQ == 0 || iterations > MAX_ITER){
480 #ifdef CMK_MEM_CHECKPOINT
481                         contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy));
482 #elif CMK_MESSAGE_LOGGING
483                         if(iterations > MAX_ITER)
484                                 contribute(0, 0, CkReduction::concat, CkCallback(CkIndex_Main::report(), mainProxy));
485                         else
486                                 AtSync();
487 #else
488             CkCallback cb(CkReductionTarget(Main, converge), mainProxy);
489             contribute(sizeof(double), &maxDiff, CkReduction::max_double, cb);
490 #endif
491         } else {
492             CkCallback cb(CkReductionTarget(Main, converge), mainProxy);
493             contribute(sizeof(double), &maxDiff, CkReduction::max_double, cb);
494                         //doStep();
495         }
496     }
497
498         void ResumeFromSync(){
499                 doStep();
500         }
501
502
503     // Check to see if we have received all neighbor values yet
504     // If all neighbor values have been received, we update our values and proceed
505     double compute_kernel() {     //Gauss-Siedal compute
506         int i;
507         double maxDiff=0;
508         double tmp, update=0;
509         int count=1;
510   #pragma omp parallel for schedule(dynamic) 
511         for(i=1; i<blockDimX+1; ++i) {
512           //printf("[%d] did  %d iteration out of %d \n", omp_get_thread_num(), i, blockDimX+1); 
513           for(int j=1; j<blockDimY+1; ++j) {
514               for(int k=1; k<blockDimZ+1; ++k) {
515                   update = temperature[index(i, j, k)];
516                   count = 1;
517                   //CkPrintf("{%f, ", update);
518                   if(!(thisIndex.x+1>=num_chare_x&&i==blockDimX))
519                   {
520                       update += temperature[index(i+1, j, k)];
521                       count++;
522                   }
523                   if(!(thisIndex.x-1<0&&i==1))
524                   {
525                       update += temperature[index(i-1, j, k)];
526                       count++;
527                   }
528                   if(!(thisIndex.y+1>=num_chare_y&&j==blockDimY))
529                   {
530                       update += temperature[index(i, j+1, k)];
531                       count++;
532                   }
533                   if(!(thisIndex.y-1<0&&j==1))
534                   {
535                       update += temperature[index(i, j-1, k)];
536                       count++;
537                   }
538                   if(!(thisIndex.z+1>=num_chare_z&&k==blockDimZ))
539                   {
540                       update += temperature[index(i, j, k+1)];
541                       count++;
542                   }
543                   if(!(thisIndex.z-1<0&&k==1))
544                   {
545                       update += temperature[index(i, j, k-1)];
546                       count++;
547                   }
548                   update /= count;
549                   double diff = temperature[index(i, j, k)] - update;
550                   if(diff<0) diff = -1*diff;
551                   if(diff>maxDiff) maxDiff = diff;
552                   temperature[index(i, j, k)] = update;
553                   //CkPrintf(" %d:%f }", count, update);
554               }
555           }
556       }
557         //CkPrintf("[%d:%d:%d]\n\n", thisIndex.x, thisIndex.y, thisIndex.z);
558     return maxDiff;
559     }
560
561     // Enforce some boundary conditions
562     void constrainBC() {
563       
564         temperature[index(1, 1, 1)] = 255.0;
565         // Heat left, top and front faces of each chare's block
566       /*  for(int i=1; i<blockDimX+1; ++i)
567       for(int k=1; k<blockDimZ+1; ++k)
568               temperature[index(i, 1, k)] = 255.0;
569         for(int j=1; j<blockDimY+1; ++j)
570           for(int k=1; k<blockDimZ+1; ++k)
571               temperature[index(1, j, k)] = 255.0;
572       for(int i=1; i<blockDimX+1; ++i)
573           for(int j=1; j<blockDimY+1; ++j)
574               temperature[index(i, j, 1)] = 255.0; */
575     }
576
577 };
578
579 /** \class JacobiMap
580  *
581  */
582
583 class JacobiMap : public CkArrayMap {
584   public:
585     int X, Y, Z;
586     int *mapping;
587
588     JacobiMap(int x, int y, int z) {
589       X = x; Y = y; Z = z;
590       mapping = new int[X*Y*Z];
591
592       // we are assuming that the no. of chares in each dimension is a 
593       // multiple of the torus dimension
594
595       TopoManager tmgr;
596       int dimNX, dimNY, dimNZ, dimNT;
597
598       dimNX = tmgr.getDimNX();
599       dimNY = tmgr.getDimNY();
600       dimNZ = tmgr.getDimNZ();
601       dimNT = tmgr.getDimNT();
602
603       // we are assuming that the no. of chares in each dimension is a 
604       // multiple of the torus dimension
605       int numCharesPerPe = X*Y*Z/CkNumPes();
606
607       int numCharesPerPeX = X / dimNX;
608       int numCharesPerPeY = Y / dimNY;
609       int numCharesPerPeZ = Z / dimNZ;
610       int pe = 0, pes = CkNumPes();
611
612 #if USE_BLOCK_RNDMAP
613       int used[pes];
614       for(int i=0; i<pes; i++)
615         used[i] = 0;
616 #endif
617
618       if(dimNT < 2) {   // one core per node
619         if(CkMyPe()==0) CkPrintf("%d %d %d %d : %d %d %d \n", dimNX, dimNY, dimNZ, dimNT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ); 
620         for(int i=0; i<dimNX; i++)
621           for(int j=0; j<dimNY; j++)
622             for(int k=0; k<dimNZ; k++)
623             {
624 #if USE_BLOCK_RNDMAP
625               pe = myrand(pes); 
626               while(used[pe]!=0) {
627                 pe = myrand(pes); 
628               }
629               used[pe] = 1;
630 #endif
631
632               for(int ci=i*numCharesPerPeX; ci<(i+1)*numCharesPerPeX; ci++)
633                 for(int cj=j*numCharesPerPeY; cj<(j+1)*numCharesPerPeY; cj++)
634                   for(int ck=k*numCharesPerPeZ; ck<(k+1)*numCharesPerPeZ; ck++) {
635 #if USE_TOPOMAP
636                     mapping[ci*Y*Z + cj*Z + ck] = tmgr.coordinatesToRank(i, j, k);
637 #elif USE_BLOCK_RNDMAP
638                     mapping[ci*Y*Z + cj*Z + ck] = pe;
639 #endif
640                   }
641             }
642       } else {          // multiple cores per node
643         // In this case, we split the chares in the X dimension among the
644         // cores on the same node. The strange thing I figured out is that
645         // doing this in the Z dimension is not as good.
646         numCharesPerPeX /= dimNT;
647         if(CkMyPe()==0) CkPrintf("%d %d %d %d : %d %d %d \n", dimNX, dimNY, dimNZ, dimNT, numCharesPerPeX, numCharesPerPeY, numCharesPerPeZ);
648
649         for(int i=0; i<dimNX; i++)
650           for(int j=0; j<dimNY; j++)
651             for(int k=0; k<dimNZ; k++)
652               for(int l=0; l<dimNT; l++)
653                 for(int ci=(dimNT*i+l)*numCharesPerPeX; ci<(dimNT*i+l+1)*numCharesPerPeX; ci++)
654                   for(int cj=j*numCharesPerPeY; cj<(j+1)*numCharesPerPeY; cj++)
655                     for(int ck=k*numCharesPerPeZ; ck<(k+1)*numCharesPerPeZ; ck++) {
656                       mapping[ci*Y*Z + cj*Z + ck] = tmgr.coordinatesToRank(i, j, k, l);
657                     }
658       } // end of if
659
660       if(CkMyPe() == 0) CkPrintf("Map generated ... \n");
661     }
662
663     ~JacobiMap() { 
664       delete [] mapping;
665     }
666
667     int procNum(int, const CkArrayIndex &idx) {
668       int *index = (int *)idx.data();
669       return mapping[index[0]*Y*Z + index[1]*Z + index[2]]; 
670     }
671 };
672
673 class OmpInitializer : public CBase_OmpInitializer{
674 public:
675         OmpInitializer(int numThreads) {
676 #ifdef JACOBI_OPENMP
677           if (numThreads < 1) {
678             numThreads = 1; 
679           }
680           //omp_set_num_threads(numThreads);
681           if(CkMyPe() == 0)
682           {
683 #pragma omp parallel
684               { if(omp_get_thread_num() == 0 )
685                   CkPrintf("Computation loop will be parallelized"
686                " with %d OpenMP threads\n", omp_get_num_threads());
687               }
688           }
689 #endif
690           mainProxy.start();
691         }
692 };
693
694
695 #include "jacobi3d.def.h"