Add the option of block mapping in the SMP node level for the default mapping of...
authorChao Mei <chaomei2@hopper02.(none)>
Thu, 10 May 2012 03:19:26 +0000 (20:19 -0700)
committerChao Mei <chaomei2@hopper02.(none)>
Thu, 10 May 2012 03:19:26 +0000 (20:19 -0700)
src/ck-core/cklocation.C
src/ck-core/init.C

index 97b34ef20826ac937202e0f15f6e4df7cf9ae6c1..0d49f64a8354af5f2930e7ba82a4ad085d7f2911 100644 (file)
@@ -55,7 +55,8 @@ static const char *idx2str(const CkArrayMessage *m) {
 #   define DEBUG(x)   /**/
 #endif
 
-
+//whether to use block mapping at the SMP node level
+bool useNodeBlkMapping;
 
 #if CMK_LBDB_ON
 /*LBDB object handles are fixed-sized, and not necc.
@@ -232,6 +233,12 @@ public:
   int _numFirstSet;            /* _remChares X (_binSize + 1) -- number of
                                   chares in the first set */
 
+  int _nBinSizeFloor;           /* floor of numChares/numNodes */
+  int _nRemChares;              /* numChares % numNodes -- equals the number of
+                                   nodes in the first set */
+  int _nNumFirstSet;            /* _nRemChares X (_nBinSizeFloor + 1) -- number
+                                   of chares in the first set of nodes */
+
   /** All processors are divided into two sets. Processors in the first set
    *  have one chare more than the processors in the second set. */
 
@@ -246,6 +253,8 @@ public:
   void compute_binsize()
   {
     int numPes = CkNumPes();
+    //For now, assume homogeneous nodes, i.e. each node has the same number of PEs
+    int numNodes = CkNumNodes();
 
     if (_nelems.nInts == 1) {
       _numChares = _nelems.data()[0];
@@ -259,6 +268,10 @@ public:
     _binSizeFloor = (int)floor((double)_numChares/(double)numPes);
     _binSizeCeil = (int)ceil((double)_numChares/(double)numPes);
     _numFirstSet = _remChares * (_binSizeFloor + 1);
+
+    _nRemChares = _numChares % numNodes;
+    _nBinSizeFloor = _numChares/numNodes;
+    _nNumFirstSet = _nRemChares * (_nBinSizeFloor +1);
   }
 
   void pup(PUP::er& p){
@@ -268,8 +281,11 @@ public:
     p|_numChares;
     p|_remChares;
     p|_numFirstSet;
+    p|_nRemChares;
+    p|_nBinSizeFloor;
+    p|_nNumFirstSet;
   }
-};
+}c;
 
 
 /**
@@ -317,6 +333,31 @@ public:
     }
 #endif
 
+    if(useNodeBlkMapping){
+      if(flati < amaps[arrayHdl]->_numChares){
+        int numCharesOnNode = amaps[arrayHdl]->_nBinSizeFloor;
+        int startNodeID, offsetInNode;
+        if(flati < amaps[arrayHdl]->_nNumFirstSet){
+          numCharesOnNode++;
+          startNodeID = flati/numCharesOnNode;
+          offsetInNode = flati%numCharesOnNode;
+        }else{
+          startNodeID = amaps[arrayHdl]->_nRemChares+(flati-amaps[arrayHdl]->_nNumFirstSet)/numCharesOnNode;
+          offsetInNode = (flati-amaps[arrayHdl]->_nNumFirstSet)%numCharesOnNode;
+        }
+        int nodeSize = CkMyNodeSize(); //assuming every node has same number of PEs
+        int elemsPerPE = numCharesOnNode/nodeSize;
+        int remElems = numCharesOnNode%nodeSize;
+        int firstSetPEs = remElems*(elemsPerPE+1);
+        if(offsetInNode<firstSetPEs){
+          return CkNodeFirst(startNodeID)+offsetInNode/(elemsPerPE+1);
+        }else{
+          return CkNodeFirst(startNodeID)+remElems+(offsetInNode-firstSetPEs)/elemsPerPE;
+        }
+      } else
+          return (flati % CkNumPes());
+    }
+    //regular PE-based block mapping
     if(flati < amaps[arrayHdl]->_numFirstSet)
       return (flati / (amaps[arrayHdl]->_binSizeFloor + 1));
     else if (flati < amaps[arrayHdl]->_numChares)
index be1f834150816d532163da5f9addd3a4b58f050c..23ea9d9b6c96f183ed9f5331666ad0b2c4a8c8b0 100644 (file)
@@ -222,6 +222,8 @@ static int _raiseEvac=0;
 static char *_raiseEvacFile;
 void processRaiseEvacFile(char *raiseEvacFile);
 
+extern bool useNodeBlkMapping;
+
 static inline void _parseCommandLineOpts(char **argv)
 {
   if (CmiGetArgFlagDesc(argv,"+cs", "Print extensive statistics at shutdown"))
@@ -329,6 +331,11 @@ static inline void _parseCommandLineOpts(char **argv)
          _isStaticInsertion = true;
        }
 
+        useNodeBlkMapping = false;
+        if (CmiGetArgFlagDesc(argv,"+useNodeBlkMapping","Array elements are block-mapped in SMP-node level")) {
+          useNodeBlkMapping = true;
+        }
+
 #if ! CMK_WITH_CONTROLPOINT
        // Display a warning if charm++ wasn't compiled with control point support but user is expecting it
        if( CmiGetArgFlag(argv,"+CPSamplePeriod") ||