Finally, a first version of parallel recovery with scalable collectives.
author Esteban Meneses <emenese2@illinois.edu>
Sat, 7 Jul 2012 23:26:53 +0000 (18:26 -0500)
committer Esteban Meneses <emenese2@illinois.edu>
Sat, 7 Jul 2012 23:26:53 +0000 (18:26 -0500)
src/ck-core/ckcausalmlog.C
src/ck-core/ckreduction.C
src/ck-core/ckreduction.h

index 800a9843fb9f2d54d6ae96a6fb115d7470d3aede..32332075998015fcdfa67c90e9d8e5ed88159dd8 100644 (file)
@@ -2997,7 +2997,7 @@ void _sendBackLocationHandler(char *receivedMsg){
        mgr->resume(idx,pmem,CmiTrue);
        donotCountMigration=0;
        informLocationHome(gID,idx,mgr->homePe(idx),CkMyPe());
        mgr->resume(idx,pmem,CmiTrue);
        donotCountMigration=0;
        informLocationHome(gID,idx,mgr->homePe(idx),CkMyPe());
-       printf("Array element inserted at processor %d after distribution at restart ",CkMyPe());
+       printf("Array element inserted at processor %d after parallel recovery\n",CkMyPe());
        idx.print();
 
        // decrementing number of emigrant objects at reduction manager
        idx.print();
 
        // decrementing number of emigrant objects at reduction manager
@@ -3006,6 +3006,7 @@ void _sendBackLocationHandler(char *receivedMsg){
        mgr->migratableList((CkLocRec_local *)rec,eltList);
        CkReductionMgr *reductionMgr = (CkReductionMgr*)CkpvAccess(_groupTable)->find(eltList[0]->mlogData->objID.data.array.id).getObj();
        reductionMgr->decNumEmigrantRecObjs();
        mgr->migratableList((CkLocRec_local *)rec,eltList);
        CkReductionMgr *reductionMgr = (CkReductionMgr*)CkpvAccess(_groupTable)->find(eltList[0]->mlogData->objID.data.array.id).getObj();
        reductionMgr->decNumEmigrantRecObjs();
+       reductionMgr->decGCount();
 
        // checking if it has received all emigrant recovering objects
        CpvAccess(_numEmigrantRecObjs)--;
 
        // checking if it has received all emigrant recovering objects
        CpvAccess(_numEmigrantRecObjs)--;
@@ -3054,6 +3055,7 @@ void _distributedLocationHandler(char *receivedMsg){
                        // incrementing immigrant counter at reduction manager
                        CkReductionMgr *reductionMgr = (CkReductionMgr*)CkpvAccess(_groupTable)->find(eltList[i]->mlogData->objID.data.array.id).getObj();
                        reductionMgr->incNumImmigrantRecObjs();
                        // incrementing immigrant counter at reduction manager
                        CkReductionMgr *reductionMgr = (CkReductionMgr*)CkpvAccess(_groupTable)->find(eltList[i]->mlogData->objID.data.array.id).getObj();
                        reductionMgr->incNumImmigrantRecObjs();
+                       reductionMgr->decGCount();
 
                        eltList[i]->ResumeFromSync();
                }
 
                        eltList[i]->ResumeFromSync();
                }
index d9d928b797b5d37f0babaa9187b278fdd0d786c4..48a174e61b54406ad596a1b72d6abdf9c51e366f 100644 (file)
@@ -679,7 +679,8 @@ void CkReductionMgr::finishReduction(void)
 #endif
 
 #if GROUP_LEVEL_REDUCTION
 #endif
 
 #if GROUP_LEVEL_REDUCTION
-  if (nRemote<treeKids()) return;//Need more remote messages
+  if (nRemote<treeKids())  return;//Need more remote messages
+       
 #endif
  
   DEBR((AA"Reducing data... %d %d\n"AB,nContrib,(lcount+adj(redNo).lcount)));
 #endif
  
   DEBR((AA"Reducing data... %d %d\n"AB,nContrib,(lcount+adj(redNo).lcount)));
@@ -693,7 +694,7 @@ void CkReductionMgr::finishReduction(void)
     DEBR((AA"Passing reduced data up to parent node %d.\n"AB,treeParent()));
     DEBR((AA"Message gcount is %d+%d+%d.\n"AB,result->gcount,gcount,adj(redNo).gcount));
 #if (defined(_FAULT_CAUSAL_))
     DEBR((AA"Passing reduced data up to parent node %d.\n"AB,treeParent()));
     DEBR((AA"Message gcount is %d+%d+%d.\n"AB,result->gcount,gcount,adj(redNo).gcount));
 #if (defined(_FAULT_CAUSAL_))
-    result->gcount+=gcount+adj(redNo).gcount-numImmigrantRecObjs;
+    result->gcount+=gcount+adj(redNo).gcount;
 #else
     result->gcount+=gcount+adj(redNo).gcount;
 #endif
 #else
     result->gcount+=gcount+adj(redNo).gcount;
 #endif
index f8c91416bb2a9d07741bc38c6dfab62154b30144..d4b06851510432f54155bccd1d2494608207b62f 100644 (file)
@@ -545,6 +545,7 @@ public:
        int getGCount(){return gcount;};
         static void sanitycheck();
 #if defined(_FAULT_CAUSAL_)
        int getGCount(){return gcount;};
         static void sanitycheck();
 #if defined(_FAULT_CAUSAL_)
+       void decGCount(){gcount--;}
        void incNumImmigrantRecObjs(){
                numImmigrantRecObjs++;
        }
        void incNumImmigrantRecObjs(){
                numImmigrantRecObjs++;
        }