bug fix: disable failure generation when not requested
author Xiang Ni <xiangni2@illinois.edu>
Sun, 31 Mar 2013 22:56:04 +0000 (17:56 -0500)
committer Xiang Ni <xiangni2@illinois.edu>
Sun, 31 Mar 2013 22:56:04 +0000 (17:56 -0500)
src/ck-core/ckmemcheckpoint.C
src/libs/ck-libs/tcharm/tcharm.C

index b3aef384dadd3aa5d359f90f18fa4e355a9a98b5..1d3138b30f7bc9a7f5a1a11a640782f706cf7a0b 100644 (file)
@@ -1551,9 +1551,6 @@ void CkMemCheckPT::RollBack(){
         fflush(stdout);
       }
 
-      if(CkMyPe()==0){
-        replicaInjectFailure();
-      }
 
       CKLOCMGR_LOOP(mgr->resumeFromChkp(););
       inRestarting = 0;
@@ -1576,7 +1573,12 @@ void CkMemCheckPT::RollBack(){
         CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
       }
       //inject next failure
-      thisProxy[CkMyPe()].generateFailure();
+      if(killFlag==2){
+        if(CkMyPe()==0){
+          replicaInjectFailure();
+        }
+        thisProxy[CkMyPe()].generateFailure();
+      }
 #endif
 
 #if CK_NO_PROC_POOL
@@ -2379,7 +2381,7 @@ void CkMemCheckPT::RollBack(){
     {
 #if CMK_MEM_CHECKPOINT
       double now = CmiWallTimer();
-      if (lastPingTime > 0 && now - lastPingTime > 4 && !CkInLdb() && !CkInRestarting() && !CkInCheckpointing()) {
+      if (lastPingTime > 0 && now - lastPingTime > 2 && !CkInLdb() && !CkInRestarting() && !CkInCheckpointing()) {
         //if (lastPingTime > 0 && now - lastPingTime > 2 && !CkInLdb()) {
         int i, pe, buddy;
         // tell everyone the buddy dies
@@ -2535,9 +2537,11 @@ void CkMemCheckPT::RollBack(){
             CkDieNow();
           }else{
             //next failure
-            CProxy_CkMemCheckPT checkptMgr(ckCheckPTGroupID);
-            checkptMgr.generateFailure();
-            CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->replicaInjectFailure();
+            if(killFlag == 2){
+              CProxy_CkMemCheckPT checkptMgr(ckCheckPTGroupID);
+              checkptMgr.generateFailure();
+              CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->replicaInjectFailure();
+            }
           }
 #else 
           kill(getpid(),SIGKILL);                                               
index 664983422dbc667ad5e15c225ae9106157fefaec..03327376cc9d449f721c13e223cd0dcab7b33ec9 100644 (file)
@@ -594,6 +594,8 @@ void TCharm::ResumeFromSync(void)
 
 void TCharm::ResumeFromChkpSync(void)
 {
+
+  //CkPrintf("[%d][%d] Resume from chkp sync\n", CmiMyPartition(), CkMyPe());
   start();
 }