reset state for CmiReduce after crash; remove print
author Xiang Ni <xiangni2@illinois.edu>
Fri, 29 Mar 2013 16:36:55 +0000 (11:36 -0500)
committer Xiang Ni <xiangni2@illinois.edu>
Fri, 29 Mar 2013 16:36:55 +0000 (11:36 -0500)
src/ck-core/ckmemcheckpoint.C
src/ck-ldb/LBDBManager.C
src/conv-core/convcore.c
src/conv-core/converse.h

index c4dc028c8dd537c689026fdc02c8c05a34bcbdc7..d678343ce6182a954c262637fe060403f31b37f3 100644 (file)
@@ -67,8 +67,8 @@ void noopck(const char*, ...)
 #endif
 
 #define CMK_CHKP_ALL           1
-#define CMK_USE_BARRIER                0
-#define CMK_USE_CHECKSUM               1
+#define CMK_USE_BARRIER                1
+#define CMK_USE_CHECKSUM               0
 
 //stream remote records happned only if CK_NO_PROC_POOL =1 which means the chares to pe map will change
 #define STREAMING_INFORMHOME                    1
@@ -1262,6 +1262,10 @@ void CkMemCheckPT::RollBack(){
         obj->flushStates();
         obj->ckJustMigrated();
       }
+
+      //reset CmiReduce
+      CmiResetReductions();
+
       // reset again
       //CpvAccess(_qd)->flushStates();
       if(CmiNumPartition()==1){
@@ -1697,9 +1701,7 @@ void CkMemCheckPT::RollBack(){
       char *restartmsg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
       CmiSetHandler(restartmsg, restartBeginHandlerIdx);
 #if CMK_USE_BARRIER
-      //CmiPrintf("before reduce\n");  
       CmiReduce(restartmsg,CmiMsgHeaderSizeBytes,doNothingMsg);
-      //CmiPrintf("after reduce\n");   
 #else
       CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
 #endif 
@@ -1878,12 +1880,12 @@ void CkMemCheckPT::RollBack(){
         CmiSetHandler(restartmsg, restartBeginHandlerIdx);
         //CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
 #if CMK_USE_BARRIER
-      //CmiPrintf("before reduce\n");  
+      CmiPrintf("[%d]before reduce proc\n", CkMyPe()); 
        if(CpvAccess(resilience)==1){
          CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
        }else
          CmiReduce(restartmsg,CmiMsgHeaderSizeBytes,doNothingMsg);
-      //CmiPrintf("after reduce\n");   
+      CmiPrintf("[%d]after reduce proc\n", CkMyPe());  
 #else
        CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
 #endif 
@@ -1910,12 +1912,12 @@ void CkMemCheckPT::RollBack(){
         CmiSetHandler(restartmsg, restartBeginHandlerIdx);
         //CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
 #if CMK_USE_BARRIER
-      //CmiPrintf("before reduce\n");  
+      CmiPrintf("[%d]before reduce array\n", CkMyPe());        
        if(CpvAccess(resilience)==1){
          CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
        }else
          CmiReduce(restartmsg,CmiMsgHeaderSizeBytes,doNothingMsg);
-      //CmiPrintf("after reduce\n");   
+      CmiPrintf("[%d]after reduce array\n", CkMyPe()); 
 #else
        CmiSyncSendAndFree(_diePE, CmiMsgHeaderSizeBytes, (char *)restartmsg);
 #endif 
index aa1bebf45848dd73f52ebcbe21fb9d85eedf76a8..35e3e575c4f420b9efaf1ca24697cc3cfb7a1f8e 100644 (file)
@@ -20,7 +20,6 @@ struct MigrateCB;
 //Called periodically-- starts next load balancing cycle
 void LBDB::batsyncer::gotoSync(void *bs)
 {
-       CkPrintf("[%d][%d] go to sync\n",CmiMyPartition(),CkMyPe());
   LBDB::batsyncer *s=(LBDB::batsyncer *)bs;
   s->db->AtLocalBarrier(s->BH);
 }
@@ -28,7 +27,6 @@ void LBDB::batsyncer::gotoSync(void *bs)
 void LBDB::batsyncer::resumeFromSync(void *bs)
 {
   LBDB::batsyncer *s=(LBDB::batsyncer *)bs;
-//  CmiPrintf("[%d] LBDB::batsyncer::resumeFromSync with %gs\n", CkMyPe(), s->period);
 
 #if 0
   double curT = CmiWallTimer();
index ccc86770b0dfabc3d57578ead4b7d48ead514f35..a7418e417da11633d05407a9a5dc7c63d2201cfe 100644 (file)
@@ -2587,6 +2587,14 @@ void CmiReductionsInit() {
   for (i=0; i<16; ++i) CpvAccess(_reduce_info)[i] = NULL;
 }
 
+void CmiResetReductions(){
+  CpvAccess(_reduce_seqID_global) = CmiReductionID_globalOffset;
+  CpvAccess(_reduce_seqID_request) = CmiReductionID_requestOffset;
+  CpvAccess(_reduce_seqID_dynamic) = CmiReductionID_dynamicOffset;
+  int i;
+  for (i=0; i<16; ++i) CpvAccess(_reduce_info)[i] = NULL;
+}
+
 /*****************************************************************************
  *
  * Multicast groups
index f156d9204b9356eb5b49efa6fcc84f73a328d76a..080f956a0648c4f32eb7586925c443a0f86faeea 100644 (file)
@@ -1121,6 +1121,8 @@ typedef CmiUInt2 CmiReductionID;
 void * CmiReduceMergeFn_random(int*, void*, void**, int);
 
 void CmiReduce(void *msg, int size, CmiReduceMergeFn mergeFn);
+
+void CmiResetReductions();
 void CmiReduceStruct(void *data, CmiReducePupFn pupFn,
                      CmiReduceMergeFn mergeFn, CmiHandler dest,
                      CmiReduceDeleteFn deleteFn);