runtime selection of comparison strategy
author Nikhil Jain <nikhil@illinois.edu>
Wed, 9 Jan 2013 02:53:35 +0000 (20:53 -0600)
committer Nikhil Jain <nikhil@illinois.edu>
Wed, 9 Jan 2013 02:53:35 +0000 (20:53 -0600)
src/ck-core/ckmemcheckpoint.C

index 42b089a3f8d5cc56442e2572c032b2c9ea4d9775..7bb9b8cf36cb33d44b056d7fc41a85db9cef4489 100644 (file)
@@ -73,6 +73,7 @@ void noopck(const char*, ...)
 //stream remote records happned only if CK_NO_PROC_POOL =1 which means the chares to pe map will change
 #define STREAMING_INFORMHOME                    1
 CpvDeclare(int, _crashedNode);
+CpvDeclare(int, use_checksum);
 CpvDeclare(int, _remoteCrashedNode);
 
 // static, so that it is accessible from Converse part
@@ -707,15 +708,18 @@ void CkMemCheckPT::startCheckpoint(){
   msg->cp_flag = 1;
   int checksum;
   {
-#if CMK_USE_CHECKSUM
-    PUP::checker p(msg->packData);
-    pupAllElements(p);
-    checksum = p.getChecksum();
-    CmiPrintf("[%d][%d] checksum %d\n",CmiMyPartition(),CkMyPe(),checksum);
-#else 
-    PUP::toMem p(msg->packData);
-    pupAllElements(p);
-#endif
+//#if CMK_USE_CHECKSUM
+    if(CpvAccess(use_checksum)){
+      PUP::checker p(msg->packData);
+      pupAllElements(p);
+      checksum = p.getChecksum();
+    }else{  
+//    CmiPrintf("[%d][%d] checksum %d\n",CmiMyPartition(),CkMyPe(),checksum);
+//#else 
+      PUP::toMem p(msg->packData);
+      pupAllElements(p);
+    }
+//#endif
   }
   pointer = CpvAccess(curPointer);
   if(CpvAccess(chkpBuf)[pointer]) delete CpvAccess(chkpBuf)[pointer];
@@ -724,39 +728,49 @@ void CkMemCheckPT::startCheckpoint(){
     CmiPrintf("[%d][%d] local checkpoint done at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
   if(CkReplicaAlive()==1){
     CpvAccess(recvdLocal) = 1;
-#if CMK_USE_CHECKSUM
+//#if CMK_USE_CHECKSUM
 //    CkCheckPTMessage * tmpMsg = (CkCheckPTMessage *)CkCopyMsg((void **)&msg);
 //    CpvAccess(localChecksum) = getChecksum((char *)(tmpMsg->packData));
 //    delete tmpMsg;
-    CpvAccess(localChecksum) = checksum;
-    char *chkpMsg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes+sizeof(int));
-    *(int *)(chkpMsg+CmiMsgHeaderSizeBytes) = CpvAccess(localChecksum);
-    CmiSetHandler(chkpMsg,recvRemoteChkpHandlerIdx);
-    CmiRemoteSyncSendAndFree(CkMyPe(),CmiMyPartition()^1,CmiMsgHeaderSizeBytes+sizeof(int),chkpMsg);
-#else
-    envelope * env = (envelope *)(UsrToEnv((CkCheckPTMessage *)CkCopyMsg((void **)&msg)));
-    CkPackMessage(&env);
-    CmiSetHandler(env,recvRemoteChkpHandlerIdx);
-    CmiRemoteSyncSendAndFree(CkMyPe(),CmiMyPartition()^1,env->getTotalsize(),(char *)env);
-#endif
+    if(CpvAccess(use_checksum)){
+      CpvAccess(localChecksum) = checksum;
+      char *chkpMsg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes+sizeof(int));
+      *(int *)(chkpMsg+CmiMsgHeaderSizeBytes) = CpvAccess(localChecksum);
+      CmiSetHandler(chkpMsg,recvRemoteChkpHandlerIdx);
+      CmiRemoteSyncSendAndFree(CkMyPe(),CmiMyPartition()^1,CmiMsgHeaderSizeBytes+sizeof(int),chkpMsg);
+    }else{
+//#else
+      envelope * env = (envelope *)(UsrToEnv((CkCheckPTMessage *)CkCopyMsg((void **)&msg)));
+      CkPackMessage(&env);
+      CmiSetHandler(env,recvRemoteChkpHandlerIdx);
+      CmiRemoteSyncSendAndFree(CkMyPe(),CmiMyPartition()^1,env->getTotalsize(),(char *)env);
+    }
+//#endif
   }
   if(CpvAccess(recvdRemote)==1){
     //compare the checkpoint 
     int size = CpvAccess(chkpBuf)[pointer]->len;
 //    CkPrintf("[%d][%d] checkpoint size %d pointer %d \n",CmiMyPartition(),CkMyPe(),size,pointer);
-#if CMK_USE_CHECKSUM
-    if(CpvAccess(localChecksum) == CpvAccess(remoteChecksum)){
-      thisProxy[CkMyPe()].doneComparison(true);
-    }
-#else
-    if(CpvAccess(buddyBuf)->len == size && compare((char *)(CpvAccess(chkpBuf)[pointer]->packData),(char *)(CpvAccess(buddyBuf)->packData))){
-      thisProxy[CkMyPe()].doneComparison(true);
-    }
-#endif
-    else{
-      //CkPrintf("[%d][%d] failed the test pointer %d \n",CmiMyPartition(),CkMyPe(),pointer);
-      thisProxy[CkMyPe()].doneComparison(false);
+//#if CMK_USE_CHECKSUM
+    if(CpvAccess(use_checksum)){
+      if(CpvAccess(localChecksum) == CpvAccess(remoteChecksum)){
+        thisProxy[CkMyPe()].doneComparison(true);
+      }
+      else{
+        //CkPrintf("[%d][%d] failed the test pointer %d \n",CmiMyPartition(),CkMyPe(),pointer);
+        thisProxy[CkMyPe()].doneComparison(false);
+      }
+    }else{
+//#else
+      if(CpvAccess(buddyBuf)->len == size && compare((char *)(CpvAccess(chkpBuf)[pointer]->packData),(char *)(CpvAccess(buddyBuf)->packData))){
+        thisProxy[CkMyPe()].doneComparison(true);
+      }
+      else{
+        //CkPrintf("[%d][%d] failed the test pointer %d \n",CmiMyPartition(),CkMyPe(),pointer);
+        thisProxy[CkMyPe()].doneComparison(false);
+      }
     }
+//#endif
     if(CkMyPe()==0)
       CmiPrintf("[%d][%d] comparison done at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
   }
@@ -1670,41 +1684,44 @@ void CkMemCheckPT::RollBack(){
     }
 
     static void recvRemoteChkpHandler(char *msg){
-#if CMK_USE_CHECKSUM
-      if(CkMyPe()==0)
-        CmiPrintf("[%d][%d] receive checksum at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
-      CpvAccess(remoteChecksum) = *(int *)(msg+CmiMsgHeaderSizeBytes);
-      CpvAccess(recvdRemote) = 1;
-      if(CpvAccess(recvdLocal)==1){
-        if(CpvAccess(remoteChecksum) == CpvAccess(localChecksum)){
-          CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(true);
-        }
-        else{
-          CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(false);
-        }
-      }
-#else
-      envelope *env = (envelope *)msg;
-      CkUnpackMessage(&env);
-      CkCheckPTMessage* chkpMsg = (CkCheckPTMessage *)(EnvToUsr(env));
-      if(CpvAccess(recvdLocal)==1){
-        int pointer = CpvAccess(curPointer);
-        int size = CpvAccess(chkpBuf)[pointer]->len;
-        if(chkpMsg->len == size && compare((char *)(CpvAccess(chkpBuf)[pointer]->packData),(char *)(chkpMsg->packData))){
-          CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(true);
-        }else
-        {
-          CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(false);
-        }
-        delete chkpMsg;
+//#if CMK_USE_CHECKSUM
+      if(CpvAccess(use_checksum)){
         if(CkMyPe()==0)
-          CmiPrintf("[%d][%d] comparison done at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
-      }else{
+          CmiPrintf("[%d][%d] receive checksum at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
+        CpvAccess(remoteChecksum) = *(int *)(msg+CmiMsgHeaderSizeBytes);
         CpvAccess(recvdRemote) = 1;
-        if(CpvAccess(buddyBuf)) delete CpvAccess(buddyBuf);
-        CpvAccess(buddyBuf) = chkpMsg;
+        if(CpvAccess(recvdLocal)==1){
+          if(CpvAccess(remoteChecksum) == CpvAccess(localChecksum)){
+            CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(true);
+          }
+          else{
+            CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(false);
+          }
+        }
+      }else{
+//#else
+        envelope *env = (envelope *)msg;
+        CkUnpackMessage(&env);
+        CkCheckPTMessage* chkpMsg = (CkCheckPTMessage *)(EnvToUsr(env));
+        if(CpvAccess(recvdLocal)==1){
+          int pointer = CpvAccess(curPointer);
+          int size = CpvAccess(chkpBuf)[pointer]->len;
+          if(chkpMsg->len == size && compare((char *)(CpvAccess(chkpBuf)[pointer]->packData),(char *)(chkpMsg->packData))){
+            CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(true);
+          }else
+          {
+            CProxy_CkMemCheckPT(ckCheckPTGroupID).ckLocalBranch()->doneComparison(false);
+          }
+          delete chkpMsg;
+          if(CkMyPe()==0)
+            CmiPrintf("[%d][%d] comparison done at %lf\n",CmiMyPartition(),CkMyPe(),CmiWallTimer());
+        }else{
+          CpvAccess(recvdRemote) = 1;
+          if(CpvAccess(buddyBuf)) delete CpvAccess(buddyBuf);
+          CpvAccess(buddyBuf) = chkpMsg;
+        }
       }
-#endif
+//#endif
     }
 
     static void replicaRecoverHandler(char *msg){
@@ -2010,6 +2027,11 @@ void CkMemCheckPT::RollBack(){
       if (CmiGetArgFlagDesc(argv, "+ftc_disk", "Double-disk Checkpointing")) {
         arg_where = CkCheckPoint_inDISK;
       }
+      CpvInitialize(int, use_checksum);
+      CpvAccess(use_checksum)=0;
+      if(CmiGetArgFlagDesc(argv, "+use_checksum", "use checksum strategy")){
+        CpvAccess(use_checksum)=1;
+      }
       // initiliazing _crashedNode variable
       CpvInitialize(int, _crashedNode);
       CpvInitialize(int, _remoteCrashedNode);