refine failure generator
author Xiang Ni <xiangni2@illinois.edu>
Wed, 10 Apr 2013 03:43:16 +0000 (03:43 +0000)
committer Xiang Ni <xiangni2@illinois.edu>
Wed, 10 Apr 2013 03:43:16 +0000 (03:43 +0000)
src/arch/mpi/machine.c
src/ck-core/ckmemcheckpoint.C
src/ck-core/init.C
src/libs/ck-libs/tcharm/tcharm.C
src/util/pup_util.C

index eae1cedb0d5ae973bd9c57b1c29996affe948059..6947317797c781a4f8964082e03a2526517c9a6a 100644 (file)
@@ -1126,11 +1126,14 @@ void CmiMachineProgressImpl() {
 
 /* ######Beginning of functions related with exiting programs###### */
 void LrtsDrainResources() {
+    if (CmiMyPe() == 0&&CmiMyPartition()==0){ 
+      printf("in drain resource\n");
+    }
 #if !CMK_SMP
-    while (!CmiAllAsyncMsgsSent()) {
+    /*while (!CmiAllAsyncMsgsSent()) {
         PumpMsgs();
         CmiReleaseSentMessages();
-    }
+    }*/
 #else
     if(Cmi_smp_mode_setting == COMM_THREAD_SEND_RECV){
         while (!MsgQueueEmpty() || !CmiAllAsyncMsgsSent()) {
@@ -1407,13 +1410,13 @@ void LrtsInit(int *argc, char ***argv, int *numNodes, int *myNodeID) {
       MPI_Recv(vals,2,MPI_INT,MPI_ANY_SOURCE,FAIL_TAG, charmComm,&sts);
       int newpe = vals[0];
       CpvAccess(_curRestartPhase) = vals[1];
-      CmiPrintf("Charm++> Spare MPI rank %d is activated for global PE %d phase %d.\n", *myNodeID, newpe,CpvAccess(_curRestartPhase));
 
       if (newpe == -1) {
           MPI_Barrier(charmComm);
           MPI_Finalize();
           exit(0);
-      }
+      }else
+       CmiPrintf("Charm++> Spare MPI rank %d is activated for global PE %d phase %d.\n", *myNodeID, newpe,CpvAccess(_curRestartPhase));
 
         /* update petorank */
       MPI_Recv(petorank, num_workpes, MPI_INT,MPI_ANY_SOURCE,FAIL_TAG,charmComm, &sts);
@@ -1953,7 +1956,7 @@ void mpi_end_spare()
     int i;
     for (i=nextrank; i<total_pes; i++) {
         int vals[2] = {-1,-1};
-        CmiPrintf("end spare send to rank %d\n",i);
+        //CmiPrintf("end spare send to rank %d\n",i);
         MPI_Send((void *)vals,2,MPI_INT,i,FAIL_TAG,charmComm);
     }
 }
@@ -2004,11 +2007,13 @@ void CkDieNow()
     CmiPrintf("[%d][%d] die now.\n",CmiMyPartition(), CmiMyPe());
     fflush(stdout);
       /* release old messages */
-    //while (!CmiAllAsyncMsgsSent()) {
-    //    PumpMsgs();
-    //    CmiReleaseSentMessages();
-    //}
+    while (!CmiAllAsyncMsgsSent()) {
+        PumpMsgs();
+        CmiReleaseSentMessages();
+    }
+    CmiPrintf("[%d][%d] die now clear msg\n",CmiMyPartition(), CmiMyPe());
     MPI_Barrier(charmComm);
+    CmiPrintf("[%d][%d] die now clear msg 1\n",CmiMyPartition(), CmiMyPe());
     MPI_Finalize();
     exit(0);
 #endif
index 3be7319ec988bb918c8936015c2eee6dd106b55b..0157a4424f94fd590477a71294296fedfdb54345 100644 (file)
@@ -80,7 +80,6 @@ CpvDeclare(int, _remoteCrashedNode);
 // static, so that it is accessible from Converse part
 int CkMemCheckPT::inRestarting = 0;
 int CkMemCheckPT::inCheckpointing = 0;
-int CkMemCheckPT::aboutToDie = 0;
 int CkMemCheckPT::replicaAlive = 1;
 int CkMemCheckPT::inLoadbalancing = 0;
 double CkMemCheckPT::startTime;
@@ -114,7 +113,6 @@ double s_alpha;
 double killTime=0.0;
 extern void killLocal(void *_dummy,double curWallTime);
 extern void sendKillNotify(void *_dummy,double curWallTime);
-extern void verifyDeadth(void *_dummy,double curWallTime);
 extern void injectSoftFailure(void *_dummy,double curWallTime);
 #endif
 
@@ -179,8 +177,6 @@ static int recoverRemoteProcDataHandlerIdx;
 static int recoverRemoteArrayDataHandlerIdx;
 static int notifyHandlerIdx;
 static int replicaDyingNotifyHandlerIdx;
-static int replicaDeadNotifyHandlerIdx;
-static int replicaDyingBroadcastHandlerIdx;
 // compute the backup processor
 // FIXME: avoid crashed processors
 #if CMK_CONVERSE_MPI
@@ -408,8 +404,8 @@ CkMemCheckPT::CkMemCheckPT(int w)
 #if CMK_CONVERSE_MPI
   void pingBuddy();
   void pingCheckHandler();
-  //CcdCallOnCondition(CcdPERIODIC_100ms,(CcdVoidFn)pingBuddy,NULL);
-  //CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
+  CcdCallOnCondition(CcdPERIODIC_100ms,(CcdVoidFn)pingBuddy,NULL);
+  CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
 #endif
   chkpTable[0] = NULL;
   chkpTable[1] = NULL;
@@ -417,7 +413,6 @@ CkMemCheckPT::CkMemCheckPT(int w)
   recvIterCount = 0;
   localDecided = false;
   softFailureInjected = false;
-  chkpCount=0;
   if(killFlag == 2){
     localSeed = failureSeed;
     softLocalSeed = failureSeed*2;
@@ -431,70 +426,75 @@ CkMemCheckPT::CkMemCheckPT(int w)
   }
 }
 
+void CkMemCheckPT::replicaInjectFailure(){
+  char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
+  CmiSetHandler(msg, replicaBeginFailureInjectionHandlerIdx);
+  CmiRemoteSyncSendAndFree(0,CmiMyPartition()^1,CmiMsgHeaderSizeBytes+sizeof(unsigned int),msg);
+}
+
+void CkMemCheckPT::generateFailure(){
+  int rand3 = rand_r(&localSeed);
+  double sec;
+  if(strcmp(failureDist,"E")==0)
+    sec = -log(1.0f - ((double)rand3)/(long long int)(RAND_MAX))*MTBF;
+  else if(strcmp(failureDist,"W")==0)
+    sec = alpha*pow(-log(1.0f - ((double)rand3)/(long long int)(RAND_MAX)),1/beta);
+  thisProxy[1].killAfter(sec);
+}
+
+
+void CkMemCheckPT::replicaInjectFailure(){
+  char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
+  CmiSetHandler(msg, replicaBeginFailureInjectionHandlerIdx);
+  CmiRemoteSyncSendAndFree(0,CmiMyPartition()^1,CmiMsgHeaderSizeBytes+sizeof(unsigned int),msg);
+}
+
+void CkMemCheckPT::generateFailure(){
+  int rand3 = rand_r(&localSeed);
+  double sec;
+  if(strcmp(failureDist,"E")==0)
+    sec = -log(1.0f - ((double)rand3)/(long long int)(RAND_MAX))*MTBF;
+  else if(strcmp(failureDist,"W")==0)
+    sec = alpha*pow(-log(1.0f - ((double)rand3)/(long long int)(RAND_MAX)),1/beta);
+  killTime = CmiWallTimer()+sec;
+  printf("[%d][%d] inject hard failure after %.6lf s (MEMCKPT)\n",CmiMyPartition(),CkMyPe(),sec);
+  CcdCallFnAfter(sendKillNotify,NULL,(sec-1)*1000);
+}
+
 void sendKillNotify(void *_dummy,double curWallTime){
   if(CkInCheckpointing()||CpvAccess(localStarted)==1||CkInRestarting()){
     //in checkpointing or restart, delaying sending the notify
+    CkPrintf("[%d][%d]in checkpointing, recheck after 0.5s at %lf\n", CmiMyPartition(), CkMyPe(), CmiWallTimer());
     CcdCallFnAfter(sendKillNotify,NULL,500);
   }else{
     CkMemCheckPT::aboutToDie =  1;
-    //send the notify to my replica, so my replica won't communicate with me until the phase is clear
-    //char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
-    //CmiSetHandler(msg, replicaDyingNotifyHandlerIdx);
-    //CmiRemoteSyncSendAndFree(0,CmiMyPartition()^1,CmiMsgHeaderSizeBytes,msg);
     char * msg1 = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
     CmiSetHandler(msg1, replicaDyingNotifyHandlerIdx);
     CmiRemoteSyncSendAndFree(1,CmiMyPartition()^1,CmiMsgHeaderSizeBytes,msg1);
     
     //now it can die
-    double sec = 0.001;
+    double sec = 0.01;
     if(CmiWallTimer()<killTime){
       sec=killTime-CmiWallTimer();  
     }
     CProxy_CkMemCheckPT checkptMgr(ckCheckPTGroupID);
     checkptMgr[1].killAfter(sec);
     
-    sec +=1;
-    CcdCallFnAfter(verifyDeadth,NULL,sec*1000); 
   }
 }
 
-void replicaDyingNotify(char * msg){
-  CkMemCheckPT::aboutToDie =  1;
-  CmiFree(msg);
-  //char * rmsg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
-  //CmiSetHandler(rmsg, replicaDyingBroadcastHandlerIdx);
-  //CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes, (char *)rmsg);
-  //then norify everyone
+void CkMemCheckPT::killAfter(double sec){
+  killTime = CmiWallTimer()+sec;
+  printf("[%d][%d] To be killed after %.6lf s (MEMCKPT) %lf\n",CmiMyPartition(),CkMyPe(),sec, killTime);
+  CcdCallFnAfter(killLocal,NULL,sec*1000);
 }
 
-void replicaDyingBroadcast(char * msg){
+void replicaDyingNotify(char * msg){
   CkMemCheckPT::aboutToDie =  1;
   CmiFree(msg);
 }
 
-void replicaDeadNotify(char * msg){
-  CkMemCheckPT::aboutToDie =  0;
-  CmiFree(msg);
-}
-
-void CkMemCheckPT::generateFailure(){
-  int rand3 = rand_r(&localSeed);
-  double sec;
-  if(strcmp(failureDist,"E")==0)
-    sec = -log(1.0f - ((double)rand3)/(long long int)(RAND_MAX))*MTBF;
-  else if(strcmp(failureDist,"W")==0)
-    sec = alpha*pow(-log(1.0f - ((double)rand3)/(long long int)(RAND_MAX)),1/beta);
-  killTime = CmiWallTimer()+sec;
-  printf("[%d][%d] inject hard failure after %.6lf s (MEMCKPT)\n",CmiMyPartition(),CkMyPe(),sec);
-  CcdCallFnAfter(sendKillNotify,NULL,(sec-1)*1000);
-}
-
-void CkMemCheckPT::killAfter(double sec){
-  killTime = CmiWallTimer()+sec;
-  CkPrintf("[%d][%d] To be killed after %.6lf s (MEMCKPT) %lf\n",CmiMyPartition(),CkMyPe(),sec, killTime);
-  CcdCallFnAfter(killLocal,NULL,sec*1000);
-}
-
 void CkMemCheckPT::generateSoftFailure(){
   int rand = rand_r(&softLocalSeed);
   double sec;
@@ -528,16 +528,13 @@ void CkMemCheckPT::pup(PUP::er& p)
   p|peCount;
   p|localSeed;
   p|softLocalSeed;
-  p|chkpCount;
-  p|lastChkpTime;
-  p|chkpPeriod;
   if (p.isUnpacking()) {
     recvCount = 0;
 #if CMK_CONVERSE_MPI
     void pingBuddy();
     void pingCheckHandler();
//   CcdCallOnCondition(CcdPERIODIC_100ms,(CcdVoidFn)pingBuddy,NULL);
//   CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
+    CcdCallOnCondition(CcdPERIODIC_100ms,(CcdVoidFn)pingBuddy,NULL);
+    CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
 #endif
     maxIter = -1;
     recvIterCount = 0;
@@ -589,7 +586,7 @@ void CkMemCheckPT::startChkp(){
   if(CkInCheckpointing()){
     return;
   }
-  CkPrintf("[%d][%d]start checkpoint at %lf in %lf\n",CmiMyPartition(), CkMyPe(),CmiWallTimer(),CmiWallTimer()-startTime);
+  CkPrintf("start checkpoint at %lf in %lf\n",CmiWallTimer(),CmiWallTimer()-startTime);
   CkStartMemCheckpoint(cpCallback);
 }
 
@@ -1011,13 +1008,9 @@ void CkMemCheckPT::doneBothComparison(){
   inCheckpointing = 0;
   notifyReplica = 0;
   if(CkMyPe() == 0){
-    CmiPrintf("[%d][%d] Checkpoint finished in %f seconds at %lf, checkpoint size %d, memory usage %lf, sending callback ... \n", CmiMyPartition(),CkMyPe(), CmiWallTimer()-startTime,CmiWallTimer(),size, CmiMemoryUsage()/1048576.0);
+    CmiPrintf("[%d][%d] Checkpoint finished in %f seconds at %lf, checkpoint size %d, sending callback ... \n", CmiMyPartition(),CkMyPe(), CmiWallTimer()-startTime,CmiWallTimer(),size);
   }
   CKLOCMGR_LOOP(mgr->resumeFromChkp(););//TODO wait until the replica finish the checkpoint
-  if(chkpCount!=0)
-    chkpPeriod = CmiWallTimer()-lastChkpTime;
-  chkpCount++;
-  lastChkpTime = CmiWallTimer();
 }
 
 void CkMemCheckPT::RollBack(){
@@ -1666,7 +1659,7 @@ void CkMemCheckPT::RollBack(){
       }
       if (CmiMyPe() == BuddyPE(thisFailedPe)) {
         lastPingTime = CmiWallTimer();
-//        CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
+        CcdCallOnCondition(CcdPERIODIC_1s,(CcdVoidFn)pingCheckHandler,NULL);
       }
       //inject next failure
       if(killFlag==2){
@@ -1732,7 +1725,6 @@ void CkMemCheckPT::RollBack(){
     void CkStartMemCheckpoint(CkCallback &cb)
     {
 #if CMK_MEM_CHECKPOINT
-      //only not letting the dying partition continue checkpoint
       if(CkMemCheckPT::aboutToDie&&CmiMyPartition()==0)
        return;
       CkPrintf("partition %d start checkpoint\n",CmiMyPartition());
@@ -1751,9 +1743,9 @@ void CkMemCheckPT::RollBack(){
       // store user callback and user data
       CkMemCheckPT::cpCallback = cb;
 
+
       //send to my replica that checkpoint begins 
       if(CkReplicaAlive()==1){
-        CkPrintf("[%d][%d]send checkpoint start notification to my partition\n",CmiMyPartition(), CkMyPe());
         char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
         CmiSetHandler(msg, replicaChkpStartHandlerIdx);
         CmiRemoteSyncSendAndFree(0,CmiMyPartition()^1,CmiMsgHeaderSizeBytes,msg);
@@ -1762,7 +1754,7 @@ void CkMemCheckPT::RollBack(){
       CProxy_CkMemCheckPT checkptMgr(ckCheckPTGroupID);
       checkptMgr.chkpLocalStart();
       // broadcast to start check pointing
-      if(CmiNumPartition()==1||(CmiNumPartition()==2&&CpvAccess(remoteStarted)==1)||(CkReplicaAlive()==0)){
+      if(CmiNumPartition()==1||(CmiNumPartition()==2&&CpvAccess(remoteStarted)==1)||CkReplicaAlive()==0){
         CProxy_CkMemCheckPT checkptMgr(ckCheckPTGroupID);
         checkptMgr.doItNow(CkMyPe());
       }
@@ -2187,13 +2179,8 @@ void CkMemCheckPT::RollBack(){
          char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
          CmiSetHandler(msg, replicaChkpStartHandlerIdx);
          CmiRemoteSyncSendAndFree(0,CmiMyPartition()^1,CmiMsgHeaderSizeBytes,msg);
-       }
-       {
           CkMemCheckPT::aboutToDie = 0;
-          //char * msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes);
-          //CmiSetHandler(msg, replicaDeadNotifyHandlerIdx);
-          //CmiRemoteSyncSendAndFree(0,CmiMyPartition(),CmiMsgHeaderSizeBytes,msg);
-        }
+       }
       }
     }
     // called on crashed processor
@@ -2477,7 +2464,6 @@ void CkMemCheckPT::RollBack(){
 #if CMK_MEM_CHECKPOINT
       // notify
       CkMemCheckPT::inRestarting = 1;
-      CkMemCheckPT::aboutToDie =  0;
       int diepe = *(int *)(msg+CmiMsgHeaderSizeBytes);
       notify_crash(diepe);
       // send message to crash pe to let it restart
@@ -2561,9 +2547,6 @@ void CkMemCheckPT::RollBack(){
         //for replica
         recvRemoteChkpHandlerIdx = CkRegisterHandler((CmiHandler)recvRemoteChkpHandler);
         replicaDieHandlerIdx = CkRegisterHandler((CmiHandler)replicaDieHandler);
-        replicaDyingNotifyHandlerIdx = CkRegisterHandler((CmiHandler)replicaDyingNotify);
-        replicaDeadNotifyHandlerIdx = CkRegisterHandler((CmiHandler)replicaDeadNotify);
-        replicaDyingBroadcastHandlerIdx = CkRegisterHandler((CmiHandler)replicaDyingBroadcast);
         replicaChkpStartHandlerIdx = CkRegisterHandler((CmiHandler)replicaChkpStartHandler);
         replicaDieBcastHandlerIdx = CkRegisterHandler((CmiHandler)replicaDieBcastHandler);
         replicaRecoverHandlerIdx = CkRegisterHandler((CmiHandler)replicaRecoverHandler);
@@ -2573,6 +2556,7 @@ void CkMemCheckPT::RollBack(){
         recvPhaseHandlerIdx = CkRegisterHandler((CmiHandler)recvPhaseHandler);
         recoverRemoteProcDataHandlerIdx = CkRegisterHandler((CmiHandler)recoverRemoteProcDataHandler);
         recoverRemoteArrayDataHandlerIdx = CkRegisterHandler((CmiHandler)recoverRemoteArrayDataHandler);
+        replicaDyingNotifyHandlerIdx = CkRegisterHandler((CmiHandler)replicaDyingNotify);
 
 #if CMK_CONVERSE_MPI
         pingHandlerIdx = CkRegisterHandler((CmiHandler)pingHandler);
@@ -2657,7 +2641,7 @@ void CkMemCheckPT::RollBack(){
         }else{ 
 #if CMK_CONVERSE_MPI
           printf("[%d][%d] KillLocal called at %.6lf \n",CmiMyPartition(),CkMyPe(),CmiWallTimer());          
-         CkDieNow();
+          CkDieNow();
 #else 
           kill(getpid(),SIGKILL);                                               
 #endif
@@ -2668,21 +2652,6 @@ void CkMemCheckPT::RollBack(){
         CmiAbort("kill() not supported!");
       }
 #endif
-
-      void verifyDeadth(void * _dummy, double curWallTime){
-        char *msg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes+sizeof(int));
-        *(int *)(msg+CmiMsgHeaderSizeBytes) = 1;
-        CmiSetHandler(msg, buddyDieHandlerIdx);
-        CmiSyncBroadcastAllAndFree(CmiMsgHeaderSizeBytes+sizeof(int), (char *)msg);
-        //send to everyone in the other world
-        if(CmiNumPartition()!=1){
-          char * rMsg = (char*)CmiAlloc(CmiMsgHeaderSizeBytes+sizeof(int));
-          *(int *)(rMsg+CmiMsgHeaderSizeBytes) = 1;
-          CmiSetHandler(rMsg, replicaDieHandlerIdx);
-          CmiRemoteSyncSendAndFree(CkMyPe(),CmiMyPartition()^1,CmiMsgHeaderSizeBytes+sizeof(int),(char *)rMsg);
-       }
-      }
-
       void injectSoftFailure(void *_dummy,double curWallTime){
         if(!CkInCheckpointing()&&!CkInRestarting()){
           CkPrintf("soft failure injected\n");
index fa7fc13d5fbbc25314c856d565b6e08e0e4adcf8..5c8cb718cfcef529869248ba13af21fcd1d8b705 100644 (file)
@@ -282,7 +282,7 @@ static inline void _parseCommandLineOpts(char **argv)
       //do not read the killfile if this is a restarting processor
       killFlag = 1;
       if(CmiMyPe() == 0){
-        printf("[%d] killFlag set to 1 for file %s\n",CkMyPe(),killFile);
+        CmiPrintf("[%d] killFlag set to 1 for file %s\n",CkMyPe(),killFile);
       }
     }
   }
@@ -291,14 +291,14 @@ static inline void _parseCommandLineOpts(char **argv)
   if(CmiGetArgStringDesc(argv,"+failureInject", &failureDist,"Runtime system injects failure")){
     if (CmiGetArgIntDesc(argv,"+MTBF",&MTBF,"Mean time between failures")){
       if(CmiMyPe()==0)
-        printf("Mean time between hard failures is %d\n",MTBF);
+        CmiPrintf("Mean time between hard failures is %d\n",MTBF);
       killFlag = 2;
       if(strcmp(failureDist, "E")==0){
         if(CmiMyPe()==0)
-          printf("Runtime system generates faiures according to exponential distribution\n");
+          CmiPrintf("Runtime system generates faiures according to exponential distribution\n");
       }else if(strcmp(failureDist, "W")==0){
         if(CmiMyPe()==0)
-          printf("Runtime system generates faiures according to weibull distribution\n");
+          CmiPrintf("Runtime system generates faiures according to weibull distribution\n");
         if(!CmiGetArgDoubleDesc(argv,"+shape",&beta,"shape parameter for Weibull distribution")){
           if(CmiMyPe()==0)
             CmiAbort("should provide shape parameter for Weibull distribution");
@@ -318,7 +318,7 @@ static inline void _parseCommandLineOpts(char **argv)
       }
       if (CmiGetArgIntDesc(argv,"+SMTBF",&SMTBF,"Mean time between failures")){
         if(CmiMyPe()==0)
-          printf("Mean time between soft failures is %d\n",SMTBF);
+          CmiPrintf("Mean time between soft failures is %d\n",SMTBF);
         if(strcmp(failureDist,"W")==0){
           s_alpha = SMTBF/tgammaf(1+1/beta);
         }
index 03327376cc9d449f721c13e223cd0dcab7b33ec9..4b229871cd14159947670c43743b5669e77af71e 100644 (file)
@@ -189,7 +189,7 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
   threadInfo.tProxy=CProxy_TCharm(thisArrayID);
   threadInfo.thisElement=thisIndex;
   threadInfo.numElements=initMsg->numElements;
-  if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)) {
+  if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)) {
        heapBlocks=CmiIsomallocBlockListNew(tid);
   } else
        heapBlocks=0;
@@ -321,7 +321,7 @@ void TCharm::pup(PUP::er &p) {
 void TCharm::pupThread(PUP::er &pc) {
     pup_er p=(pup_er)&pc;
     checkPupMismatch(pc,5138,"before TCHARM thread");
-    if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
+    if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
       CmiIsomallocBlockListPup(p,&heapBlocks,tid);
     tid = CthPup(p, tid);
     if (pc.isUnpacking()) {
index 2bd7290d058d233767837d3cb75504fe68ddefae..39c8416f44c3e5952e77e0b25dc8a50d665f502f 100644 (file)
@@ -149,56 +149,56 @@ void PUP::checker::bytes(void * p,int n,size_t itemSize,dataType t)
         case Tdouble:
           {
             double * p1;
-            double * p2;
+            //double * p2;
             p1 = (double*)p;
-            p2 = new double[n/itemSize];
-            memcpy((char *)p2,(const void *)buf,n); 
+            //p2 = new double[n/itemSize];
+            //memcpy((char *)p2,(const void *)buf,n); 
             for(int i=0;i<n/itemSize;i++){
-              if(fabs(p1[i]-p2[i])>accuracy){
-                if(result){
-                  printf("found incorrect double %e %e diff %e\n",p1[i],p2[i],(p1[i]-p2[i]));
-                }       
+              if(fabs(p1[i]-*(double *)((char *)buf+i*itemSize))>accuracy){
+    //            if(result){
+                  //printf("found incorrect double %e %e diff %e\n",p1[i],p2[i],(p1[i]-p2[i]));
+    //            }     
                 result = result && false;
            //     fault_bytes++;
               }
             }
-            delete p2;
+            //delete p2;
           }    
           break;       
         case Tint:
           {
             int * p1;
-            int * p2;
+            //int * p2;
             p1 = (int *)p;
-            p2 = new int[n/itemSize];
-            memcpy((char *)p2,(const void *)buf,n); 
+            //p2 = new int[n/itemSize];
+           // memcpy((char *)p2,(const void *)buf,n); 
             for(int i=0;i<n/itemSize;i++){
-              if(fabs(p1[i]-p2[i])>accuracy){
-                if(result)
-                  printf("found incorrect int %d %d at %d total %d\n",p1[i],p2[i],i,n/itemSize);
+              if(fabs(p1[i]-*(int *)((char *)buf+i*itemSize))>accuracy){
+  //              if(result)
+  //                printf("found incorrect int %d %d at %d total %d\n",p1[i],p2[i],i,n/itemSize);
                 result = result && false;
          //       fault_bytes++;
               }
             }
-            delete p2;
+            //delete p2;
           }
           break;
         case Tchar:
           {
             char * p1;
-            char * p2;
+            //char * p2;
             p1 = (char *)p;
-            p2 = new char[n/itemSize];
-            memcpy((char *)p2,(const void *)buf,n); 
+            //p2 = new char[n/itemSize];
+            //memcpy((char *)p2,(const void *)buf,n); 
             for(int i=0;i<n/itemSize;i++){
-              if(fabs(p1[i]-p2[i])>accuracy){
-                if(result)
-                  printf("found incorrect char %d %d at %d, total %d\n",p1[i],p2[i],i,n/itemSize);
+              if(fabs(p1[i]-*(char *)((char *)buf+i*itemSize))>accuracy){
+   //             if(result)
+   //               printf("found incorrect char %d %d at %d, total %d\n",p1[i],p2[i],i,n/itemSize);
                 result = result && false;
         //        fault_bytes++;
               }
             }
-            delete p2;
+            //delete p2;
           }
           break;
         default: