fix for migration
[charm.git] / src / libs / ck-libs / tcharm / tcharm.C
index 2f6cbe4bd8a86c606a7b52b173238ecfa239807a..aca121847d7ffba30c68799312af9a1afe2551f3 100644 (file)
@@ -5,6 +5,7 @@ Orion Sky Lawlor, olawlor@acm.org, 11/19/2001
  */
 #include "tcharm_impl.h"
 #include "tcharm.h"
+#include "ckevacuation.h"
 #include <ctype.h>
 
 #if 0 
@@ -81,7 +82,23 @@ void TCharm::procInit(void)
   char *traceLibName=NULL;
   while (CmiGetArgStringDesc(argv,"+tcharm_trace",&traceLibName,"Print each call to this library"))
       tcharm_tracelibs.addTracing(traceLibName);
-  CmiGetArgIntDesc(argv,"+tcharm_stacksize",&tcharm_stacksize,"Set the thread stack size (default 1MB)");
+  // CmiGetArgIntDesc(argv,"+tcharm_stacksize",&tcharm_stacksize,"Set the thread stack size (default 1MB)");
+  char *str;
+  if (CmiGetArgStringDesc(argv,"+tcharm_stacksize",&str,"Set the thread stack size (default 1MB)"))  {
+    if (strpbrk(str,"M")) {
+      sscanf(str, "%dM", &tcharm_stacksize);
+      tcharm_stacksize *= 1024*1024;
+    }
+    else if (strpbrk(str,"K")) {
+      sscanf(str, "%dK", &tcharm_stacksize);
+      tcharm_stacksize *= 1024;
+    }
+    else {
+      sscanf(str, "%d", &tcharm_stacksize);
+    }
+    if (CkMyPe() == 0)
+      CkPrintf("TCharm> stack size is set to %d.\n", tcharm_stacksize);
+  }
   if (CkMyPe()!=0) { //Processor 0 eats "+vp<N>" and "-vp<N>" later:
        int ignored;
        while (CmiGetArgIntDesc(argv,"-vp",&ignored,NULL)) {}
@@ -144,7 +161,6 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
   initMsg=initMsg_;
   initMsg->opts.sanityCheck();
   timeOffset=0.0;
-  threadGlobals=CtgCreate();
   if (tcharm_nothreads)
   { //Don't even make a new thread-- just use main thread
     tid=CthSelf();
@@ -156,11 +172,12 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
     } else {
       tid=CthCreateMigratable((CthVoidFn)startTCharmThread,initMsg,initMsg->opts.stackSize);
     }
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
     BgAttach(tid);
     BgUnsetStartOutOfCore();
 #endif
   }
+  threadGlobals=CtgCreate(tid);
   CtvAccessOther(tid,_curTCharm)=this;
   isStopped=true;
   resumeAfterMigration=false;
@@ -172,13 +189,15 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
   threadInfo.tProxy=CProxy_TCharm(thisArrayID);
   threadInfo.thisElement=thisIndex;
   threadInfo.numElements=initMsg->numElements;
-  if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
-       heapBlocks=CmiIsomallocBlockListNew();
-  else
+  if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)) {
+       heapBlocks=CmiIsomallocBlockListNew(tid);
+  else
        heapBlocks=0;
   nUd=0;
   usesAtSync=CmiTrue;
   run();
+  CkCallback cb(CkIndex_TCharm::ResumeFromChkpSync(),thisProxy(thisIndex));
+  setChkpResumeClient(cb);
 }
 
 TCharm::TCharm(CkMigrateMessage *msg)
@@ -205,15 +224,18 @@ void checkPupMismatch(PUP::er &p,int expected,const char *where)
 void TCharm::pup(PUP::er &p) {
 //Pup superclass
   ArrayElement1D::pup(p);
-
   //BIGSIM_OOC DEBUGGING
   //if(!p.isUnpacking()){
   //  CmiPrintf("TCharm[%d] packing: ", thisIndex);
   //  CthPrintThdStack(tid);
   //}
-
+  //if(p.isUnpacking()&&CkInRestarting()){
+  if(p.isUnpacking()){
+    CkCallback cb(CkIndex_TCharm::ResumeFromChkpSync(),thisProxy(thisIndex));
+    setChkpResumeClient(cb);
+  }
   checkPupMismatch(p,5134,"before TCHARM");
-#ifdef _FAULT_MLOG_
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
     if(!isStopped){
 //      resumeAfterMigration = true;
     }
@@ -230,7 +252,7 @@ void TCharm::pup(PUP::er &p) {
 #ifndef CMK_OPTIMIZE
   DBG("Packing thread");
   if (!isStopped && !CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)){
-    if(BgOutOfCoreFlag==0) //not doing out-of-core scheduling
+    if(_BgOutOfCoreFlag==0) //not doing out-of-core scheduling
        CkAbort("Cannot pup a running thread.  You must suspend before migrating.\n");
   }    
   if (tcharm_nomig) CkAbort("Cannot migrate with the +tcharm_nomig option!\n");
@@ -299,15 +321,15 @@ void TCharm::pup(PUP::er &p) {
 void TCharm::pupThread(PUP::er &pc) {
     pup_er p=(pup_er)&pc;
     checkPupMismatch(pc,5138,"before TCHARM thread");
+    if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
+      CmiIsomallocBlockListPup(p,&heapBlocks,tid);
     tid = CthPup(p, tid);
     if (pc.isUnpacking()) {
       CtvAccessOther(tid,_curTCharm)=this;
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
       BgAttach(tid);
 #endif
     }
-    if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
-      CmiIsomallocBlockListPup(p,&heapBlocks);
     threadGlobals=CtgPup(p,threadGlobals);
     checkPupMismatch(pc,5139,"after TCHARM thread");
 }
@@ -349,7 +371,9 @@ TCharm::~TCharm()
   //BIGSIM_OOC DEBUGGING
   //CmiPrintf("TCharm destructor called with heapBlocks=%p!\n", heapBlocks);
   
+#if !CMK_USE_MEMPOOL_ISOMALLOC
   if (heapBlocks) CmiIsomallocBlockListDelete(heapBlocks);
+#endif
   CthFree(tid);
   CtgFree(threadGlobals);
   delete initMsg;
@@ -357,6 +381,10 @@ TCharm::~TCharm()
 
 void TCharm::migrateTo(int destPE) {
        if (destPE==CkMyPe()) return;
+       if (CthMigratable() == 0) {
+           CkPrintf("Warning> thread migration is not supported!\n");
+            return;
+        }
        // Make sure migrateMe gets called *after* we suspend:
        thisProxy[thisIndex].migrateDelayed(destPE);
 //     resumeAfterMigration=true;
@@ -367,7 +395,7 @@ void TCharm::migrateDelayed(int destPE) {
 }
 void TCharm::ckJustMigrated(void) {
        ArrayElement::ckJustMigrated();
-#ifdef _FAULT_MLOG_
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
 //  resumeAfterMigration = true;
 #endif
        if (resumeAfterMigration) {
@@ -452,7 +480,7 @@ void TCharm::stop(void)
     we're resuming from migration!  (OSL 2003/9/23)
    */
   TCharm *dis=TCharm::get();
-#ifdef _FAULT_MLOG_ 
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
 /*  CpvAccess(_currentObj) = dis;
  *      printf("[%d] _currentObject set to TCharm index %d %p\n",CkMyPe(),dis->thisIndex,dis);*/
 #endif
@@ -469,12 +497,13 @@ void TCharm::start(void)
   DBG("thread resuming soon");
   //CkPrintf("TCharm[%d]::start()\n", thisIndex);
   //CmiPrintStackTrace(0);
-#ifdef _FAULT_MLOG_
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
 //CthAwakenPrio(tid, CQS_QUEUEING_BFIFO, 1, &prio);
   CthAwaken(tid);
 #else
   CthAwaken(tid);
 #endif
+  DBG("thread resuming soon");
 }
 
 //Block our thread, schedule, and come back:
@@ -497,13 +526,23 @@ void TCharm::migrate(void)
 }
 
 
+void TCharm::chkpsync(void)
+{
+#if CMK_LBDB_ON
+  DBG("going to sync");
+  AtChkpSync();
+  stop();
+#else
+  DBG("skipping sync, because there is no load balancer");
+#endif
+}
+
 void TCharm::evacuate(){
        /*
                FAULT_EVAC
        */
        //CkClearAllArrayElementsCPP();
-       if(CpvAccess(startedEvac)){
-               int nextPE = getNextPE(CkArrayIndex1D(thisIndex));
+       if(CkpvAccess(startedEvac)){
 //             resumeAfterMigration=true;
                CcdCallFnAfter((CcdVoidFn)CkEmmigrateElement, (void *)myRec, 1);
                suspend();
@@ -551,8 +590,13 @@ void TCharm::ResumeFromSync(void)
   //if(isSelfDone) return;
   //if (exitWhenDone) return; //for bigsim ooc execution
   if (!skipResume) start();
+  CkPrintf("thread ResumeFromSync\n");
 }
 
+void TCharm::ResumeFromChkpSync(void)
+{
+  start();
+}
 
 /****** TcharmClient ******/
 void TCharmClient1D::ckJustMigrated(void) {
@@ -574,20 +618,20 @@ CkArrayID TCHARM_Get_threads(void) {
 /************* Startup/Shutdown Coordination Support ************/
 
 // Useless values to reduce over:
-int vals[2]={0,1};
+int _vals[2]={0,1};
 
 //Called when we want to go to a barrier
 void TCharm::barrier(void) {
        //Contribute to a synchronizing reduction
        CkCallback cb(index_t::atBarrier(0), thisProxy[0]);
-       contribute(sizeof(vals),&vals,CkReduction::sum_int,cb);
-#if CMK_BLUEGENE_CHARM
+       contribute(sizeof(_vals),&_vals,CkReduction::sum_int,cb);
+#if CMK_BIGSIM_CHARM
         void *curLog;          // store current log in timeline
         _TRACE_BG_TLINE_END(&curLog);
-       TRACE_BG_AMPI_BREAK(NULL, "TCharm_Barrier_START", NULL, 0);
+       TRACE_BG_AMPI_BREAK(NULL, "TCharm_Barrier_START", NULL, 0, 1);
 #endif
        stop();
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
         _TRACE_BG_SET_INFO(NULL, "TCHARM_Barrier_END",  &curLog, 1);
 #endif
 }
@@ -606,7 +650,7 @@ void TCharm::done(void) {
        if (exitWhenDone) {
                //Contribute to a synchronizing reduction
                CkCallback cb(index_t::atExit(0), thisProxy[0]);
-               contribute(sizeof(vals),&vals,CkReduction::sum_int,cb);
+               contribute(sizeof(_vals),&_vals,CkReduction::sum_int,cb);
        }
        isSelfDone = true;
        stop();
@@ -722,8 +766,6 @@ CkGroupID CkCreatePropMap(void);
 
 static CProxy_TCharm TCHARM_Build_threads(TCharmInitMsg *msg)
 {
-  char *tmp;
-  char **argv=CkGetArgv();
   CkArrayOptions opts(msg->numElements);
   CkAssert(CkpvAccess(mapCreated)==1);
 
@@ -731,22 +773,28 @@ static CProxy_TCharm TCHARM_Build_threads(TCharmInitMsg *msg)
     CkPrintf("USING ConfigurableRRMap\n");
     mapID=CProxy_ConfigurableRRMap::ckNew();
   } else if(mapping==NULL){
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
     mapID=CProxy_BlockMap::ckNew();
+#else
+#if __FAULT__
+       mapID=CProxy_RRMap::ckNew();
 #else
     mapID=CkCreatePropMap();
 #endif
-  }else if(0==strcmp(mapping,"BLOCK_MAP")){
+#endif
+  } else if(0 == strcmp(mapping,"BLOCK_MAP")) {
     CkPrintf("USING BLOCK_MAP\n");
-    mapID=CProxy_BlockMap::ckNew();
-  }else if(0==strcmp(mapping,"RR_MAP")){
+    mapID = CProxy_BlockMap::ckNew();
+  } else if(0 == strcmp(mapping,"RR_MAP")) {
     CkPrintf("USING RR_MAP\n");
-    mapID=CProxy_RRMap::ckNew();
-  }else{  // "PROP_MAP" or anything else
-    mapID=CkCreatePropMap();
+    mapID = CProxy_RRMap::ckNew();
+  } else if(0 == strcmp(mapping,"MAPFILE")) {
+    CkPrintf("Reading map from file\n");
+    mapID = CProxy_ReadFileMap::ckNew();
+  } else {  // "PROP_MAP" or anything else
+    mapID = CkCreatePropMap();
   }
   opts.setMap(mapID);
-  int nElem=msg->numElements; //<- save it because msg will be deleted.
   return CProxy_TCharm::ckNew(msg,opts);
 }
 
@@ -800,8 +848,10 @@ static void checkAddress(void *data)
           if (!CmiIsomallocInRange(data))
            CkAbort("The UserData you register must be allocated on the stack!\n");
         }
-        else 
-          CkPrintf("Warning> checkAddress failed because isomalloc not supported.\n");
+        else {
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> checkAddress failed because isomalloc not supported.\n");
+       }
 }
 
 /* Old "register"-based userdata: */
@@ -852,12 +902,26 @@ CDECL void TCHARM_Migrate(void)
 {
        TCHARMAPI("TCHARM_Migrate");
        if (CthMigratable() == 0) {
-          CkPrintf("Warning> thread migration is not supported!\n");
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> thread migration is not supported!\n");
           return;
         }
        TCharm::get()->migrate();
 }
+
+CDECL void TCHARM_ChkpSync(void)
+{
+       TCHARMAPI("TCHARM_ChkpSync");
+       if (CthMigratable() == 0) {
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> thread migration is not supported!\n");
+          return;
+        }
+       TCharm::get()->chkpsync();
+}
+
 FORTRAN_AS_C(TCHARM_MIGRATE,TCHARM_Migrate,tcharm_migrate,(void),())
+FORTRAN_AS_C(TCHARM_CHKPSYNC,TCHARM_ChkpSync,tcharm_chkpsync,(void),())
 
 CDECL void TCHARM_Async_Migrate(void)
 {
@@ -972,7 +1036,7 @@ FDECL void FTN_NAME(TCHARM_INIT,tcharm_init)(void)
 */
 /// Find this semaphore, or insert if there isn't one:
 TCharm::TCharmSemaphore *TCharm::findSema(int id) {
-       for (int s=0;s<sema.size();s++)
+       for (unsigned int s=0;s<sema.size();s++)
                if (sema[s].id==id) 
                        return &sema[s];
        sema.push_back(TCharmSemaphore(id));
@@ -981,7 +1045,7 @@ TCharm::TCharmSemaphore *TCharm::findSema(int id) {
 /// Remove this semaphore from the list
 void TCharm::freeSema(TCharmSemaphore *doomed) {
        int id=doomed->id;
-       for (int s=0;s<sema.size();s++)
+       for (unsigned int s=0;s<sema.size();s++)
                if (sema[s].id==id) {
                        sema[s]=sema[sema.length()-1];
                        sema.length()--;