fix for migration
[charm.git] / src / libs / ck-libs / tcharm / tcharm.C
index 344e2608d299822852b9a430338cf1bed6275f0f..aca121847d7ffba30c68799312af9a1afe2551f3 100644 (file)
@@ -5,6 +5,7 @@ Orion Sky Lawlor, olawlor@acm.org, 11/19/2001
  */
 #include "tcharm_impl.h"
 #include "tcharm.h"
+#include "ckevacuation.h"
 #include <ctype.h>
 
 #if 0 
@@ -81,7 +82,23 @@ void TCharm::procInit(void)
   char *traceLibName=NULL;
   while (CmiGetArgStringDesc(argv,"+tcharm_trace",&traceLibName,"Print each call to this library"))
       tcharm_tracelibs.addTracing(traceLibName);
-  CmiGetArgIntDesc(argv,"+tcharm_stacksize",&tcharm_stacksize,"Set the thread stack size (default 1MB)");
+  // CmiGetArgIntDesc(argv,"+tcharm_stacksize",&tcharm_stacksize,"Set the thread stack size (default 1MB)");
+  char *str;
+  if (CmiGetArgStringDesc(argv,"+tcharm_stacksize",&str,"Set the thread stack size (default 1MB)"))  {
+    if (strpbrk(str,"M")) {
+      sscanf(str, "%dM", &tcharm_stacksize);
+      tcharm_stacksize *= 1024*1024;
+    }
+    else if (strpbrk(str,"K")) {
+      sscanf(str, "%dK", &tcharm_stacksize);
+      tcharm_stacksize *= 1024;
+    }
+    else {
+      sscanf(str, "%d", &tcharm_stacksize);
+    }
+    if (CkMyPe() == 0)
+      CkPrintf("TCharm> stack size is set to %d.\n", tcharm_stacksize);
+  }
   if (CkMyPe()!=0) { //Processor 0 eats "+vp<N>" and "-vp<N>" later:
        int ignored;
        while (CmiGetArgIntDesc(argv,"-vp",&ignored,NULL)) {}
@@ -110,21 +127,31 @@ void TCHARM_Api_trace(const char *routineName,const char *libraryName)
        CmiPrintf("\n");
 }
 
-#if CMK_TCHARM_FNPTR_HACK
-CDECL void AMPI_threadstart(void *data);
-#endif
+// register thread start functions to get a function handler
+// this is portable across heterogeneous platforms, or on machines with
+// random stack/function pointer
+
+static CkVec<TCHARM_Thread_data_start_fn> threadFnTable;
+
+int TCHARM_Register_thread_function(TCHARM_Thread_data_start_fn fn)
+{
+  int idx = threadFnTable.size();
+  threadFnTable.push_back(fn);
+  return idx+1;                     // make 0 invalid number
+}
+
+TCHARM_Thread_data_start_fn getTCharmThreadFunction(int idx)
+{
+  CmiAssert(idx > 0);
+  return threadFnTable[idx-1];
+}
 
 static void startTCharmThread(TCharmInitMsg *msg)
 {
        DBGX("thread started");
        TCharm::activateThread();
-       typedef void (*threadFn_t)(void *);
-
-#if CMK_TCHARM_FNPTR_HACK
-       ((threadFn_t)AMPI_threadstart)(msg->data);
-#else
-       ((threadFn_t)msg->threadFn)(msg->data);
-#endif
+       TCHARM_Thread_data_start_fn threadFn = getTCharmThreadFunction(msg->threadFn);
+       threadFn(msg->data);
        TCharm::deactivateThread();
        CtvAccess(_curTCharm)->done();
 }
@@ -134,7 +161,6 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
   initMsg=initMsg_;
   initMsg->opts.sanityCheck();
   timeOffset=0.0;
-  threadGlobals=CtgCreate();
   if (tcharm_nothreads)
   { //Don't even make a new thread-- just use main thread
     tid=CthSelf();
@@ -146,11 +172,12 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
     } else {
       tid=CthCreateMigratable((CthVoidFn)startTCharmThread,initMsg,initMsg->opts.stackSize);
     }
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
     BgAttach(tid);
     BgUnsetStartOutOfCore();
 #endif
   }
+  threadGlobals=CtgCreate(tid);
   CtvAccessOther(tid,_curTCharm)=this;
   isStopped=true;
   resumeAfterMigration=false;
@@ -162,13 +189,15 @@ TCharm::TCharm(TCharmInitMsg *initMsg_)
   threadInfo.tProxy=CProxy_TCharm(thisArrayID);
   threadInfo.thisElement=thisIndex;
   threadInfo.numElements=initMsg->numElements;
-  if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
-       heapBlocks=CmiIsomallocBlockListNew();
-  else
+  if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)) {
+       heapBlocks=CmiIsomallocBlockListNew(tid);
+  else
        heapBlocks=0;
   nUd=0;
   usesAtSync=CmiTrue;
   run();
+  CkCallback cb(CkIndex_TCharm::ResumeFromChkpSync(),thisProxy(thisIndex));
+  setChkpResumeClient(cb);
 }
 
 TCharm::TCharm(CkMigrateMessage *msg)
@@ -195,14 +224,23 @@ void checkPupMismatch(PUP::er &p,int expected,const char *where)
 void TCharm::pup(PUP::er &p) {
 //Pup superclass
   ArrayElement1D::pup(p);
-
   //BIGSIM_OOC DEBUGGING
   //if(!p.isUnpacking()){
   //  CmiPrintf("TCharm[%d] packing: ", thisIndex);
   //  CthPrintThdStack(tid);
   //}
-
+  //if(p.isUnpacking()&&CkInRestarting()){
+  if(p.isUnpacking()){
+    CkCallback cb(CkIndex_TCharm::ResumeFromChkpSync(),thisProxy(thisIndex));
+    setChkpResumeClient(cb);
+  }
   checkPupMismatch(p,5134,"before TCHARM");
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
+    if(!isStopped){
+//      resumeAfterMigration = true;
+    }
+    isStopped = true;
+#endif
   p(isStopped); p(resumeAfterMigration); p(exitWhenDone); p(isSelfDone); p(skipResume);
   p(threadInfo.thisElement);
   p(threadInfo.numElements);
@@ -214,7 +252,7 @@ void TCharm::pup(PUP::er &p) {
 #ifndef CMK_OPTIMIZE
   DBG("Packing thread");
   if (!isStopped && !CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC)){
-    if(BgOutOfCoreFlag==0) //not doing out-of-core scheduling
+    if(_BgOutOfCoreFlag==0) //not doing out-of-core scheduling
        CkAbort("Cannot pup a running thread.  You must suspend before migrating.\n");
   }    
   if (tcharm_nomig) CkAbort("Cannot migrate with the +tcharm_nomig option!\n");
@@ -283,15 +321,15 @@ void TCharm::pup(PUP::er &p) {
 void TCharm::pupThread(PUP::er &pc) {
     pup_er p=(pup_er)&pc;
     checkPupMismatch(pc,5138,"before TCHARM thread");
+    if (1 || CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
+      CmiIsomallocBlockListPup(p,&heapBlocks,tid);
     tid = CthPup(p, tid);
     if (pc.isUnpacking()) {
       CtvAccessOther(tid,_curTCharm)=this;
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
       BgAttach(tid);
 #endif
     }
-    if (CmiMemoryIs(CMI_MEMORY_IS_ISOMALLOC))
-      CmiIsomallocBlockListPup(p,&heapBlocks);
     threadGlobals=CtgPup(p,threadGlobals);
     checkPupMismatch(pc,5139,"after TCHARM thread");
 }
@@ -333,7 +371,9 @@ TCharm::~TCharm()
   //BIGSIM_OOC DEBUGGING
   //CmiPrintf("TCharm destructor called with heapBlocks=%p!\n", heapBlocks);
   
+#if !CMK_USE_MEMPOOL_ISOMALLOC
   if (heapBlocks) CmiIsomallocBlockListDelete(heapBlocks);
+#endif
   CthFree(tid);
   CtgFree(threadGlobals);
   delete initMsg;
@@ -341,6 +381,10 @@ TCharm::~TCharm()
 
 void TCharm::migrateTo(int destPE) {
        if (destPE==CkMyPe()) return;
+       if (CthMigratable() == 0) {
+           CkPrintf("Warning> thread migration is not supported!\n");
+            return;
+        }
        // Make sure migrateMe gets called *after* we suspend:
        thisProxy[thisIndex].migrateDelayed(destPE);
 //     resumeAfterMigration=true;
@@ -351,6 +395,9 @@ void TCharm::migrateDelayed(int destPE) {
 }
 void TCharm::ckJustMigrated(void) {
        ArrayElement::ckJustMigrated();
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
+//  resumeAfterMigration = true;
+#endif
        if (resumeAfterMigration) {
                resumeAfterMigration=false;
                resume(); //Start the thread running
@@ -433,6 +480,10 @@ void TCharm::stop(void)
     we're resuming from migration!  (OSL 2003/9/23)
    */
   TCharm *dis=TCharm::get();
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_)) 
+/*  CpvAccess(_currentObj) = dis;
+ *      printf("[%d] _currentObject set to TCharm index %d %p\n",CkMyPe(),dis->thisIndex,dis);*/
+#endif
   dis->isStopped=false;
   dis->startTiming();
   //CkPrintf("[%d] Thread resumed  for tid %p\n",dis->thisIndex,dis->tid);
@@ -446,7 +497,13 @@ void TCharm::start(void)
   DBG("thread resuming soon");
   //CkPrintf("TCharm[%d]::start()\n", thisIndex);
   //CmiPrintStackTrace(0);
+#if (defined(_FAULT_MLOG_) || defined(_FAULT_CAUSAL_))
+//CthAwakenPrio(tid, CQS_QUEUEING_BFIFO, 1, &prio);
+  CthAwaken(tid);
+#else
   CthAwaken(tid);
+#endif
+  DBG("thread resuming soon");
 }
 
 //Block our thread, schedule, and come back:
@@ -469,13 +526,23 @@ void TCharm::migrate(void)
 }
 
 
+void TCharm::chkpsync(void)
+{
+#if CMK_LBDB_ON
+  DBG("going to sync");
+  AtChkpSync();
+  stop();
+#else
+  DBG("skipping sync, because there is no load balancer");
+#endif
+}
+
 void TCharm::evacuate(){
        /*
                FAULT_EVAC
        */
        //CkClearAllArrayElementsCPP();
-       if(CpvAccess(startedEvac)){
-               int nextPE = getNextPE(CkArrayIndex1D(thisIndex));
+       if(CkpvAccess(startedEvac)){
 //             resumeAfterMigration=true;
                CcdCallFnAfter((CcdVoidFn)CkEmmigrateElement, (void *)myRec, 1);
                suspend();
@@ -520,10 +587,16 @@ void TCharm::allow_migrate(void)
 //Resume from sync: start the thread again
 void TCharm::ResumeFromSync(void)
 {
-  if(isSelfDone) return;
+  //if(isSelfDone) return;
+  //if (exitWhenDone) return; //for bigsim ooc execution
   if (!skipResume) start();
+  CkPrintf("thread ResumeFromSync\n");
 }
 
+void TCharm::ResumeFromChkpSync(void)
+{
+  start();
+}
 
 /****** TcharmClient ******/
 void TCharmClient1D::ckJustMigrated(void) {
@@ -545,20 +618,20 @@ CkArrayID TCHARM_Get_threads(void) {
 /************* Startup/Shutdown Coordination Support ************/
 
 // Useless values to reduce over:
-int vals[2]={0,1};
+int _vals[2]={0,1};
 
 //Called when we want to go to a barrier
 void TCharm::barrier(void) {
        //Contribute to a synchronizing reduction
        CkCallback cb(index_t::atBarrier(0), thisProxy[0]);
-       contribute(sizeof(vals),&vals,CkReduction::sum_int,cb);
-#if CMK_BLUEGENE_CHARM
+       contribute(sizeof(_vals),&_vals,CkReduction::sum_int,cb);
+#if CMK_BIGSIM_CHARM
         void *curLog;          // store current log in timeline
         _TRACE_BG_TLINE_END(&curLog);
-       TRACE_BG_AMPI_BREAK(NULL, "TCharm_Barrier_START", NULL, 0);
+       TRACE_BG_AMPI_BREAK(NULL, "TCharm_Barrier_START", NULL, 0, 1);
 #endif
        stop();
-#if CMK_BLUEGENE_CHARM
+#if CMK_BIGSIM_CHARM
         _TRACE_BG_SET_INFO(NULL, "TCHARM_Barrier_END",  &curLog, 1);
 #endif
 }
@@ -577,7 +650,7 @@ void TCharm::done(void) {
        if (exitWhenDone) {
                //Contribute to a synchronizing reduction
                CkCallback cb(index_t::atExit(0), thisProxy[0]);
-               contribute(sizeof(vals),&vals,CkReduction::sum_int,cb);
+               contribute(sizeof(_vals),&_vals,CkReduction::sum_int,cb);
        }
        isSelfDone = true;
        stop();
@@ -655,26 +728,26 @@ CDECL void TCHARM_Set_exit(void) { g_tcharmOptions.exitWhenDone=1; }
 
 /*Create a new array of threads, which will be bound to by subsequent libraries*/
 CDECL void TCHARM_Create(int nThreads,
-                       TCHARM_Thread_start_fn threadFn)
+                       int threadFn)
 {
        TCHARMAPI("TCHARM_Create");
        TCHARM_Create_data(nThreads,
-                        (TCHARM_Thread_data_start_fn)threadFn,NULL,0);
+                        threadFn,NULL,0);
 }
 FDECL void FTN_NAME(TCHARM_CREATE,tcharm_create)
-       (int *nThreads,TCHARM_Thread_start_fn threadFn)
+       (int *nThreads,int threadFn)
 { TCHARM_Create(*nThreads,threadFn); }
 
 static CProxy_TCharm TCHARM_Build_threads(TCharmInitMsg *msg);
 
 /*As above, but pass along (arbitrary) data to threads*/
 CDECL void TCHARM_Create_data(int nThreads,
-                 TCHARM_Thread_data_start_fn threadFn,
+                 int threadFn,
                  void *threadData,int threadDataLen)
 {
        TCHARMAPI("TCHARM_Create_data");
        TCharmInitMsg *msg=new (threadDataLen,0) TCharmInitMsg(
-               (CthVoidFn)threadFn,g_tcharmOptions);
+               threadFn,g_tcharmOptions);
        msg->numElements=nThreads;
        memcpy(msg->data,threadData,threadDataLen);
        TCHARM_Build_threads(msg);
@@ -685,7 +758,7 @@ CDECL void TCHARM_Create_data(int nThreads,
 
 FDECL void FTN_NAME(TCHARM_CREATE_DATA,tcharm_create_data)
        (int *nThreads,
-                 TCHARM_Thread_data_start_fn threadFn,
+                 int threadFn,
                  void *threadData,int *threadDataLen)
 { TCHARM_Create_data(*nThreads,threadFn,threadData,*threadDataLen); }
 
@@ -695,17 +768,33 @@ static CProxy_TCharm TCHARM_Build_threads(TCharmInitMsg *msg)
 {
   CkArrayOptions opts(msg->numElements);
   CkAssert(CkpvAccess(mapCreated)==1);
-  if(mapping==NULL){
-    mapID=CkCreatePropMap();
-  }else if(0==strcmp(mapping,"BLOCK_MAP")){
+
+  if(haveConfigurableRRMap()){
+    CkPrintf("USING ConfigurableRRMap\n");
+    mapID=CProxy_ConfigurableRRMap::ckNew();
+  } else if(mapping==NULL){
+#if CMK_BIGSIM_CHARM
     mapID=CProxy_BlockMap::ckNew();
-  }else if(0==strcmp(mapping,"RR_MAP")){
-    mapID=CProxy_RRMap::ckNew();
-  }else{  // "PROP_MAP" or anything else
+#else
+#if __FAULT__
+       mapID=CProxy_RRMap::ckNew();
+#else
     mapID=CkCreatePropMap();
+#endif
+#endif
+  } else if(0 == strcmp(mapping,"BLOCK_MAP")) {
+    CkPrintf("USING BLOCK_MAP\n");
+    mapID = CProxy_BlockMap::ckNew();
+  } else if(0 == strcmp(mapping,"RR_MAP")) {
+    CkPrintf("USING RR_MAP\n");
+    mapID = CProxy_RRMap::ckNew();
+  } else if(0 == strcmp(mapping,"MAPFILE")) {
+    CkPrintf("Reading map from file\n");
+    mapID = CProxy_ReadFileMap::ckNew();
+  } else {  // "PROP_MAP" or anything else
+    mapID = CkCreatePropMap();
   }
   opts.setMap(mapID);
-  int nElem=msg->numElements; //<- save it because msg will be deleted.
   return CProxy_TCharm::ckNew(msg,opts);
 }
 
@@ -755,8 +844,14 @@ static void checkAddress(void *data)
 {
        if (tcharm_nomig||tcharm_nothreads) return; //Stack is not isomalloc'd
        if (CmiThreadIs(CMI_THREAD_IS_ALIAS)||CmiThreadIs(CMI_THREAD_IS_STACKCOPY)) return; // memory alias thread
-       if (!CmiIsomallocInRange(data))
+       if (CmiIsomallocEnabled()) {
+          if (!CmiIsomallocInRange(data))
            CkAbort("The UserData you register must be allocated on the stack!\n");
+        }
+        else {
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> checkAddress failed because isomalloc not supported.\n");
+       }
 }
 
 /* Old "register"-based userdata: */
@@ -806,9 +901,27 @@ CDECL void *TCHARM_Get_global(int globalID)
 CDECL void TCHARM_Migrate(void)
 {
        TCHARMAPI("TCHARM_Migrate");
+       if (CthMigratable() == 0) {
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> thread migration is not supported!\n");
+          return;
+        }
        TCharm::get()->migrate();
 }
+
+CDECL void TCHARM_ChkpSync(void)
+{
+       TCHARMAPI("TCHARM_ChkpSync");
+       if (CthMigratable() == 0) {
+         if(CkMyPe() == 0)
+           CkPrintf("Warning> thread migration is not supported!\n");
+          return;
+        }
+       TCharm::get()->chkpsync();
+}
+
 FORTRAN_AS_C(TCHARM_MIGRATE,TCHARM_Migrate,tcharm_migrate,(void),())
+FORTRAN_AS_C(TCHARM_CHKPSYNC,TCHARM_ChkpSync,tcharm_chkpsync,(void),())
 
 CDECL void TCHARM_Async_Migrate(void)
 {
@@ -908,8 +1021,8 @@ CDECL void TCHARM_Init(int *argc,char ***argv) {
 FDECL void FTN_NAME(TCHARM_INIT,tcharm_init)(void)
 {
        int argc=1;
-       char *argv_sto[2]={"foo",NULL};
-       char **argv=argv_sto;
+       const char *argv_sto[2]={"foo",NULL};
+       char **argv=(char **)argv_sto;
        TCHARM_Init(&argc,&argv);
 }
 
@@ -923,7 +1036,7 @@ FDECL void FTN_NAME(TCHARM_INIT,tcharm_init)(void)
 */
 /// Find this semaphore, or insert if there isn't one:
 TCharm::TCharmSemaphore *TCharm::findSema(int id) {
-       for (int s=0;s<sema.size();s++)
+       for (unsigned int s=0;s<sema.size();s++)
                if (sema[s].id==id) 
                        return &sema[s];
        sema.push_back(TCharmSemaphore(id));
@@ -932,7 +1045,7 @@ TCharm::TCharmSemaphore *TCharm::findSema(int id) {
 /// Remove this semaphore from the list
 void TCharm::freeSema(TCharmSemaphore *doomed) {
        int id=doomed->id;
-       for (int s=0;s<sema.size();s++)
+       for (unsigned int s=0;s<sema.size();s++)
                if (sema[s].id==id) {
                        sema[s]=sema[sema.length()-1];
                        sema.length()--;