Changes for out-of-core emulation in BigSim. For details, refer to Chao Mei...
[charm.git] / src / ck-core / ckcheckpoint.C
1 /*
2 Charm++ File: Checkpoint Library
3 added 01/03/2003 by Chao Huang, chuang10@uiuc.edu
4
5 More documentation goes here...
6 --- Updated 12/14/2003 by Gengbin, gzheng@uiuc.edu
7     see ckcheckpoint.h for change log
8 */
9
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include "charm++.h"
14 #include "ck.h"
15 #include "ckcheckpoint.h"
16
17 #define DEBCHK  // CkPrintf
18
19 #define DEBUGC(x) x
20 //#define DEBUGC(x) 
21
22 CkGroupID _sysChkptMgr;
23
24 typedef struct _GroupInfo{
25         CkGroupID gID;
26         int MigCtor, DefCtor;
27         char name[256];
28 } GroupInfo;
29 PUPbytes(GroupInfo)
30 PUPmarshall(GroupInfo)
31
32 int _inrestart = 0;
33
34 // help class to find how many array elements
35 class ElementCounter : public CkLocIterator {
36 private:
37         int count;
38 public:
39         ElementCounter():count(0){};
40         void addLocation(CkLocation &loc)  { count++; }
41         int getCount() { return count; }
42 };
43
44 // helper class to pup all elements that belong to same ckLocMgr
45 class ElementCheckpointer : public CkLocIterator {
46 private:
47         CkLocMgr *locMgr;
48         PUP::er &p;
49 public:
50         ElementCheckpointer(CkLocMgr* mgr_, PUP::er &p_):locMgr(mgr_),p(p_){};
51         void addLocation(CkLocation &loc) {
52                 CkArrayIndexMax idx=loc.getIndex();
53                 CkGroupID gID = locMgr->ckGetGroupID();
54                 p|gID;      // store loc mgr's GID as well for easier restore
55                 p|idx;
56                 p|loc;
57                 //CkPrintf("[%d] addLocation: ", CkMyPe()), idx.print();
58         }
59 };
60
61
62 extern void _initDone();
63
64 static void bdcastRO(void){
65         int i;
66         //Determine the size of the RODataMessage
67         PUP::sizer ps;
68         for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(ps);
69
70         //Allocate and fill out the RODataMessage
71         envelope *env = _allocEnv(RODataMsg, ps.size());
72         PUP::toMem pp((char *)EnvToUsr(env));
73         for(i=0;i<_readonlyTable.size();i++) _readonlyTable[i]->pupData(pp);
74         
75         env->setCount(++_numInitMsgs);
76         env->setSrcPe(CkMyPe());
77         CmiSetHandler(env, _roRestartHandlerIdx);
78         CmiSyncBroadcastAndFree(env->getTotalsize(), (char *)env);
79 }
80
81 // Print out an array index to this string as decimal fields
82 // separated by underscores.
83 void printIndex(const CkArrayIndex &idx,char *dest) {
84         const int *idxData=idx.data();
85         for (int i=0;i<idx.nInts;i++) {
86                 sprintf(dest,"%s%d",i==0?"":"_", idxData[i]);
87                 dest+=strlen(dest);
88         }
89 }
90
91 static void checkpointOne(const char* dirname, CkCallback& cb);
92
93 // broadcast
94 void CkCheckpointMgr::Checkpoint(const char *dirname, CkCallback& cb){
95         chkptStartTimer = CmiWallTimer();
96         // every body make dir in case it is local directory
97         CmiMkdir(dirname);
98
99         if (CkMyPe() == 0) {
100           checkpointOne(dirname, cb);
101         }
102
103         char fileName[1024];
104         // save groups into Groups.dat
105         // content of the file: numGroups, GroupInfo[numGroups], _groupTable(PUP'ed), groups(PUP'ed)
106         sprintf(fileName,"%s/Groups_%d.dat",dirname,CkMyPe());
107         FILE* fGroups = fopen(fileName,"wb");
108         if(!fGroups) CkAbort("Failed to create checkpoint file for group table!");
109         PUP::toDisk pGroups(fGroups);
110         CkPupGroupData(pGroups);
111         fclose(fGroups);
112
113         // save nodegroups into NodeGroups.dat
114         // content of the file: numNodeGroups, GroupInfo[numNodeGroups], _nodeGroupTable(PUP'ed), nodegroups(PUP'ed)
115         if (CkMyRank() == 0) {
116           sprintf(fileName,"%s/NodeGroups_%d.dat",dirname,CkMyNode());
117           FILE* fNodeGroups = fopen(fileName,"wb");
118           if(!fNodeGroups) 
119             CkAbort("Failed to create checkpoint file for nodegroup table!");
120           PUP::toDisk pNodeGroups(fNodeGroups);
121           CkPupNodeGroupData(pNodeGroups);
122           fclose(fNodeGroups);
123         }
124
125         //DEBCHK("[%d]CkCheckpointMgr::Checkpoint called dirname={%s}\n",CkMyPe(),dirname);
126         sprintf(fileName,"%s/arr_%d.dat",dirname, CkMyPe());
127         FILE *datFile=fopen(fileName,"wb");
128         if (datFile==NULL) CkAbort("Could not create data file");
129         PUP::toDisk  p(datFile);
130         CkPupArrayElementsData(p);
131         fclose(datFile);
132
133 #if CMK_HAS_SYNC && ! CMK_DISABLE_SYNC
134 #if 0
135         system("sync");
136 #endif
137 #endif
138
139         restartCB = cb;
140         DEBCHK("[%d]restartCB installed\n",CkMyPe());
141         CkCallback localcb(CkIndex_CkCheckpointMgr::SendRestartCB(NULL),0,thisgroup);
142         contribute(0,NULL,CkReduction::sum_int,localcb);
143 }
144
145 void CkCheckpointMgr::SendRestartCB(CkReductionMsg *m){ 
146         delete m; 
147         DEBCHK("[%d]Sending out the cb\n",CkMyPe());
148         CkPrintf("Checkpoint to disk finished in %fs, sending out the cb...\n", CmiWallTimer() - chkptStartTimer);
149         restartCB.send(); 
150 }
151
152 void CkPupROData(PUP::er &p)
153 {
154         int _numReadonlies;
155         if (!p.isUnpacking()) _numReadonlies=_readonlyTable.size();
156         p|_numReadonlies;
157         if (p.isUnpacking()) {
158           if (_numReadonlies != _readonlyTable.size())
159             CkAbort("You cannot add readonlies and restore from checkpoint...");
160         }
161         for(int i=0;i<_numReadonlies;i++) _readonlyTable[i]->pupData(p);
162 }
163
164 // handle main chare
165 void CkPupMainChareData(PUP::er &p, CkArgMsg *args)
166 {
167         int nMains=_mainTable.size();
168         DEBCHK("[%d] CkPupMainChareData %s: nMains = %d\n", CkMyPe(),p.typeString(),nMains);
169         for(int i=0;i<nMains;i++){  /* Create all mainchares */
170                 ChareInfo *entry = _chareTable[_mainTable[i]->chareIdx];
171                 int entryMigCtor = entry->getMigCtor();
172                 if(entryMigCtor!=-1) {
173                         Chare* obj;
174                         if (p.isUnpacking()) {
175                                 int size = entry->size;
176                                 DEBCHK("MainChare PUP'ed: name = %s, idx = %d, size = %d\n", entry->name, i, size);
177                                 obj = (Chare*)malloc(size);
178                                 _MEMCHECK(obj);
179                                 _mainTable[i]->setObj(obj);
180                                 //void *m = CkAllocSysMsg();
181                                 _entryTable[entryMigCtor]->call(args, obj);
182                         }
183                         else 
184                                 obj = (Chare *)_mainTable[i]->getObj();
185                         obj->pup(p);
186                 }
187         }
188         // to update mainchare proxy
189         // only readonly variables of Chare Proxy is taken care of here;
190         // in general, if chare proxy is contained in some data structure
191         // for example CkCallback, it is user's responsibility to
192         // update them after restarting
193         if (p.isUnpacking() && CkMyPe()==0)
194                 bdcastRO();
195 }
196
197 // handle GroupTable and data
198 void CkPupGroupData(PUP::er &p)
199 {
200         int numGroups, i;
201
202         if (!p.isUnpacking()) {
203           numGroups = CkpvAccess(_groupIDTable)->size();
204         }
205         p|numGroups;
206         if (p.isUnpacking()) {
207           if(CkMyPe()==0)  
208             CkpvAccess(_numGroups) = numGroups+1; 
209           else 
210             CkpvAccess(_numGroups) = 1;
211         }
212         DEBCHK("[%d] CkPupGroupData %s: numGroups = %d\n", CkMyPe(),p.typeString(),numGroups);
213
214         GroupInfo *tmpInfo = new GroupInfo [numGroups];
215         if (!p.isUnpacking()) {
216           for(i=0;i<numGroups;i++) {
217                 tmpInfo[i].gID = (*CkpvAccess(_groupIDTable))[i];
218                 TableEntry ent = CkpvAccess(_groupTable)->find(tmpInfo[i].gID);
219                 tmpInfo[i].MigCtor = _chareTable[ent.getcIdx()]->migCtor;
220                 tmpInfo[i].DefCtor = _chareTable[ent.getcIdx()]->defCtor;
221                 strncpy(tmpInfo[i].name,_chareTable[ent.getcIdx()]->name,255);
222                 DEBCHK("[%d] CkPupGroupData: %s group %s \n",
223                         CkMyPe(), p.typeString(), tmpInfo[i].name);
224
225                 if(tmpInfo[i].MigCtor==-1) {
226                         char buf[512];
227                         sprintf(buf,"Group %s needs a migration constructor and PUP'er routine for restart.\n", tmpInfo[i].name);
228                         CkAbort(buf);
229                 }
230           }
231         }
232         for (i=0; i<numGroups; i++) p|tmpInfo[i];
233
234         for(i=0;i<numGroups;i++) 
235         {
236           CkGroupID gID = tmpInfo[i].gID;
237           if (p.isUnpacking()) {
238             //CkpvAccess(_groupIDTable)->push_back(gID);
239             int eIdx = tmpInfo[i].MigCtor;
240             // error checking
241             if (eIdx == -1) {
242               CkPrintf("[%d] ERROR> Group %s's migration constructor is not defined!\n", CkMyPe(), tmpInfo[i].name); CkAbort("Abort");
243             }
244             void *m = CkAllocSysMsg();
245             envelope* env = UsrToEnv((CkMessage *)m);
246             CkCreateLocalGroup(gID, eIdx, env);
247           }   // end of unPacking
248           IrrGroup *gobj = CkpvAccess(_groupTable)->find(gID).getObj();
249           // if using migration constructor, you'd better have a pup
250           gobj->pup(p);
251           DEBCHK("Group PUP'ed: gid = %d, name = %s\n",
252                         gobj->ckGetGroupID().idx, tmpInfo[i].name);
253         }
254         delete [] tmpInfo;
255 }
256
257 // handle NodeGroupTable and data
258 void CkPupNodeGroupData(PUP::er &p)
259 {
260         int numNodeGroups, i;
261         if (!p.isUnpacking()) {
262           numNodeGroups = CksvAccess(_nodeGroupIDTable).size();
263         }
264         p|numNodeGroups;
265         if (p.isUnpacking()) {
266           if(CkMyPe()==0){ CksvAccess(_numNodeGroups) = numNodeGroups+1; }
267           else { CksvAccess(_numNodeGroups) = 1; }
268         }
269         DEBCHK("[%d] CkPupNodeGroupData %s: numNodeGroups = %d\n",CkMyPe(),p.typeString(),numNodeGroups);
270
271         GroupInfo *tmpInfo = new GroupInfo [numNodeGroups];
272         if (!p.isUnpacking()) {
273           for(i=0;i<numNodeGroups;i++) {
274                 tmpInfo[i].gID = CksvAccess(_nodeGroupIDTable)[i];
275                 TableEntry ent2 = CksvAccess(_nodeGroupTable)->find(tmpInfo[i].gID);
276                 tmpInfo[i].MigCtor = _chareTable[ent2.getcIdx()]->migCtor;
277                 if(tmpInfo[i].MigCtor==-1) {
278                         char buf[512];
279                         sprintf(buf,"NodeGroup %s either need a migration constructor and\n\
280                                      declared as [migratable] in .ci to be able to checkpoint.",\
281                                      _chareTable[ent2.getcIdx()]->name);
282                         CkAbort(buf);
283                 }
284           }
285         }
286         for (i=0; i<numNodeGroups; i++) p|tmpInfo[i];
287         for (i=0;i<numNodeGroups;i++) {
288                 CkGroupID gID = tmpInfo[i].gID;
289                 if (p.isUnpacking()) {
290                         //CksvAccess(_nodeGroupIDTable).push_back(gID);
291                         int eIdx = tmpInfo[i].MigCtor;
292                         void *m = CkAllocSysMsg();
293                         envelope* env = UsrToEnv((CkMessage *)m);
294                         CkCreateLocalNodeGroup(gID, eIdx, env);
295                 }
296                 TableEntry ent2 = CksvAccess(_nodeGroupTable)->find(gID);
297                 IrrGroup *obj = ent2.getObj();
298                 obj->pup(p);
299                 DEBCHK("Nodegroup PUP'ed: gid = %d, name = %s\n",
300                         obj->ckGetGroupID().idx,
301                         _chareTable[ent2.getcIdx()]->name);
302         }
303         delete [] tmpInfo;
304 }
305
306
307 // handle chare array elements for this processor
308 void CkPupArrayElementsData(PUP::er &p, int notifyListeners)
309 {
310         int i;
311         // safe in both packing/unpakcing at this stage
312         int numGroups = CkpvAccess(_groupIDTable)->size();
313
314         // number of array elements on this processor
315         int numElements;
316         if (!p.isUnpacking()) {
317           ElementCounter  counter;
318           CKLOCMGR_LOOP(mgr->iterate(counter););
319           numElements = counter.getCount();
320         }
321         p|numElements;
322
323         DEBCHK("[%d] CkPupArrayElementsData %s numGroups:%d numElements:%d \n",CkMyPe(),p.typeString(), numGroups, numElements);
324
325         if (!p.isUnpacking())
326         {
327           // let CkLocMgr to iterate and store every array elements
328           CKLOCMGR_LOOP(ElementCheckpointer chk(mgr, p); mgr->iterate(chk););
329         }
330         else {
331           // loop and create all array elements ourselves
332           //CkPrintf("total chare array cnts: %d\n", numElements);
333           for (int i=0; i<numElements; i++) {
334                 CkGroupID gID;
335                 CkArrayIndexMax idx;
336                 p|gID;
337                 p|idx;
338                 CkLocMgr *mgr = (CkLocMgr*)CkpvAccess(_groupTable)->find(gID).getObj();
339                 if (notifyListeners){
340                   mgr->resume(idx,p);
341                 }
342                 else{
343                   mgr->restore(idx,p);
344                 }
345           }
346         }
347         // finish up
348         if (notifyListeners)
349         for(i=0;i<numGroups;i++) {
350                 IrrGroup *obj = CkpvAccess(_groupTable)->find((*CkpvAccess(_groupIDTable))[i]).getObj();
351                 obj->ckJustMigrated();
352         }
353 }
354
355 void CkPupProcessorData(PUP::er &p)
356 {
357     // save readonlys, and callback BTW
358     if(CkMyRank()==0) {
359         CkPupROData(p);
360     }
361
362     // save mainchares into MainChares.dat
363     if(CkMyPe()==0) {
364       CkPupMainChareData(p, NULL);
365     }
366         
367     // save groups into Groups.dat
368     CkPupGroupData(p);
369
370     // save nodegroups into NodeGroups.dat
371     if(CkMyRank()==0) {
372         CkPupNodeGroupData(p);
373     }
374
375     // pup array elements
376     CkPupArrayElementsData(p);
377 }
378
379 // called only on pe 0
380 static void checkpointOne(const char* dirname, CkCallback& cb){
381         CmiAssert(CkMyPe()==0);
382         int i;
383         char filename[1024];
384         
385         // save readonlys, and callback BTW
386         sprintf(filename,"%s/RO.dat",dirname);
387         FILE* fRO = fopen(filename,"wb");
388         if(!fRO) CkAbort("Failed to create checkpoint file for readonly data!");
389         PUP::toDisk pRO(fRO);
390         int _numPes = CkNumPes();
391         pRO|_numPes;
392         CkPupROData(pRO);
393         pRO((char *)&cb, sizeof(cb));
394         fclose(fRO);
395
396         // save mainchares into MainChares.dat
397         {
398                 sprintf(filename,"%s/MainChares.dat",dirname);
399                 FILE* fMain = fopen(filename,"wb");
400                 if(!fMain) CkAbort("Failed to open checkpoint file for mainchare data!");
401                 PUP::toDisk pMain(fMain);
402                 CkPupMainChareData(pMain, NULL);
403                 fclose(fMain);
404         }
405 }
406
407 void CkRemoveArrayElements()
408 {
409   int i;
410   int numGroups = CkpvAccess(_groupIDTable)->size();
411   CKLOCMGR_LOOP(mgr->flushAllRecs(););
412 /*  GroupTable *gTbl = CkpvAccess(_groupTable);
413   for(i=0; i<numGroups; i++){
414     IrrGroup *obj = CkpvAccess(_groupTable)->find((*CkpvAccess(_groupIDTable))[i]).getObj();
415     if(obj->isLocMgr()) {
416         CkLocMgr *mgr = (CkLocMgr *)obj;
417         mgr->flushAllRecs();
418     }
419   }*/
420 }
421
422 /*
423 void CkTestArrayElements()
424 {
425   int i;
426   int numGroups = CkpvAccess(_groupIDTable)->size();
427   //CKLOCMGR_LOOP(mgr->flushAllRecs(););
428   GroupTable *gTbl = CkpvAccess(_groupTable);
429   for(i=0; i<numGroups; i++){
430     IrrGroup *obj = CkpvAccess(_groupTable)->find((*CkpvAccess(_groupIDTable))[i]).getObj();
431     CkPrintf("An object at [%d]: %p | isLocMgr: %d\n", i, obj, obj->isLocMgr());
432   }
433 }
434 */
435
436 void CkStartCheckpoint(char* dirname,const CkCallback& cb)
437 {
438         CkPrintf("[%d] Checkpoint starting in %s\n", CkMyPe(), dirname);
439         
440         // hand over to checkpoint managers for per-processor checkpointing
441         CProxy_CkCheckpointMgr(_sysChkptMgr).Checkpoint((char *)dirname, cb);
442 }
443
444 /**
445   * Restart: There's no such object as restart manager is created
446   *          because a group cannot restore itself anyway.
447   *          The mechanism exists as converse code and get invoked by
448   *          broadcast message.
449   **/
450
451 void CkRestartMain(const char* dirname, CkArgMsg *args){
452         int i;
453         char filename[1024];
454         CkCallback cb;
455         
456         _inrestart = 1;
457
458         // restore readonlys
459         sprintf(filename,"%s/RO.dat",dirname);
460         FILE* fRO = fopen(filename,"rb");
461         if(!fRO) CkAbort("Failed to open checkpoint file for readonly data!");
462         int _numPes = -1;
463         PUP::fromDisk pRO(fRO);
464         pRO|_numPes;
465         CkPupROData(pRO);
466         pRO|cb;
467         fclose(fRO);
468         DEBCHK("[%d]CkRestartMain: readonlys restored\n",CkMyPe());
469
470         CmiNodeBarrier();
471
472         // restore mainchares
473         sprintf(filename,"%s/MainChares.dat",dirname);
474         FILE* fMain = fopen(filename,"rb");
475         if(fMain && CkMyPe()==0){ // only main chares have been checkpointed, we restart on PE0
476                 PUP::fromDisk pMain(fMain);
477                 CkPupMainChareData(pMain, args);
478                 fclose(fMain);
479                 DEBCHK("[%d]CkRestartMain: mainchares restored\n",CkMyPe());
480                 //bdcastRO(); // moved to CkPupMainChareData()
481         }
482         
483         // restore groups
484         // content of the file: numGroups, GroupInfo[numGroups], _groupTable(PUP'ed), groups(PUP'ed)
485         // restore from PE0's copy if shrink/expand
486         if(CkNumPes() != _numPes)
487                 sprintf(filename,"%s/Groups_0.dat",dirname);
488         else
489                 sprintf(filename,"%s/Groups_%d.dat",dirname,CkMyPe());
490         FILE* fGroups = fopen(filename,"rb");
491         if(!fGroups) CkAbort("Failed to open checkpoint file for group table!");
492         PUP::fromDisk pGroups(fGroups);
493         CkPupGroupData(pGroups);
494         fclose(fGroups);
495
496         // restore nodegroups
497         // content of the file: numNodeGroups, GroupInfo[numNodeGroups], _nodeGroupTable(PUP'ed), nodegroups(PUP'ed)
498         if(CkMyRank()==0){
499                 if(CkNumPes() != _numPes)
500                         sprintf(filename,"%s/NodeGroups_0.dat",dirname);
501                 else
502                         sprintf(filename,"%s/NodeGroups_%d.dat",dirname,CkMyNode());
503                 FILE* fNodeGroups = fopen(filename,"rb");
504                 if(!fNodeGroups) CkAbort("Failed to open checkpoint file for nodegroup table!");
505                 PUP::fromDisk pNodeGroups(fNodeGroups);
506                 CkPupNodeGroupData(pNodeGroups);
507                 fclose(fNodeGroups);
508         }
509
510         // for each location, restore arrays
511         //DEBCHK("[%d]Trying to find location manager\n",CkMyPe());
512         DEBCHK("[%d]Number of PE: %d -> %d\n",CkMyPe(),_numPes,CkNumPes());
513         if(CkMyPe() < _numPes)  // in normal range: restore, otherwise, do nothing
514           for (i=0; i<_numPes;i++) {
515             if (i%CkNumPes() == CkMyPe()) {
516               sprintf(filename,"%s/arr_%d.dat",dirname, i);
517               FILE *datFile=fopen(filename,"rb");
518               if (datFile==NULL) CkAbort("Could not read data file");
519               PUP::fromDisk  p(datFile);
520               CkPupArrayElementsData(p);
521               fclose(datFile);
522             }
523           }
524
525         _inrestart = 0;
526
527         _initDone();
528
529         if(CkMyPe()==0) {
530                 CmiPrintf("[%d]CkRestartMain done. sending out callback.\n",CkMyPe());
531                 cb.send();
532         }
533 }
534
535 // Main chare: initialize system checkpoint manager
536 class CkCheckpointInit : public Chare {
537 public:
538   CkCheckpointInit(CkArgMsg *msg) {
539     _sysChkptMgr = CProxy_CkCheckpointMgr::ckNew();
540     delete msg;
541   }
542   CkCheckpointInit(CkMigrateMessage *m) {delete m;}
543 };
544
545 #include "CkCheckpoint.def.h"
546