12cd51c2f72f2cb56eb931d4473af2b890a2d9bb
[charm.git] / NodeHelper.C
1 #include "NodeHelper.h"
2
3 FuncNodeHelper::FuncNodeHelper()
4 {  
5 #if CMK_SMP     
6     //CkPrintf("FuncNodeHelper created on node %d\n", CkMyNode());
7          
8     traceRegisterUserEvent("nodehelper total work",20);
9     traceRegisterUserEvent("nodehlelper finish signal",21);
10     
11         numHelpers = CkMyNodeSize();
12         helperPtr = new FuncSingleHelper *[numHelpers];
13         useTreeBcast = (numHelpers >= USE_TREE_BROADCAST_THRESHOLD);
14         
15         int pestart = CkNodeFirst(CkMyNode());
16                 
17         for (int i=0; i<numHelpers; i++) {
18         CkChareID helper;
19         CProxy_FuncSingleHelper::ckNew((size_t)this, &helper, pestart+i);
20         }       
21 #endif
22 }
23
// Upper bound on the number of chunks one parallelized loop may be split
// into: parallelizeFunc clamps its numChunks argument to this value, and
// every CurLoopInfo created by FuncSingleHelper is constructed with it.
int FuncNodeHelper::MAX_CHUNKS = 64;

// Tracing helpers. When CMK_TRACE_ENABLED is set, TRACE_START stores the
// current wall time into a variable named _start (which the enclosing scope
// must declare; the id argument is not used by the expansion) and
// TRACE_BRACKET reports the interval from _start to now as user event `id`.
// Otherwise both macros expand to nothing.
#if CMK_TRACE_ENABLED
#define TRACE_START(id) _start = CmiWallTimer()
#define TRACE_BRACKET(id) traceUserBracketEvent(id,_start,CmiWallTimer())
#else
#define TRACE_START(id)
#define TRACE_BRACKET(id)
#endif

// When non-zero, parallelizeFunc obtains its notification message through
// FuncSingleHelper::getNotifyMsg() instead of reading the notifyMsg member
// directly.
// NOTE(review): presumably this lets several unsynchronized loops be in
// flight at once by rotating through the message buffer -- confirm against
// getNotifyMsg() in the header.
#define ALLOW_MULTIPLE_UNSYNC 1
35 void FuncNodeHelper::parallelizeFunc(HelperFn func, int paramNum, void * param, 
36                                     int numChunks, int lowerRange, 
37                                     int upperRange, int sync,
38                                     void *redResult, REDUCTION_TYPE type) {
39                                         
40     double _start; //may be used for tracing
41     
42     if(numChunks > MAX_CHUNKS){ 
43         CkPrintf("NodeHelper[%d]: WARNING! chunk is set to MAX_CHUNKS=%d\n", CmiMyPe(), MAX_CHUNKS);
44         numChunks = MAX_CHUNKS;
45     }
46         
47     /* "stride" determines the number of loop iterations to be done in each chunk
48      * for chunk indexed at 0 to remainder-1, stride is "unit+1";
49      * for chunk indexed at remainder to numChunks-1, stride is "unit"
50      */
51      int stride;
52     
53     //for using nodequeue
54         TRACE_START(20);
55         
56         FuncSingleHelper *thisHelper = helperPtr[CkMyRank()];
57 #if ALLOW_MULTIPLE_UNSYNC
58     ConverseNotifyMsg *notifyMsg = thisHelper->getNotifyMsg();
59 #else
60     ConverseNotifyMsg *notifyMsg = thisHelper->notifyMsg;
61 #endif
62     CurLoopInfo *curLoop = (CurLoopInfo *)(notifyMsg->ptr);
63         curLoop->set(numChunks, func, lowerRange, upperRange, paramNum, param); 
64         if(useTreeBcast){               
65                 int loopTimes = TREE_BCAST_BRANCH>(CmiMyNodeSize()-1)?CmiMyNodeSize()-1:TREE_BCAST_BRANCH;
66                 //just implicit binary tree
67                 int pe = CmiMyRank()+1;        
68                 for(int i=0; i<loopTimes; i++, pe++){
69                         if(pe >= CmiMyNodeSize()) pe -= CmiMyNodeSize();
70                         CmiPushPE(pe, (void *)(notifyMsg));    
71                 }
72         }else{
73                 for (int i=0; i<numHelpers; i++) {
74                         if (i!=CkMyRank()) CmiPushPE(i, (void *)(notifyMsg));            
75                 }
76         }
77     
78         curLoop->stealWork();
79         TRACE_BRACKET(20);
80         
81         TRACE_START(21);                
82         curLoop->waitLoopDone(sync);
83         TRACE_BRACKET(21);        
84
85     if (type!=NODEHELPER_NONE)
86         reduce(curLoop->getRedBufs(), redResult, type, numChunks);            
87     return;
88 }
89
90 #define COMPUTE_REDUCTION(T) {\
91     for(int i=0; i<numChunks; i++) {\
92      result += *((T *)(redBufs[i])); \
93      /*CkPrintf("Nodehelper Reduce: %d\n", result);*/ \
94     }\
95 }
96
97 void FuncNodeHelper::reduce(void **redBufs, void *redBuf, REDUCTION_TYPE type, int numChunks) {
98     switch(type){
99         case NODEHELPER_INT_SUM:
100         {
101             int result=0;
102             COMPUTE_REDUCTION(int)
103             *((int *)redBuf) = result;
104             break;
105         }
106         case NODEHELPER_FLOAT_SUM:
107         {
108             float result=0;
109             COMPUTE_REDUCTION(float)
110             *((float *)redBuf) = result;
111             break;
112         }
113         case NODEHELPER_DOUBLE_SUM:
114         {
115             double result=0;
116             COMPUTE_REDUCTION(double)
117             *((double *)redBuf) = result;
118             break;
119         }
120         default:
121         break;
122     }
123 }
124
125 CpvStaticDeclare(int, NdhStealWorkHandler);
126 static void RegisterNodeHelperHdlrs(){
127     CpvInitialize(int, NdhStealWorkHandler);
128     CpvAccess(NdhStealWorkHandler) = CmiRegisterHandler((CmiHandler)SingleHelperStealWork);
129 }
130
131 FuncSingleHelper::FuncSingleHelper(size_t ndhPtr) {
132     thisNodeHelper = (FuncNodeHelper *)ndhPtr;
133     CmiAssert(thisNodeHelper!=NULL);
134         
135         nextFreeNotifyMsg = 0;
136     notifyMsg = (ConverseNotifyMsg *)malloc(sizeof(ConverseNotifyMsg)*MSG_BUFFER_SIZE);
137     for(int i=0; i<MSG_BUFFER_SIZE; i++){
138         ConverseNotifyMsg *tmp = notifyMsg+i;
139         if(thisNodeHelper->useTreeBcast){
140             tmp->srcRank = CmiMyRank();
141         }else{
142             tmp->srcRank = -1;
143         }            
144         tmp->ptr = (void *)(new CurLoopInfo(FuncNodeHelper::MAX_CHUNKS));
145         CmiSetHandler(tmp, CpvAccess(NdhStealWorkHandler));
146     }
147     thisNodeHelper->helperPtr[CkMyRank()] = this;
148 }
149
150
151 void SingleHelperStealWork(ConverseNotifyMsg *msg){
152         
153         int srcRank = msg->srcRank;
154         
155         if(srcRank >= 0){
156                 //means using tree-broadcast to send the notification msg
157                 
158                 //int numHelpers = CmiMyNodeSize(); //the value of "numHelpers" should be obtained somewhere else
159                 int relPE = CmiMyRank()-msg->srcRank;
160                 if(relPE<0) relPE += CmiMyNodeSize();
161                 
162                 //CmiPrintf("Rank[%d]: got msg from src %d with relPE %d\n", CmiMyRank(), msg->srcRank, relPE);
163                 relPE=relPE*TREE_BCAST_BRANCH+1;
164                 for(int i=0; i<TREE_BCAST_BRANCH; i++, relPE++){
165                         if(relPE >= CmiMyNodeSize()) break;
166                         int pe = (relPE + msg->srcRank)%CmiMyNodeSize();
167                         //CmiPrintf("Rank[%d]: send msg to dst %d (relPE: %d) from src %d\n", CmiMyRank(), pe, relPE, msg->srcRank);
168                         CmiPushPE(pe, (void *)msg);
169                 }
170         }
171     CurLoopInfo *loop = (CurLoopInfo *)msg->ptr;
172     loop->stealWork();
173 }
174
175 void CurLoopInfo::stealWork(){
176     //indicate the current work hasn't been initialized
177     //or the old work has finished.
178     if(inited == 0) return;
179     
180     int first, last;
181     int unit = (upperIndex-lowerIndex+1)/numChunks;
182     int remainder = (upperIndex-lowerIndex+1)-unit*numChunks;
183     int markIdx = remainder*(unit+1);
184     
185     int nextChunkId = getNextChunkIdx();
186     int execTimes = 0;
187     while(nextChunkId < numChunks){
188         if(nextChunkId < remainder){
189             first = (unit+1)*nextChunkId;
190             last = first+unit;
191         }else{
192             first = (nextChunkId - remainder)*unit + markIdx;
193             last = first+unit-1;
194         }
195                 
196         fnPtr(first, last, redBufs[nextChunkId], paramNum, param);
197         execTimes++;
198         nextChunkId = getNextChunkIdx();
199     }
200     reportFinished(execTimes);
201 }
202
203 //======================================================================//
//   End of functions related to FuncSingleHelper                       //
205 //======================================================================//
206
207 CProxy_FuncNodeHelper NodeHelper_Init(){
208     CkPrintf("NodeHelperLib is used in SMP with a simple dynamic scheduling but not using node-level queue\n");
209     return CProxy_FuncNodeHelper::ckNew();
210 }
211
212 void NodeHelper_Parallelize(CProxy_FuncNodeHelper nodeHelper, HelperFn func, 
213                         int paramNum, void * param, 
214                         int numChunks, int lowerRange, int upperRange,
215                         int sync,
216                         void *redResult, REDUCTION_TYPE type)
217 {
218     nodeHelper[CkMyNode()].ckLocalBranch()->parallelizeFunc(func, paramNum, param, numChunks, lowerRange, upperRange, sync, redResult, type);
219 }
220
221 #include "NodeHelper.def.h"