Give MSAs handle objects that enforce phase, and adapt ParFUM to use them. FEM_call_p...
author Phil Miller <mille121@illinois.edu>
Mon, 13 Oct 2008 14:49:02 +0000 (14:49 +0000)
committer Phil Miller <mille121@illinois.edu>
Mon, 13 Oct 2008 14:49:02 +0000 (14:49 +0000)
src/libs/ck-libs/ParFUM/ParFUM_internals.h
src/libs/ck-libs/ParFUM/adapt_adj.C
src/libs/ck-libs/ParFUM/adapt_adj.h
src/libs/ck-libs/ParFUM/parallel_part.C
src/libs/ck-libs/multiphaseSharedArrays/msa-distArray.h
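
The new usage pattern, briefly: instead of bracketing each access phase with
bare sync() calls, client code holds a typed handle for the current phase and
trades it in at every phase boundary, so wrong-phase operations become compile
errors and stale-handle reuse throws at runtime. A minimal sketch of the
intended flow (illustrative only, not part of the patch; 'arr', 'nEntries',
'numWorkers', and the index 'i' are placeholders, and MSA1DINT stands for
ParFUM's usual MSA1D<int,...> typedef):

    MSA1DINT arr(nEntries, numWorkers);
    arr.enroll(numWorkers);

    MSA1DINT::Write &w = arr.getInitialWrite();  // first phase: plain writes
    w.set(i) = 42;

    MSA1DINT::Accum &a = arr.syncToAccum(w);     // 'w' is invalidated and freed
    a.accumulate(i, 1);                          // accumulate-only phase

    MSA1DINT::Read &r = arr.syncToRead(a);       // 'a' is invalidated and freed
    int v = r.get(i);                            // read-only phase

    // Reusing a stale handle, or requesting a second initial handle,
    // throws MSA_InvalidHandle.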

index 5f05e8ebd01b617fecb0003758a1fc4a46061ced..0c65e2e20c93fddff68e006a205b51c30bba3bf7 100644 (file)
@@ -47,7 +47,7 @@
 #  include <iostream.h>
 #else /* ISO C++ */
 #  include <iostream>
-   using namespace std;
+   using std::ostream;
 #endif
 
 #include "ParFUM_Adapt.decl.h"
@@ -2520,75 +2520,133 @@ struct ghostdata{
 };
 
 
-class MsaHashtable{
- public:
-  int numSlots;
-  MSA1DHASH table;
-  MsaHashtable(int _numSlots,int numWorkers):numSlots(_numSlots),table(_numSlots,numWorkers){
-  }
-  MsaHashtable(){};
+class MsaHashtable : private MSA1DHASH
+{
+public:
+       class Read; class Add;
+       Read& syncToRead(Add&);
+       Add&  syncToAdd(Read&);
+       Add& getInitialAdd();
+       using MSA1DHASH::pup;
+       using MSA1DHASH::enroll;
+
+       class Read : private MSA1DHASH::Read
+       {
+       public:
+               using MSA1DHASH::Read::get;
+               friend Read &MsaHashtable::syncToRead(Add&);
+               friend Add& MsaHashtable::syncToAdd(Read&);
+               void print();
+
+       private:
+       Read(MsaHashtable &m) : MSA1DHASH::Read(m) { }
+       };
+
+       class Add : private MSA1DHASH::Accum
+       {
+               using MSA1DHASH::Accum::accumulate;
+               friend Add& MsaHashtable::syncToAdd(Read&);
+               friend Read &MsaHashtable::syncToRead(Add&);
+               friend Add& MsaHashtable::getInitialAdd();
+       Add(MsaHashtable &m) : MSA1DHASH::Accum(m) { }
+       public:
+               int addTuple(int *tuple, int nodesPerTuple, int chunk, int elementNo);
+
+       };
 
-  virtual void pup(PUP::er &p){
-    p | numSlots;
-    p | table;
-  }
-  int addTuple(int *tuple,int nodesPerTuple,int chunk,int elementNo){
-    // sort the tuples to get a canonical form
-    // bubble sort should do just as well since the number
-    // of nodes is less than 10.
-    for(int i=0;i<nodesPerTuple-1;i++){
-      for(int j=i+1;j<nodesPerTuple;j++){
-       if(tuple[j] < tuple[i]){
-         int t = tuple[j];
-         tuple[j] = tuple[i];
-         tuple[i] = t;
-       }
-      }
-    }
-    //find out the index
-    long long sum = 0;
-    for(int i=0;i<nodesPerTuple;i++){
-      sum = sum *numSlots + tuple[i];
-    }
-    int index = (int )(sum %(long )numSlots);
-    Hashnode entry(nodesPerTuple,chunk,elementNo,tuple);
-
-    Hashtuple &list=table.accumulate(index);
-    list.vec->push_back(entry);
-    char str[100];
-    DEBUG(printf("[%d] adding tuple %s element %d to index %d \n",chunk,entry.nodes.toString(nodesPerTuple,str),elementNo,index));
-    return index;
-  }
-
-  void print(){
-    char str[100];
-    for(int i=0;i<numSlots;i++){
-      const Hashtuple &t = table.get(i);
-      for(int j=0;j<t.vec->size();j++){
-       Hashnode &tuple = (*t.vec)[j];
-       printf("ghost element chunk %d element %d index %d tuple < %s>\n",tuple.chunk,tuple.elementNo,i,tuple.nodes.toString(tuple.numnodes,str));
-      }
-    }
-  }
-  void sync(){
-    table.sync();
-  }
-  const Hashtuple &get(int i){
-    return table.get(i);
-  }
 
+       MsaHashtable(int _numSlots, int numWorkers)
+               : MSA1DHASH(_numSlots, numWorkers) { }
+       MsaHashtable() {}
 };
 
+MsaHashtable::Add& MsaHashtable::getInitialAdd()
+{
+       if(initHandleGiven)
+               throw MSA_InvalidHandle();
+       
+       Add *a = new Add(*this);
+       sync();
+       initHandleGiven = true;
+       return *a;
+}
+
+MsaHashtable::Add& MsaHashtable::syncToAdd(Read &r)
+{
+       r.checkInvalidate(this);
+       delete &r;
+       sync();
+       Add *a = new Add(*this);
+       return *a;
+}
+
+MsaHashtable::Read& MsaHashtable::syncToRead(Add &a)
+{
+       a.checkInvalidate(this);
+       delete &a;
+       sync();
+       Read *r = new Read(*this);
+       return *r;
+}
+
+void MsaHashtable::Read::print()
+{
+       unsigned nEntries = MSA1DHASH::Read::msa.length();
+       char str[100];
+       for(int i=0;i<nEntries;i++){
+               const Hashtuple &t = get(i);
+               for(int j=0;j<t.vec->size();j++){
+                       Hashnode &tuple = (*t.vec)[j];
+                       printf("ghost element chunk %d element %d index %d tuple < %s>\n", 
+                              tuple.chunk, tuple.elementNo, i, 
+                              tuple.nodes.toString(tuple.numnodes,str));
+               }
+       }
+}
+
+int MsaHashtable::Add::addTuple(int *tuple,int nodesPerTuple,int chunk,int elementNo)
+{
+       int slots = msa.length();
+
+       // sort the tuples to get a canonical form
+       // bubble sort should do just as well since the number
+       // of nodes is less than 10.
+       for(int i=0;i<nodesPerTuple-1;i++){
+               for(int j=i+1;j<nodesPerTuple;j++){
+                       if(tuple[j] < tuple[i]){
+                               int t = tuple[j];
+                               tuple[j] = tuple[i];
+                               tuple[i] = t;
+                       }
+               }
+       }
+
+       //find out the index
+       long long sum = 0;
+       for(int i=0;i<nodesPerTuple;i++){
+               sum = sum*slots + tuple[i];
+       }
+       int index = (int )(sum %(long )slots);
+       Hashnode entry(nodesPerTuple,chunk,elementNo,tuple);
+
+       Hashtuple &list=accumulate(index);
+       list.vec->push_back(entry);
+       char str[100];
+       DEBUG(printf("[%d] adding tuple %s element %d to index %d \n",chunk,entry.nodes.toString(nodesPerTuple,str),elementNo,index));
+       return index;
+}
+
+
 
 int FEM_master_parallel_part(int ,int ,FEM_Comm_t);
 int FEM_slave_parallel_part(int ,int ,FEM_Comm_t);
 struct partconndata* FEM_call_parmetis(struct conndata &data,FEM_Comm_t comm_context);
-void FEM_write_nodepart(MSA1DINTLIST   &nodepart,struct partconndata *data,MPI_Comm comm_context);
-void FEM_write_part2node(MSA1DINTLIST  &nodepart,MSA1DNODELIST &part2node,struct partconndata *data,MPI_Comm comm_context);
-void FEM_write_part2elem(MSA1DINTLIST &part2elem,struct partconndata *data,MPI_Comm comm_context);
+void FEM_write_nodepart(MSA1DINTLIST::Accum &nodepart, struct partconndata *data, MPI_Comm comm_context);
+void FEM_write_part2node(MSA1DINTLIST::Read &nodepart, MSA1DNODELIST::Accum &part2node, struct partconndata *data, MPI_Comm comm_context);
+void FEM_write_part2elem(MSA1DINTLIST::Accum &part2elem, struct partconndata *data, MPI_Comm comm_context);
 FEM_Mesh * FEM_break_mesh(FEM_Mesh *m,int numElements,int numChunks);
 void sendBrokenMeshes(FEM_Mesh *mesh_array,FEM_Comm_t comm_context);
-void   FEM_write_part2mesh(MSA1DFEMMESH &part2mesh,struct partconndata *partdata,struct conndata *data,MSA1DINTLIST &nodepart,int numChunks,int myChunk,FEM_Mesh *mypiece);
+void FEM_write_part2mesh(MSA1DFEMMESH::Accum &part2mesh, struct partconndata *partdata, struct conndata *data, MSA1DINTLIST::Read &nodepart, int numChunks, int myChunk, FEM_Mesh *mypiece);
 void addIDXLists(FEM_Mesh *m,NodeList &lnodes,int myChunk);
 struct ghostdata *gatherGhosts();
 void makeGhosts(FEM_Mesh *m,MPI_Comm comm,int masterRank,int numLayers,FEM_Ghost_Layer **layers);
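
The MsaHashtable wrapper above follows the same discipline with Add and Read
handles. A sketch of how makeGhost() in parallel_part.C now drives it
(illustrative only; 'totalShared', 'numChunks', 'myChunk', 'globalNodeTuple',
'nodesPerTuple', and 'elemID' are placeholders):

    MsaHashtable *distTab = new MsaHashtable(totalShared, numChunks);
    distTab->enroll(numChunks);

    MsaHashtable::Add &add = distTab->getInitialAdd();    // accumulate phase
    int index = add.addTuple(globalNodeTuple, nodesPerTuple, myChunk, elemID);

    MsaHashtable::Read &read = distTab->syncToRead(add);  // 'add' is invalidated and freed
    const Hashtuple &candidates = read.get(index);        // read-only lookups
    read.print();

    delete &read;    // the final handle is released explicitly, as in makeGhost()
    delete distTab;
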
index d5adcf4534e99db34089a2b56f7f341e2bae5f1d..9f1034b3fc88bb55ff99814664902bbb5a0784a9 100644 (file)
@@ -280,18 +280,18 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
     }
     MPI_Bcast_pup(*requestTable,0,MPI_COMM_WORLD);
     requestTable->enroll(numChunks);
-    requestTable->sync();
+    MSA1DREQLIST::Accum &requestTableAcc = requestTable->getInitialAccum();
 
     makeAdjacencyRequests(
             numNodes,
             node,
             faceTable,
-            requestTable,
+            requestTableAcc,
             faceSize,
             myRank,
             elemType);
 
-    requestTable->sync();
+    MSA1DREQLIST::Read &reqTableRead = requestTable->syncToRead(requestTableAcc);
     //printf("[%d] All face requests made \n",myRank);
 
     MSA1DREPLYLIST *replyTable;
@@ -302,11 +302,11 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
     }
     MPI_Bcast_pup(*replyTable,0,MPI_COMM_WORLD);
     replyTable->enroll(numChunks);
-    replyTable->sync();
+    MSA1DREPLYLIST::Accum &replyAcc = replyTable->getInitialAccum();
 
     replyAdjacencyRequests(
-            requestTable,
-            replyTable,
+            reqTableRead.get(myRank).vec,
+            replyAcc,
             node,
             faceTable,
             adaptFaceAdjacencies,
@@ -317,8 +317,8 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
             elemType,
             false);
 
-    requestTable->sync();
-    replyTable->sync();
+    requestTable->syncToRead(reqTableRead);
+    replyTable->syncToRead(replyAcc);
 
 //    // Once the replies are back, loop through each reply and update the
 //    // adjacencies for each element in the reply
@@ -339,42 +339,43 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
     delete replyTable;
 
     if (adaptEdgeAdjacencies != NULL) {
+           MSA1DREQLIST *edgeRequestTable;
+           MSA1DREPLYLIST *edgeReplyTable;
 
         // do the same thing for the edges
         if (myRank == 0) {
-            requestTable = new MSA1DREQLIST(numChunks,numChunks);
+            edgeRequestTable = new MSA1DREQLIST(numChunks,numChunks);
         } else {
-            requestTable = new MSA1DREQLIST;
+            edgeRequestTable = new MSA1DREQLIST;
         }
-        MPI_Bcast_pup(*requestTable,0,MPI_COMM_WORLD);
-        requestTable->enroll(numChunks);
-        requestTable->sync();
+        MPI_Bcast_pup(*edgeRequestTable,0,MPI_COMM_WORLD);
+        edgeRequestTable->enroll(numChunks);
+        MSA1DREQLIST::Accum &edgeRequestTableAcc = edgeRequestTable->getInitialAccum();
 
         makeAdjacencyRequests(
                 numNodes,
                 node,
                 edgeTable,
-                requestTable,
+                edgeRequestTableAcc,
                 2,
                 myRank,
                 elemType);
 
-        requestTable->sync();
+       MSA1DREQLIST::Read &edgeReqRead = edgeRequestTable->syncToRead(edgeRequestTableAcc);
         //printf("[%d] All edge requests made \n",myRank);
 
-        MSA1DREPLYLIST *replyTable;
         if (myRank == 0) {
-            replyTable = new MSA1DREPLYLIST(numChunks,numChunks);
+            edgeReplyTable = new MSA1DREPLYLIST(numChunks,numChunks);
         } else {
-            replyTable = new MSA1DREPLYLIST;
+            edgeReplyTable = new MSA1DREPLYLIST;
         }
-        MPI_Bcast_pup(*replyTable,0,MPI_COMM_WORLD);
-        replyTable->enroll(numChunks);
-        replyTable->sync();
+        MPI_Bcast_pup(*edgeReplyTable,0,MPI_COMM_WORLD);
+        edgeReplyTable->enroll(numChunks);
+       MSA1DREPLYLIST::Accum &edgeReplyAcc = edgeReplyTable->getInitialAccum();
 
         replyAdjacencyRequests(
-                requestTable,
-                replyTable,
+                edgeReqRead.get(myRank).vec,
+                edgeReplyAcc,
                 node,
                 edgeTable,
                 adaptFaceAdjacencies,
@@ -385,12 +386,12 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
                 elemType,
                 true);
 
-        requestTable->sync();
-        replyTable->sync();
+        edgeRequestTable->syncToRead(edgeReqRead);
+        edgeReplyTable->syncToRead(edgeReplyAcc);
 
 //        // Once the replies are back, loop through each reply and update the
 //        // adjacencies for each element in the reply
-//        CkVec<adjReply> *receivedReplyVec = replyTable->get(myRank).vec;
+//        CkVec<adjReply> *receivedReplyVec = edgeReplyTable->get(myRank).vec;
 //        for(int i=0;i< receivedReplyVec->size();i++){
 //            adjReply *receivedReply = &(*receivedReplyVec)[i];
 //            printf("[%d] Replies received for (%d,%d) (%d,%d,%d)\n",
@@ -403,10 +404,10 @@ void CreateAdaptAdjacencies(int meshid, int elemType)
 //                receivedReply->requestingNodeSetID]->push_back(
 //                        receivedReply->replyingElem);
 //        }
-        replyTable->sync();
+//        edgeReplyTable->sync();
 
-        delete requestTable;
-        delete replyTable;
+        delete edgeRequestTable;
+        delete edgeReplyTable;
     }
 
     for (int i=0; i<numNodes; ++i) {
@@ -575,7 +576,7 @@ void makeAdjacencyRequests(
         const int numNodes,
         FEM_Node *node,
         adjNode *adaptAdjTable,
-        MSA1DREQLIST *requestTable, 
+        MSA1DREQLIST::Accum &requestTable, 
         const int nodeSetSize,
         const int myRank,
         const int elemType)
@@ -682,14 +683,15 @@ void makeAdjacencyRequests(
                                 chunkIterator != commonSharedChunks.end();
                                 chunkIterator++){
                             int chunk = *chunkIterator;
-//                            printf("[%d] Sending to chunk %d request (%d,%d,%d,%d) \n",
-//                                     myRank,
-//                                     chunk,
-//                                     adjRequestList[countChunk].elemID,
-//                                     adjRequestList[countChunk].chunkID,
-//                                     adjRequestList[countChunk].elemType,
-//                                     adjRequestList[countChunk].nodeSetID);
-                            (*requestTable).accumulate(
+#if 0
+                            printf("[%d] Sending to chunk %d request (%d,%d,%d,%d) \n",
+                                  myRank, chunk,
+                                  adjRequestList[countChunk].elemID,
+                                  adjRequestList[countChunk].chunkID,
+                                  adjRequestList[countChunk].elemType,
+                                  adjRequestList[countChunk].nodeSetID);
+#endif
+                            requestTable.accumulate(
                                     chunk,adjRequestList[countChunk]);
                             countChunk++;
                         }
@@ -704,8 +706,8 @@ void makeAdjacencyRequests(
 
 
 void replyAdjacencyRequests(
-        MSA1DREQLIST* requestTable,
-        MSA1DREPLYLIST* replyTable,
+       CkVec<adjRequest> *receivedRequestVec,
+        MSA1DREPLYLIST::Accum &replyTable,
         FEM_Node* node,
         adjNode* adaptAdjTable,
         adaptAdj* adaptFaceAdjacencies,
@@ -727,7 +729,6 @@ void replyAdjacencyRequests(
 
     //Look at each request that in the requestTable for this chunk
     //Put the data for the requests in our own table and then create replies
-    CkVec<adjRequest> *receivedRequestVec = requestTable->get(myRank).vec;
     for (int i=0;i<receivedRequestVec->length();i++) {    
         adjRequest &receivedRequest = (*receivedRequestVec)[i];
         const IDXL_List &sharedNodeList = 
@@ -814,7 +815,7 @@ void replyAdjacencyRequests(
             reply.replyingElem.localID = matchingElemID;
             reply.replyingElem.elemType = elemType;
             //Write into the replyTable
-            replyTable->accumulate(receivedRequest.chunkID,reply);
+            replyTable.accumulate(receivedRequest.chunkID,reply);
         } else {
             //we have no matching nodeset for this request.. hopefully some
             //other chunk does; we can ignore this request
index 6935e81bade2bed4ed1e8566721e465d93ce7a96..8f227807375891937b9acabf5cd66f415153d443 100644 (file)
@@ -436,13 +436,13 @@ void makeAdjacencyRequests(
         const int numNodes, 
         FEM_Node* node, 
         adjNode* adaptAdjTable,
-        MSA1DREQLIST* requestTable,
+        MSA1DREQLIST::Accum &requestTable, 
         const int nodeSetSize, 
         const int myRank,
         int elemType);
 void replyAdjacencyRequests(
-        MSA1DREQLIST* requestTable,
-        MSA1DREPLYLIST* replyTable,
+        CkVec<adjRequest> *requests,
+        MSA1DREPLYLIST::Accum &replyTable,
         FEM_Node* node, 
         adjNode* adaptAdjTable, 
         adaptAdj* adaptFaceAdjacencies, 
index a362faf204e504abf71bca4b1a1d63f13ffeef03..43615eadd4589c3de2ac66d2582dfb2cbea93366 100644 (file)
@@ -74,21 +74,23 @@ int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context
 
   eptrMSA.enroll(numChunks);
   eindMSA.enroll(numChunks);
+  MSA1DINT::Write &wPtr = eptrMSA.getInitialWrite();
+  MSA1DINT::Write &wInd = eindMSA.getInitialWrite();
   int indcount=0,ptrcount=0;
   for(int t=0;t<m->elem.size();t++){
     if(m->elem.has(t)){
       FEM_Elem &k=m->elem[t];
       for(int e=0;e<k.size();e++){
-                               eptrMSA.set(ptrcount)=indcount;
+                               wPtr.set(ptrcount)=indcount;
                                ptrcount++;
                                for(int n=0;n<k.getNodesPer();n++){
-                                 eindMSA.set(indcount)=k.getConn(e,n);
+                                 wInd.set(indcount)=k.getConn(e,n);
                                  indcount++;
                                }
       }
     }
   }
-  eptrMSA.set(ptrcount) = indcount;
+  wPtr.set(ptrcount) = indcount;
   printf("master -> ptrcount %d indcount %d sizeof(MSA1DINT) %d sizeof(MSA1DINTLIST) %d memory %d\n",ptrcount,indcount,sizeof(MSA1DINT),sizeof(MSA1DINTLIST),CmiMemoryUsage());
   /*
     break up the mesh such that each chunk gets the same number of elements
@@ -124,10 +126,12 @@ int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context
   MSA1DINTLIST nodepart(totalNodes,numChunks);
   MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
   nodepart.enroll(numChunks);
+  MSA1DINTLIST::Accum &nodepartAcc = nodepart.getInitialAccum();
        
-  FEM_write_nodepart(nodepart,partdata,(MPI_Comm)comm_context);
-       printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
-       dataArrangeStartTime = CkWallTimer();
+  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
+  printf("Creating mapping of node to partition took %.6lf\n",CkWallTimer()-dataArrangeStartTime);
+  dataArrangeStartTime = CkWallTimer();
+  MSA1DINTLIST::Read &nodepartRead = nodepart.syncToRead(nodepartAcc);
        
   /*
     Set up a msa to store the nodes that belong to a partition
@@ -135,15 +139,17 @@ int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context
   MSA1DNODELIST part2node(numChunks,numChunks);
   MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
   part2node.enroll(numChunks);
+  MSA1DNODELIST::Accum &part2nodeAcc = part2node.getInitialAccum();
 
-  FEM_write_part2node(nodepart,part2node,partdata,(MPI_Comm)comm_context);
+  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);
 
        
   /*
     Get the list of elements and nodes that belong to this partition
   */
-  NodeList lnodes = part2node.get(masterRank);
-       lnodes.uniquify();
+  MSA1DNODELIST::Read &rPart2node = part2node.syncToRead(part2nodeAcc);
+  NodeList lnodes = rPart2node.get(masterRank);
+  lnodes.uniquify();
 //  IntList lelems = part2elem.get(masterRank);
        
 
@@ -157,11 +163,14 @@ int FEM_master_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context
   MSA1DFEMMESH part2mesh(numChunks,numChunks);
   MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
   part2mesh.enroll(numChunks);
-  FEM_write_part2mesh(part2mesh,partdata, &data,nodepart,numChunks,masterRank,&mypiece);
+  MSA1DFEMMESH::Accum &aPart2mesh = part2mesh.getInitialAccum();
+
+  FEM_write_part2mesh(aPart2mesh,partdata, &data,nodepartRead,numChunks,masterRank,&mypiece);
   /*
     Get your mesh consisting of elements and nodes out of the mesh MSA
   */
-  MeshElem me = part2mesh.get(masterRank);
+  MSA1DFEMMESH::Read &rPart2mesh = part2mesh.syncToRead(aPart2mesh);
+  MeshElem me = rPart2mesh.get(masterRank);
   //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",masterRank,me.m->nElems(),me.m->node.size());
        
   DEBUG(printf("[%d] Memory usage on vp 0 close to max %d \n",CkMyPe(),CmiMemoryUsage()));
@@ -228,7 +237,7 @@ int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context)
   /*Receive the name of the msa arrays that contain the
     connectivity information*/
   struct conndata data;
-  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);               
+  MPI_Bcast_pup(data,masterRank,(MPI_Comm)comm_context);
   data.arr1.enroll(numChunks);
   data.arr2.enroll(numChunks);
   DEBUG(printf("Recv -> %d \n",data.nelem));
@@ -252,8 +261,9 @@ int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context)
   MSA1DINTLIST nodepart;
   MPI_Bcast_pup(nodepart,masterRank,(MPI_Comm)comm_context);
   nodepart.enroll(numChunks);
+  MSA1DINTLIST::Accum &nodepartAcc = nodepart.getInitialAccum();
        
-  FEM_write_nodepart(nodepart,partdata,(MPI_Comm)comm_context);
+  FEM_write_nodepart(nodepartAcc,partdata,(MPI_Comm)comm_context);
        
   /*
     write to the msa that stores the nodes that belong to each partition
@@ -262,14 +272,18 @@ int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context)
   MSA1DNODELIST part2node;
   MPI_Bcast_pup(part2node,masterRank,(MPI_Comm)comm_context);
   part2node.enroll(numChunks);
-               
-  FEM_write_part2node(nodepart,part2node,partdata,(MPI_Comm)comm_context);
+  MSA1DNODELIST::Accum &part2nodeAcc = part2node.getInitialAccum();
+  MSA1DINTLIST::Read &nodepartRead = nodepart.syncToRead(nodepartAcc);
+
+
+  FEM_write_part2node(nodepartRead, part2nodeAcc, partdata, (MPI_Comm)comm_context);
 
   /*
     Get the list of elements and nodes that belong to this partition
   */
-  NodeList lnodes = part2node.get(myRank);
-       lnodes.uniquify();
+  MSA1DNODELIST::Read &part2nodeRead = part2node.syncToRead(part2nodeAcc);
+  NodeList lnodes = part2nodeRead.get(myRank);
+  lnodes.uniquify();
 //  IntList lelems = part2elem.get(myRank);
 
   /*
@@ -278,12 +292,14 @@ int FEM_slave_parallel_part(int fem_mesh,int masterRank,FEM_Comm_t comm_context)
   MSA1DFEMMESH part2mesh;
   MPI_Bcast_pup(part2mesh,masterRank,(MPI_Comm)comm_context);
   part2mesh.enroll(numChunks);
-  FEM_write_part2mesh(part2mesh,partdata,&data,nodepart,numChunks,myRank,&mypiece);
+  MSA1DFEMMESH::Accum &aPart2mesh = part2mesh.getInitialAccum();
+  FEM_write_part2mesh(aPart2mesh, partdata, &data, nodepartRead,numChunks, myRank, &mypiece);
        
   /*
     Get your mesh consisting of elements and nodes out of the mesh MSA
   */
-  MeshElem me = part2mesh.get(myRank);
+  MSA1DFEMMESH::Read &rPart2mesh = part2mesh.syncToRead(aPart2mesh);
+  MeshElem me = rPart2mesh.get(myRank);
   //printf("[%d] Number of elements in my partitioned mesh %d number of nodes %d \n",myRank,me.m->nElems(),me.m->node.size());
        
        //Free up the eptr and eind MSA arrays stored in data
@@ -415,8 +431,7 @@ struct partconndata * FEM_call_parmetis(struct conndata &data,FEM_Comm_t comm_co
   Write the partition number of the nodes to the msa array nodepart
   A node might belong to more than one partition
 */
-void FEM_write_nodepart(MSA1DINTLIST   &nodepart,struct partconndata *data,MPI_Comm comm_context){
-  nodepart.sync();
+void FEM_write_nodepart(MSA1DINTLIST::Accum &nodepart,struct partconndata *data,MPI_Comm comm_context){
   for(int i=0;i<data->nelem;i++){
     int start=data->eptr[i];
     int end = data->eptr[i+1];
@@ -425,15 +440,17 @@ void FEM_write_nodepart(MSA1DINTLIST      &nodepart,struct partconndata *data,MPI_Com
        DEBUG(printf(" write_nodepart %d %d \n",data->eind[j],data->part[i]));
     }
   }
-  nodepart.sync();
-  
 }
 
 /*
   Read the msa array written in FEM_write_nodepart and for each node
   write it to the msa array containing the nodes for each partition
 */
-void FEM_write_part2node(MSA1DINTLIST  &nodepart,MSA1DNODELIST &part2node,struct partconndata *data,MPI_Comm comm_context){
+void FEM_write_part2node(MSA1DINTLIST::Read &nodepart,
+                        MSA1DNODELIST::Accum &part2node,
+                        struct partconndata *data,
+                        MPI_Comm comm_context)
+{
   int nodes = nodepart.length();
   int myRank,numChunks;
   /*
@@ -444,7 +461,6 @@ void FEM_write_part2node(MSA1DINTLIST       &nodepart,MSA1DNODELIST &part2node,struct
   MPI_Comm_size(comm_context,&numChunks);
   int start = (nodes*myRank)/numChunks;
   int end = (nodes*(myRank+1))/numChunks;
-  part2node.sync();
   for(int i=start;i<end;i++){
     IntList t = nodepart.get(i);
                t.uniquify();
@@ -464,20 +480,19 @@ void FEM_write_part2node(MSA1DINTLIST     &nodepart,MSA1DNODELIST &part2node,struct
       part2node.accumulate((*t.vec)[j],en);
     }
   }
-  part2node.sync();
   DEBUG(printf("done write_part2node\n"));
 }
 
 /*
   Read the element partition data and write it to the msa
 */
-void FEM_write_part2elem(MSA1DINTLIST &part2elem,struct partconndata *data,MPI_Comm comm_context){
-  part2elem.sync();
+void FEM_write_part2elem(MSA1DINTLIST::Accum &part2elem,struct partconndata *data,MPI_Comm comm_context)
+{
   for(int i=0;i<data->nelem;i++){
     part2elem.accumulate(data->part[i],data->startindex+i);
   }
-  part2elem.sync();
 }
+
 /*
   Break the mesh up into numChunks pieces randomly.
   Pass  nEl/numChunks elements to each 
@@ -534,8 +549,15 @@ void sendBrokenMeshes(FEM_Mesh *mesh_array,FEM_Comm_t comm_context){
     MPI_Send_pup(mesh_array[i],i,MESH_CHUNK_TAG,(MPI_Comm)comm_context);
   }
 }
-void   FEM_write_part2mesh(MSA1DFEMMESH &part2mesh,struct partconndata *partdata,struct conndata *data,MSA1DINTLIST &nodepart,int numChunks,int myChunk,FEM_Mesh *m){
-  part2mesh.sync();
+
+void FEM_write_part2mesh(MSA1DFEMMESH::Accum &part2mesh,
+                        struct partconndata *partdata,
+                        struct conndata *data,
+                        MSA1DINTLIST::Read &nodepart,
+                        int numChunks,
+                        int myChunk,
+                        FEM_Mesh *m)
+{
   int count=0;
   /// reading my part of the broken mesh and  sending the element data to the mesh 
   /// that actually should have it according to parmetis
@@ -551,7 +573,6 @@ void        FEM_write_part2mesh(MSA1DFEMMESH &part2mesh,struct partconndata *partdata,s
       }
     }
   }
-  nodepart.sync();
   /// send out the nodes that I have the data for to the meshes that have them
   int startnode=(myChunk * data->nnode)/numChunks;
   for(int i=0;i<m->node.size();i++){
@@ -563,8 +584,8 @@ void        FEM_write_part2mesh(MSA1DFEMMESH &part2mesh,struct partconndata *partdata,s
       (myme.m->node).push_back(m->node,i);
     }
   }
-  part2mesh.sync();
 }
+
 /*
   horrible bubble sort, replace by quicksort : done
 */
@@ -690,7 +711,8 @@ struct ghostdata *gatherGhosts(){
 double listSearchTime=0;
 double sharedSearchTime=0;
 
-void makeGhosts(FEM_Mesh *m,MPI_Comm comm,int masterRank,int numLayers,FEM_Ghost_Layer **layers){
+void makeGhosts(FEM_Mesh *m, MPI_Comm comm, int masterRank, int numLayers, FEM_Ghost_Layer **layers)
+{
   int myChunk;
   int numChunks;
   MPI_Comm_rank((MPI_Comm)comm,&myChunk);
@@ -796,7 +818,14 @@ bool listContains(FEM_Comm_List &list,int entry){
   return false;
 };
 
-void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghost_Layer *layer,        CkHashtableT<CkHashtableAdaptorT<int>,char> &sharedNode,CkHashtableT<CkHashtableAdaptorT<int>,int> &global2local){
+void makeGhost(FEM_Mesh *m, 
+              MPI_Comm comm,
+              int masterRank,
+              int totalShared,
+              FEM_Ghost_Layer *layer,
+              CkHashtableT<CkHashtableAdaptorT<int>,char> &sharedNode,
+              CkHashtableT<CkHashtableAdaptorT<int>,int> &global2local)
+{
   int myChunk;
   int numChunks;
   MPI_Comm_rank((MPI_Comm)comm,&myChunk);
@@ -808,18 +837,18 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
   */
   MsaHashtable *distTab;
   if(myChunk == masterRank){
-    distTab = new MsaHashtable(totalShared,numChunks); 
+    distTab = new MsaHashtable(totalShared, numChunks);        
   }else{
     distTab = new MsaHashtable;
   }
   MPI_Bcast_pup(*distTab,masterRank,comm);
-  distTab->table.enroll(numChunks);
+  distTab->enroll(numChunks);
   DEBUG(printf("[%d] distributed table calling sync \n",myChunk));
 
 
   //   distTab->table.sync((numChunks == 1));
-  distTab->table.sync();
-       
+  MsaHashtable::Add &aDistTab = distTab->getInitialAdd();
+
   DEBUG(printf("Chunk %d Mesh: *********************************** \n",myChunk));
   //DEBUG(m->print(0));
   DEBUG(printf("**********************************\n"));
@@ -872,7 +901,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
            }
            // if the tuple is a possible ghost add it to the distributed hashtable
            if(possibleGhost){
-             int index=distTab->addTuple(globalNodeTuple,nodesPerTuple,myChunk,m->nElems(i)+e);
+             int index = aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,m->nElems(i)+e);
              tupleVec.push_back(Hashnode::tupledata(globalNodeTuple));
              indexVec.push_back(index);
              elementVec.push_back(i);
@@ -915,23 +944,21 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
              }
            }
            //all the tuples of a ghost element are possible generators of ghosts
-           distTab->addTuple(globalNodeTuple,nodesPerTuple,myChunk,ghostcount);
+           aDistTab.addTuple(globalNodeTuple,nodesPerTuple,myChunk,ghostcount);
          }
        }
       }
     }
   }
-  distTab->table.sync();
+  MsaHashtable::Read &rDistTab = distTab->syncToRead(aDistTab);
 
 
   //debug - print the whole table
   /*   printf("Ghosts chunk %d \n",myChunk);*/
   if(myChunk == masterRank){
-    DEBUG(distTab->print());
+    DEBUG(rDistTab.print());
   }
 
-  distTab->sync();
-
   DEBUG(printf("[%d] id %d says Ghost distributed hashtable printed \n",CkMyPe(),myChunk));
   /* create a new FEM_Mesh msa to transfer the ghost elements from the original mesh to target meshes */
   MSA1DFEMMESH *ghostmeshes;
@@ -944,7 +971,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
   ghostmeshes->enroll(numChunks);
   DEBUG(printf("[%d] id %d says ghostmeshes enroll done \n",CkMyPe(),myChunk));
 
-  ghostmeshes->sync();
+  MSA1DFEMMESH::Accum &aGhostMeshes = ghostmeshes->getInitialAccum();
 
   DEBUG(printf("[%d] id %d says ghostmeshes sync done \n",CkMyPe(),myChunk));
   /*
@@ -955,7 +982,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
   */
   char str[100];
   for(int i=0;i<tupleVec.size();i++){
-    const Hashtuple &listTuple = distTab->get(indexVec[i]);
+    const Hashtuple &listTuple = rDistTab.get(indexVec[i]);
     //         printf("[%d] Elements for index %d tuple< %s> number %d \n",myChunk,indexVec[i],tupleVec[i].toString(layer->nodesPerTuple,str),listTuple.vec->size());
     int elType = elementVec[2*i];
     int elNo = elementVec[2*i+1];
@@ -987,7 +1014,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
 
                                        
          //add an element to the ghost mesh for this chunk
-         MeshElem &myme = ghostmeshes->accumulate(destChunk);
+         MeshElem &myme = aGhostMeshes.accumulate(destChunk);
          myme.m->elem[elType].copyShape(m->elem[elType]);
          int index=myme.m->elem[elType].push_back(m->elem[elType],elNo);
          int globalelem = m->elem[elType].getGlobalno(elNo);
@@ -1044,7 +1071,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
 
   DEBUG(printf("[%d] finished creating ghost mesh \n",myChunk));
 
-  ghostmeshes->sync();
+  MSA1DFEMMESH::Read& rGhostMeshes = ghostmeshes->syncToRead(aGhostMeshes);
 
   /*
     Go through the ghost nodes and check for nodes that dont exist in the hashtable
@@ -1053,7 +1080,7 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
     ghost nodes.
   */   
 
-  FEM_Mesh *gmesh = ghostmeshes->get(myChunk).m;
+  FEM_Mesh *gmesh = rGhostMeshes.get(myChunk).m;
   DEBUG(printf("[%d] my ghost mesh is at %p \n",myChunk,gmesh));
        
   FEM_Node *gnodes = (FEM_Node *)m->node.getGhost();
@@ -1132,8 +1159,9 @@ void makeGhost(FEM_Mesh *m,MPI_Comm comm,int masterRank,int totalShared,FEM_Ghos
   DEBUG(printf("[%d] Recv ghost nodes \n",myChunk));
   DEBUG(m->node.getGhostRecv().print());
 
-       
-  delete distTab;      
+  delete &rDistTab;
+  delete &rGhostMeshes;
+  delete distTab;
   delete ghostmeshes;
   MPI_Barrier(comm);
 }
@@ -1160,3 +1188,4 @@ bool sharedWith(int lnode,int chunk,FEM_Mesh *m){
 }
 #include "ParFUM.def.h"
 /*@}*/
+
index 5d0b649e9fe8a3508357b92688f7abe3bc4cc973..5379014a18640a35442cc09348bc2a6611e11639 100644 (file)
@@ -1,9 +1,12 @@
-// emacs mode line -*- mode: c++; tab-width: 4 -*-
+// emacs mode line -*- mode: c++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
 #ifndef MSA_DISTARRAY_H
 #define MSA_DISTARRAY_H
 
 #include "msa-DistPageMgr.h"
 
+
+struct MSA_InvalidHandle { };
+
 /**
    The MSA1D class is a handle to a distributed shared array of items
    of data type ENTRY. There are nEntries total numer of ENTRY's, with
@@ -31,10 +34,108 @@ public:
     typedef MSA_CacheGroup<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> CacheGroup_t;
     typedef CProxy_MSA_CacheGroup<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> CProxy_CacheGroup_t;
     typedef CProxy_MSA_PageArray<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> CProxy_PageArray_t;
+    typedef MSA1D<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> curMSA1D;
+
+    // Forward-declare so that things returning them can be friended
+    class Read; class Write; class Accum;
+
+       class MSA1D_Handle
+       {
+    public:
+        inline unsigned int length() const { return msa.length(); }
+
+       protected:
+        curMSA1D &msa;
+        bool valid;
+
+        friend Read  &curMSA1D::syncToRead (MSA1D_Handle &m, int single);
+        friend Write &curMSA1D::syncToWrite(MSA1D_Handle &m, int single);
+        friend Accum &curMSA1D::syncToAccum(MSA1D_Handle &m, int single);
+        void inline checkInvalidate(curMSA1D *m) 
+        {
+            if (m != &msa || !valid)
+                throw MSA_InvalidHandle();
+            valid = false;
+        }
+
+        MSA1D_Handle(curMSA1D &msa_) 
+            : msa(msa_), valid(true) 
+        { }
+        void checkValid()
+        {
+            if (!valid)
+                throw MSA_InvalidHandle();
+        }
+
+    private:
+        // Disallow copy construction
+        MSA1D_Handle(MSA1D_Handle &m) {}
+    };
+
+    class Read : public MSA1D_Handle
+    {
+    protected:
+        friend Read &curMSA1D::syncToRead(MSA1D_Handle &, int);
+        Read(curMSA1D &msa_)
+            :  MSA1D_Handle(msa_) { }
+        using MSA1D_Handle::checkValid;
+        using MSA1D_Handle::checkInvalidate;
+
+    public:
+        inline const ENTRY& get(unsigned int idx)
+        {
+            checkValid();
+            return MSA1D_Handle::msa.get(idx); 
+        }
+        inline const ENTRY& operator[](unsigned int idx) { return get(idx); }
+        inline const ENTRY& get2(unsigned int idx)
+        {
+            checkValid();
+            return MSA1D_Handle::msa.get2(idx);
+        }
+    };
+
+    class Write : public MSA1D_Handle
+    {
+    protected:
+        friend Write &curMSA1D::syncToWrite(MSA1D_Handle &, int);
+        friend Write &curMSA1D::getInitialWrite();
+        Write(curMSA1D &msa_)
+            : MSA1D_Handle(msa_) { }
+
+    public:
+        inline ENTRY& set(unsigned int idx)
+        {
+            MSA1D_Handle::checkValid();
+            return MSA1D_Handle::msa.set(idx);
+        }
+    };
+
+    class Accum : public MSA1D_Handle
+    {
+    protected:
+        friend Accum &curMSA1D::syncToAccum(MSA1D_Handle &, int);
+        friend Accum &curMSA1D::getInitialAccum();
+        Accum(curMSA1D &msa_)
+            : MSA1D_Handle(msa_) { }
+        using MSA1D_Handle::checkInvalidate;
+    public:
+        inline ENTRY& accumulate(unsigned int idx)
+        { 
+            MSA1D_Handle::checkValid();
+            return MSA1D_Handle::msa.accumulate(idx);
+        }
+        inline void accumulate(unsigned int idx, const ENTRY& ent)
+        {
+            MSA1D_Handle::checkValid();
+            MSA1D_Handle::msa.accumulate(idx, ent);
+        }
+    };
 
 protected:
     /// Total number of ENTRY's in the whole array.
     unsigned int nEntries;
+    bool initHandleGiven;
 
     /// Handle to owner of cache.
     CacheGroup_t* cache;
@@ -71,7 +172,9 @@ public:
       Create a completely new MSA array.  This call creates the
       corresponding groups, so only call it once per array.
     */
-    inline MSA1D(unsigned int nEntries_, unsigned int num_wrkrs, unsigned int maxBytes=MSA_DEFAULT_MAX_BYTES) : nEntries(nEntries_)
+    inline MSA1D(unsigned int nEntries_, unsigned int num_wrkrs, 
+                 unsigned int maxBytes=MSA_DEFAULT_MAX_BYTES) 
+        : nEntries(nEntries_), initHandleGiven(false)
     {
         // first create the Page Array and the Page Group
         unsigned int nPages = (nEntries + ENTRIES_PER_PAGE - 1)/ENTRIES_PER_PAGE;
@@ -82,8 +185,8 @@ public:
         cache = cg.ckLocalBranch();
     }
 
-// Depricated API for accessing CacheGroup directly.
-    inline MSA1D(CProxy_CacheGroup_t cg_) : cg(cg_)
+    // Deprecated API for accessing CacheGroup directly.
+    inline MSA1D(CProxy_CacheGroup_t cg_) : cg(cg_), initHandleGiven(false)
     {
         cache = cg.ckLocalBranch();
         nEntries = cache->getNumEntries();
@@ -149,20 +252,6 @@ public:
         cache->enroll(num_workers);
     }
 
-    /// Return a read-only copy of the element at idx.
-    ///   May block if the element is not already in the cache.
-    inline const ENTRY& get(unsigned int idx)
-    {
-        unsigned int page = idx / ENTRIES_PER_PAGE;
-        unsigned int offset = idx % ENTRIES_PER_PAGE;
-        return readablePage(page)[offset];
-    }
-
-    inline const ENTRY& operator[](unsigned int idx)
-    {
-        return get(idx);
-    }
-
     // idx is the element to be read/written
     //
     // This function returns a reference to the first element on the
@@ -181,6 +270,105 @@ public:
         }
     }
 
+    inline void FreeMem()
+    {
+        cache->FreeMem();
+    }
+
+    /// Non-blocking prefetch of entries from start to end, inclusive.
+    /// Prefetch'd pages are locked into the cache, so you must call
+    ///   unlock afterwards.
+    inline void Prefetch(unsigned int start, unsigned int end)
+    {
+        unsigned int page1 = start / ENTRIES_PER_PAGE;
+        unsigned int page2 = end / ENTRIES_PER_PAGE;
+        cache->Prefetch(page1, page2);
+    }
+
+    /// Block until all prefetched pages arrive.
+    inline int WaitAll()    { return cache->WaitAll(); }
+
+    /// Unlock all locked pages
+    inline void Unlock()    { return cache->UnlockPages(); }
+
+    /// start and end are element indexes.
+    /// Unlocks completely spanned pages given a range of elements
+    /// index'd from "start" to "end", inclusive.  If start/end does not span a
+    /// page completely, i.e. start/end is in the middle of a page,
+    /// the entire page is still unlocked--in particular, this means
+    /// you should not have several adjacent ranges locked.
+    inline void Unlock(unsigned int start, unsigned int end)
+    {
+        unsigned int page1 = start / ENTRIES_PER_PAGE;
+        unsigned int page2 = end / ENTRIES_PER_PAGE;
+        cache->UnlockPages(page1, page2);
+    }
+
+    static const int DEFAULT_SYNC_SINGLE = 0;
+
+    inline Read &syncToRead(MSA1D_Handle &m, int single = DEFAULT_SYNC_SINGLE)
+    {
+        m.checkInvalidate(this);
+        delete &m;
+        sync(single);
+        return *(new Read(*this));
+    }
+
+    inline Write &syncToWrite(MSA1D_Handle &m, int single = DEFAULT_SYNC_SINGLE)
+    {
+        m.checkInvalidate(this);
+        delete &m;
+        sync(single);
+        return *(new Write(*this));
+    }
+
+    inline Accum &syncToAccum(MSA1D_Handle &m, int single = DEFAULT_SYNC_SINGLE)
+    {
+        m.checkInvalidate(this);
+        delete &m;
+        sync(single);
+        return *(new Accum(*this));
+    }
+
+    inline Write &getInitialWrite()
+    {
+        if (initHandleGiven)
+            throw MSA_InvalidHandle();
+
+        Write *w = new Write(*this);
+        sync();
+        initHandleGiven = true;
+        return *w;
+    }
+
+    inline Accum &getInitialAccum()
+    {
+        if (initHandleGiven)
+            throw MSA_InvalidHandle();
+
+        Accum *a = new Accum(*this);
+        sync();
+        initHandleGiven = true;
+        return *a;
+    }
+
+  // These are the meat of the MSA API, but they are only accessible
+  // through appropriate handles (defined in the public section above).
+protected:
+    /// Return a read-only copy of the element at idx.
+    ///   May block if the element is not already in the cache.
+    inline const ENTRY& get(unsigned int idx)
+    {
+        unsigned int page = idx / ENTRIES_PER_PAGE;
+        unsigned int offset = idx % ENTRIES_PER_PAGE;
+        return readablePage(page)[offset];
+    }
+
+    inline const ENTRY& operator[](unsigned int idx)
+    {
+        return get(idx);
+    }
+
     /// Return a read-only copy of the element at idx;
     ///   ONLY WORKS WHEN ELEMENT IS ALREADY IN THE CACHE--
     ///   WILL SEGFAULT IF ELEMENT NOT ALREADY PRESENT.
@@ -203,11 +391,6 @@ public:
         return e[offset];
     }
 
-    /// Synchronize reads and writes across the entire array.
-    inline void sync(int single=0) { 
-         cache->SyncReq(single); 
-       }
-
     /// Fetch the ENTRY at idx to be accumulated.
     ///   You must perform the accumulation on 
     ///     the return value before calling "sync".
@@ -226,40 +409,11 @@ public:
     {
         accumulate(idx)+=ent;
     }
-    
-
-    inline void FreeMem()
-    {
-        cache->FreeMem();
-    }
-
-    /// Non-blocking prefetch of entries from start to end, inclusive.
-    /// Prefetch'd pages are locked into the cache, so you must call
-    ///   unlock afterwards.
-    inline void Prefetch(unsigned int start, unsigned int end)
-    {
-        unsigned int page1 = start / ENTRIES_PER_PAGE;
-        unsigned int page2 = end / ENTRIES_PER_PAGE;
-        cache->Prefetch(page1, page2);
-    }
-
-    /// Block until all prefetched pages arrive.
-    inline int WaitAll()    { return cache->WaitAll(); }
-
-    /// Unlock all locked pages
-    inline void Unlock()    { return cache->UnlockPages(); }
 
-    /// start and end are element indexes.
-    /// Unlocks completely spanned pages given a range of elements
-    /// index'd from "start" to "end", inclusive.  If start/end does not span a
-    /// page completely, i.e. start/end is in the middle of a page,
-    /// the entire page is still unlocked--in particular, this means
-    /// you should not have several adjacent ranges locked.
-    inline void Unlock(unsigned int start, unsigned int end)
+    /// Synchronize reads and writes across the entire array.
+    inline void sync(int single=0)
     {
-        unsigned int page1 = start / ENTRIES_PER_PAGE;
-        unsigned int page2 = end / ENTRIES_PER_PAGE;
-        cache->UnlockPages(page1, page2);
+        cache->SyncReq(single); 
     }
 };
 
@@ -272,6 +426,7 @@ class MSA2D : public MSA1D<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE>
 public:
     typedef CProxy_MSA_CacheGroup<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> CProxy_CacheGroup_t;
     typedef MSA1D<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE> super;
+    typedef MSA2D<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE, ARRAY_LAYOUT> curMSA2D;
 
 protected:
     unsigned int rows, cols;
@@ -284,6 +439,73 @@ public:
        p|rows; p|cols;
     };
 
+    class Read; class Write; class Accum;
+
+       class MSA2D_Handle
+       {
+       protected:
+        curMSA2D &msa;
+        bool valid;
+
+        friend Read  &curMSA2D::syncToRead (MSA2D_Handle&, int);
+        friend Write &curMSA2D::syncToWrite(MSA2D_Handle&, int);
+        friend Accum &curMSA2D::syncToAccum(MSA2D_Handle&, int);
+        inline void checkInvalidate(curMSA2D *m)
+        {
+            if (&msa != m || !valid)
+                throw MSA_InvalidHandle();
+            valid = false;
+        }
+
+        MSA2D_Handle(curMSA2D &msa_) 
+            : msa(msa_), valid(true) 
+        { }
+        inline void checkValid()
+        {
+            if (!valid)
+                throw MSA_InvalidHandle();
+        }
+    private:
+        // Disallow copy construction
+        MSA2D_Handle(MSA2D_Handle &m) {}
+    };
+
+    class Read : public MSA2D_Handle
+    {
+    private:
+        friend Read &curMSA2D::syncToRead(MSA2D_Handle &, int);
+        Read(curMSA2D &msa_)
+            :  MSA2D_Handle(msa_) { }
+
+    public: 
+        inline const ENTRY& get(unsigned int row, unsigned int col)
+        {
+            MSA2D_Handle::checkValid();
+            return MSA2D_Handle::msa.get(row, col);
+        }
+        inline const ENTRY& get2(unsigned int row, unsigned int col)
+        {
+            MSA2D_Handle::checkValid();
+            return MSA2D_Handle::msa.get2(row, col);
+        }
+    };
+
+    class Write : public MSA2D_Handle
+    {
+    private:
+        friend Write &curMSA2D::syncToWrite(MSA2D_Handle &, int);
+        friend Write &curMSA2D::getInitialWrite();
+       Write(curMSA2D &msa_)
+            :  MSA2D_Handle(msa_) { }
+
+    public: 
+        inline ENTRY& set(unsigned int row, unsigned int col)
+        {
+            MSA2D_Handle::checkValid();
+            return MSA2D_Handle::msa.set(row, col);
+        }
+    };
+
     inline MSA2D(unsigned int rows_, unsigned int cols_, unsigned int numwrkrs,
                  unsigned int maxBytes=MSA_DEFAULT_MAX_BYTES)
         :super(rows_*cols_, numwrkrs, maxBytes)
@@ -324,23 +546,6 @@ public:
     inline unsigned int getColumns(void) const {return cols;}
     inline MSA_Array_Layout_t getArrayLayout() const {return ARRAY_LAYOUT;}
 
-    inline const ENTRY& get(unsigned int row, unsigned int col)
-    {
-        return super::get(getIndex(row, col));
-    }
-
-    // known local
-    inline const ENTRY& get2(unsigned int row, unsigned int col)
-    {
-        return super::get2(getIndex(row, col));
-    }
-
-    // MSA2D::
-    inline ENTRY& set(unsigned int row, unsigned int col)
-    {
-        return super::set(getIndex(row, col));
-    }
-
     inline void Prefetch(unsigned int start, unsigned int end)
     {
         // prefetch the start ... end rows/columns into the cache
@@ -372,6 +577,50 @@ public:
 
         MSA1D<ENTRY, ENTRY_OPS_CLASS, ENTRIES_PER_PAGE>::Unlock(index1, index2);
     }
+
+    inline Read& syncToRead(MSA2D_Handle &m, int single = super::DEFAULT_SYNC_SINGLE)
+    {
+        m.checkInvalidate(this);
+        delete &m;
+        super::sync(single);
+        return *(new Read(*this));
+    }
+
+    inline Write& syncToWrite(MSA2D_Handle &m, int single = super::DEFAULT_SYNC_SINGLE)
+    {
+        m.checkInvalidate(this);
+        delete &m;
+        super::sync(single);
+        return *(new Write(*this));
+    }
+
+    inline Write& getInitialWrite()
+    {
+        if (super::initHandleGiven)
+            throw MSA_InvalidHandle();
+
+        Write *w = new Write(*this);
+        super::initHandleGiven = true;
+        return *w;
+    }
+
+protected:
+    inline const ENTRY& get(unsigned int row, unsigned int col)
+    {
+        return super::get(getIndex(row, col));
+    }
+
+    // known local
+    inline const ENTRY& get2(unsigned int row, unsigned int col)
+    {
+        return super::get2(getIndex(row, col));
+    }
+
+    // MSA2D::
+    inline ENTRY& set(unsigned int row, unsigned int col)
+    {
+        return super::set(getIndex(row, col));
+    }
 };
 
 #endif