added parallel PME sending
author Yanhua Sun <sun51@hopper09.(none)>
Sun, 11 Sep 2011 17:14:33 +0000 (10:14 -0700)
committer Yanhua Sun <sun51@hopper09.(none)>
Sun, 11 Sep 2011 17:14:33 +0000 (10:14 -0700)
examples/charm++/PMEMimic/PMEMimic/Makefile [new file with mode: 0644]
examples/charm++/PMEMimic/PMEMimic/PMEMimic.C [new file with mode: 0644]
examples/charm++/PMEMimic/PMEMimic/PMEMimic.ci [new file with mode: 0644]
examples/charm++/PMEMimic/PMEMimic_Parallel/Makefile [new file with mode: 0644]
examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.C [new file with mode: 0644]
examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.ci [new file with mode: 0644]

diff --git a/examples/charm++/PMEMimic/PMEMimic/Makefile b/examples/charm++/PMEMimic/PMEMimic/Makefile
new file mode 100644 (file)
index 0000000..b694cdf
--- /dev/null
@@ -0,0 +1,26 @@
+CHARMC=../../../../bin/charmc $(OPTS)
+
+OBJS = PMEMimic.o
+# Default build: the plain binary (pgm) and the projections-tracemode binary (pgm.prj).
+all: pgm pgm.prj
+
+pgm: $(OBJS)
+       $(CHARMC) -language charm++ -o pgm $(OBJS)
+
+pgm.prj: $(OBJS)
+       $(CHARMC) -language charm++ -tracemode projections -o pgm.prj $(OBJS)
+
+PMEMimic.decl.h: PMEMimic.ci
+       $(CHARMC)  PMEMimic.ci
+# Removes generated headers, objects and the linked binaries; pgm and pgm.prj were previously left behind.
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o PMEMimic pgm pgm.prj charmrun
+
+PMEMimic.o: PMEMimic.C PMEMimic.decl.h
+       $(CHARMC) -c PMEMimic.C
+
+test: all
+       ./charmrun +p12 pgm 3 2 2 $(TESTOPTS)
+
+bgtest: all
+       ./charmrun pgm +p4 10 +x2 +y2 +z1
diff --git a/examples/charm++/PMEMimic/PMEMimic/PMEMimic.C b/examples/charm++/PMEMimic/PMEMimic/PMEMimic.C
new file mode 100644 (file)
index 0000000..d0d7d48
--- /dev/null
@@ -0,0 +1,388 @@
+#include <stdio.h>
+#include "PMEMimic.decl.h"
+
+/*readonly*/ CProxy_Main mainProxy;   // assigned from thisProxy in Main::Main
+/*readonly: every global below is declared readonly in PMEMimic.ci*/
+
+int     N;              // never assigned or read in this file
+int     grid_x;         // grid_x, grid_y and grid_z are all set to the same value in Main::Main
+int     grid_y;
+int     grid_z;
+int     max_iter;       // a pencil calls Main::done() once its iteration counter reaches this
+int     pes_per_node;   // multiplier applied to the flattened element index in PMEMap::procNum
+
+CProxy_PMEPencil_X pme_x;   // created in Main::Main with constructor argument 0
+CProxy_PMEPencil_Y pme_y;   // created in Main::Main with constructor argument 1
+CProxy_PMEPencil_Z pme_z;   // created in Main::Main with constructor argument 2
+
+class DataMsg : public CMessage_DataMsg   // message passed to the pencils' recvTrans entry methods
+{
+public:
+    int phrase;        // phase tag: recvTrans compares it with PME_index and branches on it ("phrase" presumably means "phase")
+    char data[2048];   // fixed-size payload; never read or written in this file
+
+};
+
+// Placement map: element (i, j) is assigned PE (i*grid_x + j)*pes_per_node + offset.
+class PMEMap : public CkArrayMap
+{
+    int offset;   // added to every PE number this map returns
+public:
+    PMEMap(int off) : offset(off) {}
+    PMEMap(CkMigrateMessage *m) {}
+    // Array registration hook; always answers 0.
+    int registerArray(CkArrayIndex& numElements, CkArrayID aid) { return 0; }
+    // PE number for the element at idx; only the first two index components are read.
+    int procNum(int /*arrayHdl*/, const CkArrayIndex &idx)
+    {
+        const int *coord = (const int *)idx.data();
+        const int flat = coord[0]*grid_x + coord[1];
+        return flat * pes_per_node + offset;
+    }
+};
+
+
+/*mainchare*/
+class Main : public CBase_Main
+{
+    double startTimer;          // CmiWallTimer() reading taken just before pme_x.start()
+    int done_pme, iteration;    // done_pme counts done() calls; iteration is not used in this class
+public:
+    // Reads "<pes_per_node> <grid> <max_iter>" if given, builds the three pencil arrays, starts X.
+    Main(CkArgMsg* m)
+    {
+        //Process command-line arguments
+        grid_x = grid_y = grid_z = 10;
+        max_iter = 100;
+        pes_per_node = 3;
+        if(m->argc > 3)   // fix: argv[1], argv[2] and argv[3] are all read, so all three must be present
+        {
+            pes_per_node = atoi(m->argv[1]);
+            grid_x = grid_y = grid_z = atoi(m->argv[2]);
+            max_iter = atoi(m->argv[3]);
+        }
+        delete m;
+
+    //Start the computation
+      CkPrintf("Running PMEMimic on %d processors for %d elements\n",
+          CkNumPes(), grid_x);
+      mainProxy = thisProxy;
+
+      CProxy_PMEMap myMap_x=CProxy_PMEMap::ckNew(0);
+      CkArrayOptions opts_x(grid_y, grid_z);
+      opts_x.setMap(myMap_x);
+
+      CProxy_PMEMap myMap_y=CProxy_PMEMap::ckNew(1);
+      CkArrayOptions opts_y(grid_x, grid_z);
+      opts_y.setMap(myMap_y);
+
+      CProxy_PMEMap myMap_z=CProxy_PMEMap::ckNew(2);
+      CkArrayOptions opts_z(grid_x, grid_y);
+      opts_z.setMap(myMap_z);
+
+      pme_x = CProxy_PMEPencil_X::ckNew(0, opts_x);
+      pme_y = CProxy_PMEPencil_Y::ckNew(1, opts_y);
+      pme_z = CProxy_PMEPencil_Z::ckNew(2, opts_z);
+
+      done_pme=0;
+      startTimer = CmiWallTimer();
+      pme_x.start();
+      
+    };
+    // Entry method: a pencil reports that its iteration counter reached max_iter.
+    void done()
+    {
+        done_pme++;
+        if(done_pme == grid_y*grid_z)   // size of the X array (opts_x); same value as before since all grids are equal
+        {
+            done_pme = 0;
+            // average = elapsed CmiWallTimer() time / max_iter, scaled by 1000 for the "(ms)" label
+            CkPrintf("PME(%d, %d, %d) on %d PEs, %d iteration, avg time:%f(ms)\n", grid_x, grid_y, grid_z, CkNumPes(), max_iter, (CmiWallTimer()-startTimer)/max_iter*1000);
+            CkExit();
+        }
+    }
+};
+
+/*array [2D]*/
+class PMEPencil_X : public CBase_PMEPencil_X
+{
+    int PME_index;                      // phase value this pencil currently accepts
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_X(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_X(CkMigrateMessage *m) {}
+  // Kick-off: sends one phase-1 message to each Y pencil (x, thisIndex.y), x in [0, grid_x).
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz), y(x, z)
+    for(int x=0; x<grid_x; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x, thisIndex.y).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the grid_x-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            for(int x=0; x<grid_x; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x, thisIndex.y).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            for(int z=0; z<grid_z; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y, thisIndex.y).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+
+/*array [2D]*/
+class PMEPencil_Y : public CBase_PMEPencil_Y
+{
+    int PME_index;                      // phase value this pencil currently accepts; toggled between 1 and 3 below
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_Y(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_Y(CkMigrateMessage *m) {}
+  // Sends one phase-1 message to each Y pencil (x, thisIndex.y); Main only calls pme_x.start().
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz), y(x, z)
+    for(int x=0; x<grid_x; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x, thisIndex.y).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the grid_x-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            for(int x=0; x<grid_x; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x, thisIndex.y).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            for(int z=0; z<grid_z; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y, thisIndex.y).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+
+/*array [2D]*/
+class PMEPencil_Z : public CBase_PMEPencil_Z
+{
+    int PME_index;                      // phase value this pencil currently accepts
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_Z(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_Z(CkMigrateMessage *m) {}
+  // Sends one phase-1 message to each Y pencil (x, thisIndex.y); Main only calls pme_x.start().
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz), y(x, z)
+    for(int x=0; x<grid_x; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x, thisIndex.y).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the grid_x-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            for(int x=0; x<grid_x; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x, thisIndex.y).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            for(int z=0; z<grid_z; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            for(int y=0; y<grid_y; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y, thisIndex.y).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+
+
+
+#include "PMEMimic.def.h"
diff --git a/examples/charm++/PMEMimic/PMEMimic/PMEMimic.ci b/examples/charm++/PMEMimic/PMEMimic/PMEMimic.ci
new file mode 100644 (file)
index 0000000..58ce796
--- /dev/null
@@ -0,0 +1,42 @@
+mainmodule PMEMimic {   // interface for PMEMimic.C: readonly globals, the map group, the main chare, one message, three 2D arrays
+  readonly CProxy_Main mainProxy;   // assigned from thisProxy in Main::Main
+
+  readonly int     N;               // never assigned or read in PMEMimic.C
+  readonly int     grid_x;          // grid_x, grid_y and grid_z are all set to the same value in Main::Main
+  readonly int     grid_y;
+  readonly int     grid_z;
+  readonly int     pes_per_node;    // multiplier used by PMEMap::procNum
+  readonly int     max_iter;        // iteration count at which a pencil calls Main::done()
+  readonly CProxy_PMEPencil_X pme_x;
+  readonly CProxy_PMEPencil_Y pme_y;
+  readonly CProxy_PMEPencil_Z pme_z;
+
+  group PMEMap : CkArrayMap {
+      entry PMEMap(int);            // argument is stored as the map's offset
+  }
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry void done();              // invoked by pencils via mainProxy.done()
+  };
+
+  message DataMsg;
+
+  array [2D] PMEPencil_X {
+    entry PMEPencil_X(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  };        
+  array [2D] PMEPencil_Y {
+    entry PMEPencil_Y(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  }; 
+array [2D] PMEPencil_Z {
+    entry PMEPencil_Z(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  };  
+
+};
diff --git a/examples/charm++/PMEMimic/PMEMimic_Parallel/Makefile b/examples/charm++/PMEMimic/PMEMimic_Parallel/Makefile
new file mode 100644 (file)
index 0000000..b694cdf
--- /dev/null
@@ -0,0 +1,26 @@
+CHARMC=../../../../bin/charmc $(OPTS)
+
+OBJS = PMEMimic.o
+# Default build: the plain binary (pgm) and the projections-tracemode binary (pgm.prj).
+all: pgm pgm.prj
+
+pgm: $(OBJS)
+       $(CHARMC) -language charm++ -o pgm $(OBJS)
+
+pgm.prj: $(OBJS)
+       $(CHARMC) -language charm++ -tracemode projections -o pgm.prj $(OBJS)
+
+PMEMimic.decl.h: PMEMimic.ci
+       $(CHARMC)  PMEMimic.ci
+# Removes generated headers, objects and the linked binaries; pgm and pgm.prj were previously left behind.
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o PMEMimic pgm pgm.prj charmrun
+
+PMEMimic.o: PMEMimic.C PMEMimic.decl.h
+       $(CHARMC) -c PMEMimic.C
+
+test: all
+       ./charmrun +p12 pgm 3 2 2 $(TESTOPTS)
+
+bgtest: all
+       ./charmrun pgm +p4 10 +x2 +y2 +z1
diff --git a/examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.C b/examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.C
new file mode 100644 (file)
index 0000000..45bd336
--- /dev/null
@@ -0,0 +1,402 @@
+#include <stdio.h>
+#include "PMEMimic.decl.h"
+
+/*readonly*/ CProxy_Main mainProxy;   // assigned from thisProxy in Main::Main
+/*readonly: every global below is declared readonly in PMEMimic.ci*/
+
+int     N;              // never assigned or read in this file
+int     grid_x;         // grid_x, grid_y and grid_z are all set to the same value in Main::Main
+int     grid_y;
+int     grid_z;
+int     max_iter;       // a pencil calls Main::done() once its iteration counter reaches this
+int     pes_per_node;   // third array dimension in Main::Main and multiplier in PMEMap::procNum
+int     grain_size;     // computed as grid_x/pes_per_node in Main::Main; loop bound and index divisor in the pencils
+CProxy_PMEPencil_X pme_x;   // created in Main::Main with constructor argument 0
+CProxy_PMEPencil_Y pme_y;   // created in Main::Main with constructor argument 1
+CProxy_PMEPencil_Z pme_z;   // created in Main::Main with constructor argument 2
+
+class DataMsg : public CMessage_DataMsg   // message passed to the pencils' recvTrans entry methods
+{
+public:
+    int phrase;        // phase tag: recvTrans compares it with PME_index and branches on it ("phrase" presumably means "phase")
+    char data[2048];   // fixed-size payload; never read or written in this file
+
+};
+
+// Placement map: element (i, j, k) is assigned PE (i*grid_x + j)*pes_per_node + k.
+class PMEMap : public CkArrayMap
+{
+    int offset;   // stored by the constructor; procNum below does not read it
+public:
+    PMEMap(int off) : offset(off) {}
+    PMEMap(CkMigrateMessage *m) {}
+    // Array registration hook; always answers 0.
+    int registerArray(CkArrayIndex& numElements, CkArrayID aid) { return 0; }
+    // PE number for the element at idx; reads three index components.
+    int procNum(int /*arrayHdl*/, const CkArrayIndex &idx)
+    {
+        const int *coord = (const int *)idx.data();
+        const int node = coord[0]*grid_x + coord[1];
+        return node * pes_per_node + coord[2];
+    }
+};
+
+
+/*mainchare*/
+class Main : public CBase_Main
+{
+    double startTimer;          // CmiWallTimer() reading taken just before pme_x.start()
+    int done_pme, iteration;    // done_pme counts done() calls; iteration is not used in this class
+public:
+    // Reads "<pes_per_node> <grid> <max_iter>" if given, builds the three pencil arrays, starts X.
+    Main(CkArgMsg* m)
+    {
+        //Process command-line arguments
+        grid_x = grid_y = grid_z = 10;
+        max_iter = 100;
+        pes_per_node = 3;
+        if(m->argc > 3)   // fix: argv[1], argv[2] and argv[3] are all read, so all three must be present
+        {
+            pes_per_node = atoi(m->argv[1]);
+            grid_x = grid_y = grid_z = atoi(m->argv[2]);
+            max_iter = atoi(m->argv[3]);
+        }
+        delete m;
+        if(pes_per_node < 1 || pes_per_node > grid_x) CkAbort("PMEMimic: pes_per_node must be between 1 and the grid size\n");
+        grain_size = grid_x/pes_per_node;   // fix: computed after parsing (was derived from the defaults); >= 1 given the check above
+    //Start the computation
+      CkPrintf("Running PMEMimic on %d processors for %d elements\n",
+          CkNumPes(), grid_x);
+      mainProxy = thisProxy;
+
+      CProxy_PMEMap myMap_x=CProxy_PMEMap::ckNew(0);
+      CkArrayOptions opts_x(grid_y, grid_z, pes_per_node);
+      opts_x.setMap(myMap_x);
+
+      CProxy_PMEMap myMap_y=CProxy_PMEMap::ckNew(1);
+      CkArrayOptions opts_y(grid_x, grid_z, pes_per_node);
+      opts_y.setMap(myMap_y);
+
+      CProxy_PMEMap myMap_z=CProxy_PMEMap::ckNew(2);
+      CkArrayOptions opts_z(grid_x, grid_y, pes_per_node);
+      opts_z.setMap(myMap_z);
+
+      pme_x = CProxy_PMEPencil_X::ckNew(0, opts_x);
+      pme_y = CProxy_PMEPencil_Y::ckNew(1, opts_y);
+      pme_z = CProxy_PMEPencil_Z::ckNew(2, opts_z);
+
+      done_pme=0;
+      startTimer = CmiWallTimer();
+      pme_x.start();
+      
+    };
+    // Entry method: a pencil reports that its iteration counter reached max_iter.
+    void done()
+    {
+        done_pme++;
+        if(done_pme == grid_x*grid_x)   // NOTE(review): the X array has grid_y*grid_z*pes_per_node elements - confirm the intended count
+        {
+            done_pme = 0;
+            // average = elapsed CmiWallTimer() time / max_iter, scaled by 1000 for the "(ms)" label
+            CkPrintf("PME(%d, %d, %d) on %d PEs, %d iteration, avg time:%f(ms)\n", grid_x, grid_y, grid_z, CkNumPes(), max_iter, (CmiWallTimer()-startTimer)/max_iter*1000);
+            CkExit();
+        }
+    }
+};
+
+/*array [3D]*/
+class PMEPencil_X : public CBase_PMEPencil_X
+{
+    int PME_index;                      // phase value this pencil currently accepts
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_X(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_X(CkMigrateMessage *m) {}
+  // Kick-off: sends grain_size phase-1 messages to Y pencils (x+thisIndex.z*grain_size, thisIndex.y, thisIndex.x/grain_size).
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz)(x), y(x, z)(y)
+    int yindex = thisIndex.x/grain_size;
+    for(int x=0; x<grain_size; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the expect_num-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;   // NOTE(review): each sender loop below emits grain_size messages - confirm grid_x is the intended count
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            int yindex = thisIndex.x/grain_size;
+            for(int x=0; x<grain_size; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            int zindex = thisIndex.y/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y+thisIndex.z*grain_size, zindex).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            int yindex = thisIndex.y/grain_size;
+            for(int z=0; z<grain_size; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z+thisIndex.z*grain_size, yindex).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            int xindex = thisIndex.x/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y+grain_size*thisIndex.z, thisIndex.y, xindex).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+/*array [3D]*/
+class PMEPencil_Y : public CBase_PMEPencil_Y
+{
+    int PME_index;                      // phase value this pencil currently accepts; toggled between 1 and 3 below
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_Y(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_Y(CkMigrateMessage *m) {}
+  // Sends grain_size phase-1 messages to Y pencils; Main only calls pme_x.start().
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz)(x), y(x, z)(y)
+    int yindex = thisIndex.x/grain_size;
+    for(int x=0; x<grain_size; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the expect_num-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;   // NOTE(review): each sender loop below emits grain_size messages - confirm grid_x is the intended count
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            int yindex = thisIndex.x/grain_size;
+            for(int x=0; x<grain_size; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            int zindex = thisIndex.y/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y+thisIndex.z*grain_size, zindex).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            int yindex = thisIndex.y/grain_size;
+            for(int z=0; z<grain_size; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z+thisIndex.z*grain_size, yindex).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            int xindex = thisIndex.x/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y+grain_size*thisIndex.z, thisIndex.y, xindex).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+
+/*array [3D]*/
+class PMEPencil_Z : public CBase_PMEPencil_Z
+{
+    int PME_index;                      // phase value this pencil currently accepts
+    int buffered_num, buffered_phrase;  // count and last phase of messages dropped on a phase mismatch
+    int recv_nums, iteration;           // matching messages counted so far; completed phase-0 rounds
+public:
+  PMEPencil_Z(int i)
+  {
+      PME_index = i;
+      recv_nums = 0;
+      iteration = 0;
+      buffered_num = 0;
+  }
+  PMEPencil_Z(CkMigrateMessage *m) {}
+  // Sends grain_size phase-1 messages to Y pencils; Main only calls pme_x.start().
+  void start()
+  {
+   //thisindex.x thisindex.y
+    // x (yz)(x), y(x, z)(y)
+    int yindex = thisIndex.x/grain_size;
+    for(int x=0; x<grain_size; x++)
+    {
+      DataMsg *msg= new DataMsg;
+      msg->phrase = 1;
+      pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+    }
+  }
+  void recvTrans(DataMsg *msg_recv)   // counts msg_recv; the expect_num-th match triggers the sends for its phase
+  {
+    int expect_num, index;
+    expect_num = grid_x;   // NOTE(review): each sender loop below emits grain_size messages - confirm grid_x is the intended count
+    index = msg_recv->phrase;
+    // A message whose phase differs from PME_index is only counted, then freed.
+    if(msg_recv->phrase != PME_index)
+    {
+        buffered_num++;
+        buffered_phrase = msg_recv->phrase;
+        delete msg_recv;
+        return;
+    }
+    recv_nums++;
+    if(recv_nums == expect_num)
+    {
+        if(index == 0  ) //x (y,z) to y(x,z)
+        {
+            iteration++;
+            if(iteration == max_iter)
+            {
+                mainProxy.done();
+                delete msg_recv;   // fix: this early return used to leak the message
+                return;
+            }
+            int yindex = thisIndex.x/grain_size;
+            for(int x=0; x<grain_size; x++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(x+thisIndex.z*grain_size, thisIndex.y, yindex ).recvTrans(msg);
+            }
+        }else if(index == 1) //y(x,z) send to z(x,y)
+        {
+            int zindex = thisIndex.y/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_z(thisIndex.x, y+thisIndex.z*grain_size, zindex).recvTrans(msg);
+            }
+            PME_index = 3;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }else if(index == 2) //Z(x,y) send to y(x,z)
+        {
+            int yindex = thisIndex.y/grain_size;
+            for(int z=0; z<grain_size; z++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = msg_recv->phrase+1;
+                pme_y(thisIndex.x, z+thisIndex.z*grain_size, yindex).recvTrans(msg);
+            }
+        } else if(index == 3) //y(x,z) to x(y,z)
+        {
+            int xindex = thisIndex.x/grain_size;
+            for(int y=0; y<grain_size; y++)
+            {
+                DataMsg *msg= new DataMsg;
+                msg->phrase = 0;
+                pme_x(y+grain_size*thisIndex.z, thisIndex.y, xindex).recvTrans(msg);
+            }
+            PME_index = 1;
+            recv_nums = buffered_num;   // NOTE(review): overwritten by the reset to 0 below
+        }
+        recv_nums = 0;
+    }
+    delete msg_recv;
+  }
+};
+
+
+#include "PMEMimic.def.h"
diff --git a/examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.ci b/examples/charm++/PMEMimic/PMEMimic_Parallel/PMEMimic.ci
new file mode 100644 (file)
index 0000000..9aeb585
--- /dev/null
@@ -0,0 +1,43 @@
+mainmodule PMEMimic {   // interface for PMEMimic.C: readonly globals, the map group, the main chare, one message, three 3D arrays
+  readonly CProxy_Main mainProxy;   // assigned from thisProxy in Main::Main
+
+  readonly int     N;               // never assigned or read in PMEMimic.C
+  readonly int     grid_x;          // grid_x, grid_y and grid_z are all set to the same value in Main::Main
+  readonly int     grid_y;
+  readonly int     grid_z;
+  readonly int     pes_per_node;    // third array dimension in Main::Main and multiplier in PMEMap::procNum
+  readonly int     grain_size;      // set to grid_x/pes_per_node in Main::Main
+  readonly int     max_iter;        // iteration count at which a pencil calls Main::done()
+  readonly CProxy_PMEPencil_X pme_x;
+  readonly CProxy_PMEPencil_Y pme_y;
+  readonly CProxy_PMEPencil_Z pme_z;
+
+  group PMEMap : CkArrayMap {
+      entry PMEMap(int);            // argument is stored as the map's offset
+  }
+
+  mainchare Main {
+    entry Main(CkArgMsg *m);
+    entry void done();              // invoked by pencils via mainProxy.done()
+  };
+
+  message DataMsg;
+
+  array [3D] PMEPencil_X {
+    entry PMEPencil_X(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  };        
+  array [3D] PMEPencil_Y {
+    entry PMEPencil_Y(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  }; 
+array [3D] PMEPencil_Z {
+    entry PMEPencil_Z(int);         // argument becomes the pencil's initial PME_index
+    entry void start();
+    entry void recvTrans( DataMsg *m);
+  };  
+
+};