Initial checkin.
authorDavid Kunzman <kunzman2@illinois.edu>
Mon, 23 Feb 2009 18:22:49 +0000 (18:22 +0000)
committerDavid Kunzman <kunzman2@illinois.edu>
Mon, 23 Feb 2009 18:22:49 +0000 (18:22 +0000)
14 files changed:
examples/charm++/cell/md/Makefile [new file with mode: 0644]
examples/charm++/cell/md/main.C [new file with mode: 0644]
examples/charm++/cell/md/main.ci [new file with mode: 0644]
examples/charm++/cell/md/main.h [new file with mode: 0644]
examples/charm++/cell/md/md_config.h [new file with mode: 0644]
examples/charm++/cell/md/pairCompute.C [new file with mode: 0644]
examples/charm++/cell/md/pairCompute.ci [new file with mode: 0644]
examples/charm++/cell/md/pairCompute.h [new file with mode: 0644]
examples/charm++/cell/md/patch.C [new file with mode: 0644]
examples/charm++/cell/md/patch.ci [new file with mode: 0644]
examples/charm++/cell/md/patch.h [new file with mode: 0644]
examples/charm++/cell/md/selfCompute.C [new file with mode: 0644]
examples/charm++/cell/md/selfCompute.ci [new file with mode: 0644]
examples/charm++/cell/md/selfCompute.h [new file with mode: 0644]

diff --git a/examples/charm++/cell/md/Makefile b/examples/charm++/cell/md/Makefile
new file mode 100644 (file)
index 0000000..a765a31
--- /dev/null
@@ -0,0 +1,49 @@
+CHARM_BASE_DIR = ../../../..
+CHARM_BIN_DIR = $(CHARM_BASE_DIR)/bin
+CHARMC = $(CHARM_BIN_DIR)/charmc $(OPTS)
+
+OBJS = main.o patch.o selfCompute.o pairCompute.o
+PGM = md
+
+
+default: all
+all: $(PGM)
+
+
+md: $(OBJS)
+       $(CHARMC) -language charm++ -o $(PGM) $(OBJS) -spu-lib m
+
+md_proj: $(OBJS)
+       $(CHARMC) -language charm++ -o $(PGM) $(OBJS) -tracemode summary -tracemode projections
+
+
+main.decl.h main.def.h: main.ci
+       $(CHARMC) main.ci
+
+main.o: main.h main.C main.decl.h patch.decl.h selfCompute.decl.h pairCompute.decl.h main.def.h
+       $(CHARMC) -c main.C
+
+
+patch.decl.h patch.def.h: patch.ci
+       $(CHARMC) patch.ci
+
+patch.o: patch.h main.h patch.C patch.decl.h main.decl.h patch.def.h
+       $(CHARMC) -c patch.C
+
+
+selfCompute.decl.h selfCompute.def.h: selfCompute.ci
+       $(CHARMC) selfCompute.ci
+
+selfCompute.o: selfCompute.h main.h selfCompute.C selfCompute.decl.h patch.decl.h selfCompute.def.h
+       $(CHARMC) -c selfCompute.C
+
+
+pairCompute.decl.h pairCompute.def.h: pairCompute.ci
+       $(CHARMC) pairCompute.ci
+
+pairCompute.o: pairCompute.h main.h pairCompute.C pairCompute.decl.h patch.decl.h pairCompute.def.h
+       $(CHARMC) -c pairCompute.C
+
+
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o $(PGM) charmrun *genSPECode*
diff --git a/examples/charm++/cell/md/main.C b/examples/charm++/cell/md/main.C
new file mode 100644 (file)
index 0000000..6891be3
--- /dev/null
@@ -0,0 +1,154 @@
+#include <stdlib.h>
+#include <time.h>
+
+#include "main.h"
+#include "md_config.h"
+
+
+// Read-Onlys
+CProxy_Main mainProxy;
+CProxy_Patch patchArrayProxy;
+CProxy_SelfCompute selfComputeArrayProxy;
+CProxy_PairCompute pairComputeArrayProxy;
+int numPatchesX;
+int numPatchesY;
+int numPatchesZ;
+
+
+void Main::parseCommandLine(int argc, char** argv) {
+
+  // Verify the parameters
+  if (argc <= 0) return;
+  if (argv == NULL) return;
+
+  // Set default parameters for the application
+  numParticlesPerPatch = DEFAULT_NUM_PARTICLES_PER_PATCH;
+  numPatchesX = DEFAULT_NUM_PATCHES_X;
+  numPatchesY = DEFAULT_NUM_PATCHES_Y;
+  numPatchesZ = DEFAULT_NUM_PATCHES_Z;
+  numStepsRemaining = DEFAULT_NUM_STEPS;
+
+  // Parse the command line
+  if (argc > 1) { numParticlesPerPatch = atoi(argv[1]); }
+  if (argc > 4) {
+    numPatchesX = atoi(argv[2]);
+    numPatchesY = atoi(argv[3]);
+    numPatchesZ = atoi(argv[4]);
+  }
+  if (argc > 5) { numStepsRemaining = atoi(argv[5]); }
+
+  // Check application parameters
+  if (numParticlesPerPatch <= 0) { CkAbort("numParticlesPerPatch must be greater than zero"); }
+  if ((numParticlesPerPatch % 4) != 0) { CkAbort("numParticlesPerPatch must be a multiple of 4"); }
+  if (numPatchesX <= 0) { CkAbort("Invalid number of patches in X dimension"); }
+  if (numPatchesY <= 0) { CkAbort("Invalid number of patches in Y dimension"); }
+  if (numPatchesZ <= 0) { CkAbort("Invalid number of patches in Z dimension"); }
+  if (numStepsRemaining <= 0) { CkAbort("Invalid number of steps"); }
+}
+
+
+Main::Main(CkArgMsg* msg) {
+
+  // Parse the command line (sets application parameters)
+  parseCommandLine(msg->argc, msg->argv);
+  delete msg;
+
+  // Display a header that displays info about the run
+  CkPrintf("MD Simulation\n"
+           "  Patch Grid: x:%d by y:%d by z:%d\n"
+           "  NumParticlesPerPatch: %d\n"
+           "  Simulation Steps: %d\n",
+           numPatchesX, numPatchesY, numPatchesZ,
+           numParticlesPerPatch,
+           numStepsRemaining
+         );
+
+  // Spread a proxy to this main chare object to all processors via a read-only
+  mainProxy = thisProxy;
+
+  // Create the patch array
+  patchArrayProxy = CProxy_Patch::ckNew(numParticlesPerPatch, numPatchesX, numPatchesY, numPatchesZ);
+
+  // Create the self compute array
+  selfComputeArrayProxy = CProxy_SelfCompute::ckNew(numParticlesPerPatch, numPatchesX, numPatchesY, numPatchesZ);
+
+  // Create the pair compute array
+  pairComputeArrayProxy = CProxy_PairCompute::ckNew();
+  const int numPatches = numPatchesX * numPatchesY * numPatchesZ;
+  for (int p0 = 0; p0 < numPatches; p0++) {
+    for (int p1 = p0 + 1; p1 < numPatches; p1++) {
+      pairComputeArrayProxy(p0, p1).insert(numParticlesPerPatch);
+    }
+  }
+  pairComputeArrayProxy.doneInserting();
+}
+
+
+Main::~Main() {
+}
+
+
+void Main::proxyCheckIn() {
+
+  static int numCheckedIn = 0;
+  const int numPatches = numPatchesX * numPatchesY * numPatchesZ;
+  const int numToCheckIn = (2 * numPatches) + ((numPatches * (numPatches - 1)) / 2);
+
+  // Count this caller and check to see if everyone has called
+  numCheckedIn++;
+  if (numCheckedIn >= numToCheckIn) {
+    numCheckedIn = 0;
+
+    // Start timing
+    simStartTime = CkWallTimer();
+
+    // One step for main (patches do many steps)
+    const int numStepsToDo = (numStepsRemaining > STEPS_PER_PRINT) ? (STEPS_PER_PRINT) : (numStepsRemaining);
+    patchArrayProxy.startIterations(numStepsToDo);
+    CkPrintf("Main::patchCheckIn() - Starting Simulation (%d steps remaining)...\n", numStepsRemaining);
+    numStepsRemaining -= numStepsToDo - 1;
+  }
+}
+
+
+void Main::patchCheckIn() {
+
+  static int numCheckedIn = 0;
+  const int numToCheckIn = (numPatchesX * numPatchesY * numPatchesZ);
+
+  // Count this caller and check to see if everyone has called
+  numCheckedIn++;
+  if (numCheckedIn >= numToCheckIn) {
+    numCheckedIn = 0;
+
+    // Check to see if there is another step, if so start it, otherwise exit
+    numStepsRemaining--;
+    CkPrintf("Main::patchCheckIn() - Simulation (%d steps remaining)...\n", numStepsRemaining);
+    if (numStepsRemaining <= 0) {
+
+      // Stop timing and display elapsed time
+      double simStopTime = CkWallTimer();
+      CkPrintf("Elapsed Time: %lf sec\n", simStopTime - simStartTime);
+
+      // The simulation has completed, so exit
+      CkExit();
+
+    } else {
+
+      // Begin another set of iterations
+      const int numStepsToDo = (numStepsRemaining > STEPS_PER_PRINT) ? (STEPS_PER_PRINT) : (numStepsRemaining);
+      patchArrayProxy.startIterations(numStepsToDo);
+      numStepsRemaining -= numStepsToDo - 1;
+    }
+  }
+}
+
+
+void seedRand() {
+
+  // Initialize the seed for this processor to a function of the time and the PE index
+  srand(time(NULL) + (754027 + CkMyPe()));
+}
+
+
+#include "main.def.h"
diff --git a/examples/charm++/cell/md/main.ci b/examples/charm++/cell/md/main.ci
new file mode 100644 (file)
index 0000000..e839fdd
--- /dev/null
@@ -0,0 +1,23 @@
+mainmodule main {
+
+  extern module patch;
+  extern module selfCompute;
+  extern module pairCompute;
+
+  readonly CProxy_Main mainProxy;
+  readonly CProxy_Patch patchArrayProxy;
+  readonly CProxy_SelfCompute selfComputeArrayProxy;
+  readonly CProxy_PairCompute pairComputeArrayProxy;
+  readonly int numPatchesX;
+  readonly int numPatchesY;
+  readonly int numPatchesZ;
+
+  mainchare Main {
+    entry Main(CkArgMsg *msg);
+    entry void proxyCheckIn();
+    entry void patchCheckIn();
+  };
+
+  initproc void seedRand();
+
+};
diff --git a/examples/charm++/cell/md/main.h b/examples/charm++/cell/md/main.h
new file mode 100644 (file)
index 0000000..0604545
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef __MAIN_H__
+#define __MAIN_H__
+
+
+#include "main.decl.h"
+#include "md_config.h"
+
+
+// DMK - DEBUG
+#if ((!(defined(CMK_CELL))) || (CMK_CELL == 0))
+  void* malloc_aligned(int size, int align) { return malloc(size); }
+  void free_aligned(void* ptr) { free(ptr); }
+#endif
+
+
+// Read-Onlys
+extern CProxy_Main mainProxy;
+extern CProxy_Patch patchArrayProxy;
+extern CProxy_SelfCompute selfComputeArrayProxy;
+extern CProxy_PairCompute pairComputeArrayProxy;
+extern int numPatchesX;
+extern int numPatchesY;
+extern int numPatchesZ;
+
+
+class Main : public CBase_Main {
+
+  private:
+
+    /// Application Parameters ///
+    int numParticlesPerPatch;
+
+    /// Member Variables ///
+    int numStepsRemaining;
+    double simStartTime;
+
+    /// Member Functions ///
+    void parseCommandLine(int argc, char** argv);
+
+  public:
+
+    /// Constructor(s) \ Destructor ///
+    Main(CkArgMsg* msg);
+    ~Main();
+
+    /// Entry Methods ///
+    void proxyCheckIn();
+    void patchCheckIn();
+
+};
+
+
+#endif //__MAIN_H__
diff --git a/examples/charm++/cell/md/md_config.h b/examples/charm++/cell/md/md_config.h
new file mode 100644 (file)
index 0000000..580e53d
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef __MD_CONFIG_H__
+#define __MD_CONFIG_H__
+
+
+#define DEFAULT_NUM_PARTICLES_PER_PATCH  (128)
+
+#define DEFAULT_NUM_PATCHES_X              (2)
+#define DEFAULT_NUM_PATCHES_Y              (2)
+#define DEFAULT_NUM_PATCHES_Z              (2)
+
+#define DEFAULT_NUM_STEPS                  (2)
+#define STEPS_PER_PRINT                    (4)
+
+#define PATCH_XYZ_TO_I(x,y,z)  (((z)*numPatchesX*numPatchesY)+((y)*numPatchesX)+(x))
+#define PATCH_I_TO_X(i)        ((i)%numPatchesX)
+#define PATCH_I_TO_Y(i)        (((i)/numPatchesX)%numPatchesY)
+#define PATCH_I_TO_Z(i)        ((i)/(numPatchesX*numPatchesY))
+
+
+#define TIME_PER_STEP       (1.0e-15f)           // Unit: s
+#define SIM_BOX_SIDE_LEN    (1.0e-7f)            // Unit: m (NOTE: 1 nm = 10A)
+#define COULOMBS_CONSTANT   (8.987551787e-9f)    // Unit: N*(m^2)*(C^-2)
+#define ELECTRON_CHARGE     (-1.602176487e-19f)  // Unit: C
+#define ELECTRON_MASS       (9.109382154e-31f)   // Unit: kg
+
+
+#endif //__MD_CONFIG_H__
diff --git a/examples/charm++/cell/md/pairCompute.C b/examples/charm++/cell/md/pairCompute.C
new file mode 100644 (file)
index 0000000..2835cb3
--- /dev/null
@@ -0,0 +1,89 @@
+#include "pairCompute.h"
+#include "main.h"
+
+
+PairCompute::PairCompute(int numParticlesPerPatch) {
+
+  numParticles = numParticlesPerPatch;
+  particleX[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleX[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleY[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleY[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleZ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleZ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleQ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleQ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceX[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceX[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceY[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceY[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceZ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceZ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  patchDataCount = 0;
+
+  // Check in with the main chare so it "knows" this object is ready for the simulation to start
+  mainProxy.proxyCheckIn();
+}
+
+
+PairCompute::PairCompute(CkMigrateMessage* msg) {
+  CkAbort("PairCompute::PairCompute(CkMigrateMessage* msg) not implemented yet");
+}
+
+
+PairCompute::~PairCompute() {
+  if (particleX[0] != NULL) { free_aligned(particleX[0]); particleX[0] = NULL; }
+  if (particleX[1] != NULL) { free_aligned(particleX[1]); particleX[1] = NULL; }
+  if (particleY[0] != NULL) { free_aligned(particleY[0]); particleY[0] = NULL; }
+  if (particleY[1] != NULL) { free_aligned(particleY[1]); particleY[1] = NULL; }
+  if (particleZ[0] != NULL) { free_aligned(particleZ[0]); particleZ[0] = NULL; }
+  if (particleZ[1] != NULL) { free_aligned(particleZ[1]); particleZ[1] = NULL; }
+  if (particleQ[0] != NULL) { free_aligned(particleQ[0]); particleQ[0] = NULL; }
+  if (particleQ[1] != NULL) { free_aligned(particleQ[1]); particleQ[1] = NULL; }
+  if (forceX[0] != NULL) { free_aligned(forceX[0]); forceX[0] = NULL; }
+  if (forceX[1] != NULL) { free_aligned(forceX[1]); forceX[1] = NULL; }
+  if (forceY[0] != NULL) { free_aligned(forceY[0]); forceY[0] = NULL; }
+  if (forceY[1] != NULL) { free_aligned(forceY[1]); forceY[1] = NULL; }
+  if (forceZ[0] != NULL) { free_aligned(forceZ[0]); forceZ[0] = NULL; }
+  if (forceZ[1] != NULL) { free_aligned(forceZ[1]); forceZ[1] = NULL; }
+  numParticles = 0;
+}
+
+
+void PairCompute::patchData(int numParticles, float* particleX, float* particleY, float* particleZ, float* particleQ, int fromPatch) {
+
+  // Copy the data from the parameters
+  memcpy(this->particleX[fromPatch], particleX, numParticles * sizeof(float));
+  memcpy(this->particleY[fromPatch], particleY, numParticles * sizeof(float));
+  memcpy(this->particleZ[fromPatch], particleZ, numParticles * sizeof(float));
+  memcpy(this->particleQ[fromPatch], particleQ, numParticles * sizeof(float));
+
+  // Increment the patch count and initiate the calculation of both patches have
+  //   sent their data to this compute
+  patchDataCount++;
+  if (patchDataCount >= 2) {
+    thisProxy(thisIndex.x, thisIndex.y).doCalc();  // Send message to self to do calculation
+    patchDataCount = 0;
+  }
+}
+
+
+void PairCompute::doCalc_callback() {
+
+  // Calculate the index of patch 0 and send force data back to it
+  int p0Index = thisIndex.x;
+  int p0IndexX = PATCH_I_TO_X(p0Index);
+  int p0IndexY = PATCH_I_TO_Y(p0Index);
+  int p0IndexZ = PATCH_I_TO_Z(p0Index);
+  patchArrayProxy(p0IndexX, p0IndexY, p0IndexZ).forceCheckIn(numParticles, forceX[0], forceY[0], forceZ[0]);
+
+  // Calculate the index of patch 1 and send force data back to it
+  int p1Index = thisIndex.y;
+  int p1IndexX = PATCH_I_TO_X(p1Index);
+  int p1IndexY = PATCH_I_TO_Y(p1Index);
+  int p1IndexZ = PATCH_I_TO_Z(p1Index);
+  patchArrayProxy(p1IndexX, p1IndexY, p1IndexZ).forceCheckIn(numParticles, forceX[1], forceY[1], forceZ[1]);
+}
+
+
+#include "pairCompute.def.h"
diff --git a/examples/charm++/cell/md/pairCompute.ci b/examples/charm++/cell/md/pairCompute.ci
new file mode 100644 (file)
index 0000000..7361104
--- /dev/null
@@ -0,0 +1,128 @@
+module pairCompute {
+
+  // Include md_config.h on the accelerator (for physics constants)
+  accelblock { #include "md_config.h" };
+
+  array[2D] PairCompute {
+
+    entry PairCompute(int numParticlesPerPatch);
+
+    entry void patchData(int numParticles,
+                         float particleX[numParticles],
+                         float particleY[numParticles],
+                         float particleZ[numParticles],
+                         float particleQ[numParticles],
+                         int fromPatch
+                        );
+
+    entry [accel] void doCalc()[  readonly : int numParticles <impl_obj->numParticles>,
+                                  readonly : int thisIndex_x <impl_obj->thisIndex.x>,
+                                  readonly : int thisIndex_y <impl_obj->thisIndex.y>,
+                                  readonly : float p0_x[numParticles] <impl_obj->particleX[0]>,
+                                  readonly : float p1_x[numParticles] <impl_obj->particleX[1]>,
+                                  readonly : float p0_y[numParticles] <impl_obj->particleY[0]>,
+                                  readonly : float p1_y[numParticles] <impl_obj->particleY[1]>,
+                                  readonly : float p0_z[numParticles] <impl_obj->particleZ[0]>,
+                                  readonly : float p1_z[numParticles] <impl_obj->particleZ[1]>,
+                                  readonly : float p0_q[numParticles] <impl_obj->particleQ[0]>,
+                                  readonly : float p1_q[numParticles] <impl_obj->particleQ[1]>,
+                                 writeonly : float f0_x[numParticles] <impl_obj->forceX[0]>,
+                                 writeonly : float f1_x[numParticles] <impl_obj->forceX[1]>,
+                                 writeonly : float f0_y[numParticles] <impl_obj->forceY[0]>,
+                                 writeonly : float f1_y[numParticles] <impl_obj->forceY[1]>,
+                                 writeonly : float f0_z[numParticles] <impl_obj->forceZ[0]>,
+                                 writeonly : float f1_z[numParticles] <impl_obj->forceZ[1]>
+                               ] {
+
+      // Calculate the electrostatic force (coulumb's law) between the particles
+      //   F = (k_e * (q_0 * q_1)) / (r^2)
+
+      register vec4f* p1_x_vec = (vec4f*)p1_x;
+      register vec4f* p1_y_vec = (vec4f*)p1_y;
+      register vec4f* p1_z_vec = (vec4f*)p1_z;
+      register vec4f* p1_q_vec = (vec4f*)p1_q;
+      register vec4f* f1_x_vec = (vec4f*)f1_x;
+      register vec4f* f1_y_vec = (vec4f*)f1_y;
+      register vec4f* f1_z_vec = (vec4f*)f1_z;
+      register int i;
+      register int j;
+      register const int numParticlesByVecSize = numParticles / (sizeof(vec4f) / sizeof(float));
+
+      // Zero out the force array for p1 (p0's force array will be zero'ed for each
+      //   particle in the outer loop)
+      for (j = 0; j < numParticlesByVecSize; j++) {
+        f1_x_vec[j] = vspread4f(0.0f);
+        f1_y_vec[j] = vspread4f(0.0f);
+        f1_z_vec[j] = vspread4f(0.0f);
+      }
+
+      // Spread coulumb's constant across a vector
+      vec4f coulomb_vec = vspread4f(COULOMBS_CONSTANT);
+
+      // Outer-loop (p0)
+      for (i = 0; i < numParticles; i++) {
+
+        // Spread this particle's values out over vectors
+        vec4f p0_x_i_vec = vspread4f(p0_x[i]);
+        vec4f p0_y_i_vec = vspread4f(p0_y[i]);
+        vec4f p0_z_i_vec = vspread4f(p0_z[i]);
+        vec4f p0_q_i_vec = vspread4f(p0_q[i]);
+        vec4f f0_x_i_vec = vspread4f(0.0f);
+        vec4f f0_y_i_vec = vspread4f(0.0f);
+        vec4f f0_z_i_vec = vspread4f(0.0f);
+
+        // Inner-loop (p1)
+        for (j = 0; j < numParticlesByVecSize; j++) {
+
+          // Load the particles' data
+          vec4f p1_x_j_vec = p1_x_vec[j];
+          vec4f p1_y_j_vec = p1_y_vec[j];
+          vec4f p1_z_j_vec = p1_z_vec[j];
+          vec4f p1_q_j_vec = p1_q_vec[j];
+
+          // Calculate the vector between the particles
+          vec4f p_x_diff_vec = p0_x_i_vec - p1_x_j_vec;
+          vec4f p_y_diff_vec = p0_y_i_vec - p1_y_j_vec;
+          vec4f p_z_diff_vec = p0_z_i_vec - p1_z_j_vec;
+
+          // Calculate r and r^2 between the particles
+          vec4f p_x_diff_2_vec = p_x_diff_vec * p_x_diff_vec;
+          vec4f p_y_diff_2_vec = p_y_diff_vec * p_y_diff_vec;
+          vec4f p_z_diff_2_vec = p_z_diff_vec * p_z_diff_vec;
+          vec4f r_2_vec = p_x_diff_2_vec + p_y_diff_2_vec + p_z_diff_2_vec;
+          vec4f r_vec = vsqrt4f(r_2_vec);
+
+          // Normalize the vector between the particles
+          vec4f p_x_diff_norm_vec = p_x_diff_vec / r_vec;
+          vec4f p_y_diff_norm_vec = p_y_diff_vec / r_vec;
+          vec4f p_z_diff_norm_vec = p_z_diff_vec / r_vec;
+
+          // Calculate the magnitude of the electrostatic force between the particles
+          vec4f f_mag_vec = coulomb_vec * ((p0_q_i_vec * p1_q_j_vec) / r_2_vec);
+
+          // Multiply the magnitude by the normalized postition difference vector to
+          //   create the force vector
+          vec4f f_x_vec = p_x_diff_norm_vec * f_mag_vec;
+          vec4f f_y_vec = p_y_diff_norm_vec * f_mag_vec;
+          vec4f f_z_vec = p_z_diff_norm_vec * f_mag_vec;
+
+          // Add the force to the outer-loop particle and subtract it from the inner-loop particles
+          f0_x_i_vec += f_x_vec;
+          f0_y_i_vec += f_y_vec;
+          f0_z_i_vec += f_z_vec;
+          f1_x_vec[j] -= f_x_vec;
+          f1_y_vec[j] -= f_y_vec;
+          f1_z_vec[j] -= f_z_vec;
+        }
+
+        // Write force values for the outer-loop particle back to memory
+        f0_x[i] += vextract4f(f0_x_i_vec, 0) + vextract4f(f0_x_i_vec, 1) + vextract4f(f0_x_i_vec, 2) + vextract4f(f0_x_i_vec, 3);
+        f0_y[i] += vextract4f(f0_y_i_vec, 0) + vextract4f(f0_y_i_vec, 1) + vextract4f(f0_y_i_vec, 2) + vextract4f(f0_y_i_vec, 3);
+        f0_z[i] += vextract4f(f0_z_i_vec, 0) + vextract4f(f0_z_i_vec, 1) + vextract4f(f0_z_i_vec, 2) + vextract4f(f0_z_i_vec, 3);
+      }
+
+    } doCalc_callback;
+
+  };
+
+};
\ No newline at end of file
diff --git a/examples/charm++/cell/md/pairCompute.h b/examples/charm++/cell/md/pairCompute.h
new file mode 100644 (file)
index 0000000..c378e6b
--- /dev/null
@@ -0,0 +1,44 @@
+#ifndef __PAIR_COMPUTE_H__
+#define __PAIR_COMPUTE_H__
+
+
+#include "pairCompute.decl.h"
+
+
+class PairCompute : public CBase_PairCompute {
+
+  // Declare CkIndex_PairCompute as a friend so accelerated entry methods can access
+  //   the member variables of the object they execute on
+  friend class CkIndex_PairCompute;
+
+  private:
+
+    int numParticles;
+    int patchDataCount;
+
+    /// Particle Buffers ///
+    float* particleX[2];
+    float* particleY[2];
+    float* particleZ[2];
+    float* particleQ[2];
+
+    /// Force Buffers ///
+    float* forceX[2];
+    float* forceY[2];
+    float* forceZ[2];
+
+  public:
+
+    /// Constructor(s) \ Destructor ///
+    PairCompute(int numParticlesPerPatch);
+    PairCompute(CkMigrateMessage *msg);
+    ~PairCompute();
+
+    /// Entry Methods ///
+    void patchData(int numParticles, float* particleX, float* particleY, float* particleZ, float* particleQ, int fromPatch);
+    void doCalc_callback();
+
+};
+
+
+#endif //__PAIR_COMPUTE_H__
diff --git a/examples/charm++/cell/md/patch.C b/examples/charm++/cell/md/patch.C
new file mode 100644 (file)
index 0000000..5286689
--- /dev/null
@@ -0,0 +1,134 @@
+#include "patch.h"
+#include "main.h"
+
+
+void Patch::randomizeParticles() {
+
+  // Fill in a box with electrons that initially have no velocity
+  for (int i = 0; i < numParticles; i++) {
+    particleX[i] = (randf() * SIM_BOX_SIDE_LEN) - (SIM_BOX_SIDE_LEN / 2.0f);
+    particleY[i] = (randf() * SIM_BOX_SIDE_LEN) - (SIM_BOX_SIDE_LEN / 2.0f);
+    particleZ[i] = (randf() * SIM_BOX_SIDE_LEN) - (SIM_BOX_SIDE_LEN / 2.0f);
+    particleQ[i] = ELECTRON_CHARGE;
+    particleM[i] = ELECTRON_MASS;
+    velocityX[i] = 0.0f;
+    velocityY[i] = 0.0f;
+    velocityZ[i] = 0.0f;
+  }
+}
+
+
+float Patch::randf() {
+  return (((float)(rand() % 1483727)) / (1483727.0f));
+}
+
+
+Patch::Patch(int numParticles) {
+
+  // Allocate memory for the particles
+  this->numParticles = numParticles;
+  particleX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleQ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleM = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceSumX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceSumY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceSumZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  velocityX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  velocityY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  velocityZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+
+  // Initialize the particles
+  randomizeParticles();
+
+  // Check in with the main proxy so it "knows" that this patch is ready to start simulation
+  mainProxy.proxyCheckIn();
+}
+
+
+Patch::Patch(CkMigrateMessage* msg) {
+  CkAbort("Patch::Patch(CkMigrateMessage* msg) not implemented yet");
+}
+
+
+Patch::~Patch() {
+  if (particleX != NULL) { free_aligned(particleX); particleX = NULL; }
+  if (particleY != NULL) { free_aligned(particleY); particleY = NULL; }
+  if (particleZ != NULL) { free_aligned(particleZ); particleZ = NULL; }
+  if (particleQ != NULL) { free_aligned(particleQ); particleQ = NULL; }
+  if (particleM != NULL) { free_aligned(particleM); particleM = NULL; }
+  if (forceSumX != NULL) { free_aligned(forceSumX); forceSumX = NULL; }
+  if (forceSumY != NULL) { free_aligned(forceSumY); forceSumY = NULL; }
+  if (forceSumZ != NULL) { free_aligned(forceSumZ); forceSumZ = NULL; }
+  if (velocityX != NULL) { free_aligned(velocityX); velocityX = NULL; }
+  if (velocityY != NULL) { free_aligned(velocityY); velocityY = NULL; }
+  if (velocityZ != NULL) { free_aligned(velocityZ); velocityZ = NULL; }
+  numParticles = 0;
+}
+
+
+void Patch::startIteration() { startIteration_common(1); }
+void Patch::startIterations(int numIters) { startIteration_common(numIters); }
+void Patch::startIteration_common(int numIters) {
+
+  // Set the number of remaining time stops
+  CkAssert(numIters > 0);
+  remainingIterations = numIters;
+
+  // Reset the number of expected computes that will check in
+  //   NOTE: pair compute for every other patch, self compute for this patch
+  remainingForceCheckIns = numPatchesX * numPatchesY * numPatchesZ;
+
+  // Clear the force sum arrays
+  register vec4f* fsx = (vec4f*)forceSumX;
+  register vec4f* fsy = (vec4f*)forceSumY;
+  register vec4f* fsz = (vec4f*)forceSumZ;
+  const vec4f zero_vec = vspread4f(0.0f);
+  register const int numParticles_vec = numParticles / (sizeof(vec4f) * sizeof(float));
+  for (int i = 0; i < numParticles_vec; i++) {
+    fsx[i] = zero_vec;
+    fsy[i] = zero_vec;
+    fsz[i] = zero_vec;
+  }
+
+  // Send particle data to self computes
+  selfComputeArrayProxy(thisIndex.x, thisIndex.y, thisIndex.z).doCalc(numParticles, particleX, particleY, particleZ, particleQ);
+
+  // Send particle data to pair computes
+  const int index = PATCH_XYZ_TO_I(thisIndex.x, thisIndex.y, thisIndex.z);
+  for (int i = 0; i < index; i++) {
+    pairComputeArrayProxy(i, index).patchData(numParticles, particleX, particleY, particleZ, particleQ, 1);
+  }
+  const int numPatches = numPatchesX * numPatchesY * numPatchesZ;
+  for (int i = index + 1; i < numPatches; i++) {
+    pairComputeArrayProxy(index, i).patchData(numParticles, particleX, particleY, particleZ, particleQ, 0);
+  }
+}
+
+
+void Patch::forceCheckIn_callback() {
+
+  // Decrement the counter containing the number of remaining computes that need to report forces
+  //   back to this patch.  Once all computes have checked in, send a message to accelerated
+  //   'integrate' entry method.
+  remainingForceCheckIns--;
+  if (remainingForceCheckIns <= 0) {
+    thisProxy(thisIndex.x, thisIndex.y, thisIndex.z).integrate();
+  }
+}
+
+void Patch::integrate_callback() {
+
+  // Decrement the counter containing the number of remaining iterations.  If there are
+  //   more iterations, do another one, otherwise, check in with main
+  remainingIterations--;
+  if (remainingIterations > 0) {
+    startIteration_common(remainingIterations);
+  } else {
+    mainProxy.patchCheckIn();
+  }
+}
+
+
+#include "patch.def.h"
diff --git a/examples/charm++/cell/md/patch.ci b/examples/charm++/cell/md/patch.ci
new file mode 100644 (file)
index 0000000..c047d6a
--- /dev/null
@@ -0,0 +1,91 @@
+module patch {
+
+  accelblock { #include "md_config.h" };
+
+  array[3D] Patch {
+
+    entry Patch(int numParticles);
+
+    entry void startIteration();
+    entry void startIterations(int numIters);
+
+    entry [accel] void forceCheckIn(int numParticles,
+                                    float forceX[numParticles],
+                                    float forceY[numParticles],
+                                    float forceZ[numParticles]
+                                   )[
+                                    readwrite : float forceSumX[numParticles] <impl_obj->forceSumX>,
+                                    readwrite : float forceSumY[numParticles] <impl_obj->forceSumY>,
+                                    readwrite : float forceSumZ[numParticles] <impl_obj->forceSumZ>
+                                   ] {
+
+      // Combine the incoming force with the accumulated forces for this patch
+      register vec4f* fsx = (vec4f*)forceSumX;
+      register vec4f* fsy = (vec4f*)forceSumY;
+      register vec4f* fsz = (vec4f*)forceSumZ;
+      register vec4f* fx = (vec4f*)forceX;
+      register vec4f* fy = (vec4f*)forceY;
+      register vec4f* fz = (vec4f*)forceZ;
+      register const int numParticles_vec = numParticles / (sizeof(vec4f) * sizeof(float));
+      register int i;
+      for (i = 0; i < numParticles_vec; i++) {
+        fsx[i] = vadd4f(fsx[i], fx[i]);
+        fsy[i] = vadd4f(fsy[i], fy[i]);
+        fsz[i] = vadd4f(fsz[i], fz[i]);
+      }
+
+    } forceCheckIn_callback;
+
+    entry [accel] void integrate()[  readonly : int numParticles <impl_obj->numParticles>,
+                                     readonly : int thisIndex_x <impl_obj->thisIndex.x>,
+                                     readonly : int thisIndex_y <impl_obj->thisIndex.y>,
+                                     readonly : int thisIndex_z <impl_obj->thisIndex.z>,
+                                    readwrite : float p_x[numParticles] <impl_obj->particleX>,
+                                    readwrite : float p_y[numParticles] <impl_obj->particleY>,
+                                    readwrite : float p_z[numParticles] <impl_obj->particleZ>,
+                                     readonly : float p_m[numParticles] <impl_obj->particleM>,
+                                     readonly : float f_x[numParticles] <impl_obj->forceSumX>,
+                                     readonly : float f_y[numParticles] <impl_obj->forceSumY>,
+                                     readonly : float f_z[numParticles] <impl_obj->forceSumZ>,
+                                    readwrite : float v_x[numParticles] <impl_obj->velocityX>,
+                                    readwrite : float v_y[numParticles] <impl_obj->velocityY>,
+                                    readwrite : float v_z[numParticles] <impl_obj->velocityZ>
+                                  ] {
+
+      register vec4f* p_x_vec = (vec4f*)p_x;
+      register vec4f* p_y_vec = (vec4f*)p_y;
+      register vec4f* p_z_vec = (vec4f*)p_z;
+      register vec4f* p_m_vec = (vec4f*)p_m;
+      register vec4f* f_x_vec = (vec4f*)f_x;
+      register vec4f* f_y_vec = (vec4f*)f_y;
+      register vec4f* f_z_vec = (vec4f*)f_z;
+      register vec4f* v_x_vec = (vec4f*)v_x;
+      register vec4f* v_y_vec = (vec4f*)v_y;
+      register vec4f* v_z_vec = (vec4f*)v_z;
+      register int i;
+      register const int numParticles_vec = numParticles / (sizeof(vec4f) / sizeof(float));
+
+      vec4f delta_time_vec = vspread4f(TIME_PER_STEP);
+
+      // Add the acceleration to the velocity and add the update velocity to the position
+      for (i = 0; i < numParticles_vec; i++) {
+
+        //// Update the velocity : v_new = v_old + (F/m)
+        vec4f new_v_x_vec = v_x_vec[i] + (delta_time_vec * (f_x_vec[i] / p_m_vec[i]));
+        vec4f new_v_y_vec = v_y_vec[i] + (delta_time_vec * (f_y_vec[i] / p_m_vec[i]));
+        vec4f new_v_z_vec = v_z_vec[i] + (delta_time_vec * (f_z_vec[i] / p_m_vec[i]));
+        v_x_vec[i] = new_v_x_vec;
+        v_y_vec[i] = new_v_y_vec;
+        v_z_vec[i] = new_v_z_vec;
+
+        //// Update the position : pos_new = pos_old + v_new
+        p_x_vec[i] += delta_time_vec * new_v_x_vec;
+        p_y_vec[i] += delta_time_vec * new_v_y_vec;
+        p_z_vec[i] += delta_time_vec * new_v_z_vec;
+      }
+
+    } integrate_callback;
+
+  };
+
+};
\ No newline at end of file
diff --git a/examples/charm++/cell/md/patch.h b/examples/charm++/cell/md/patch.h
new file mode 100644 (file)
index 0000000..35cae8e
--- /dev/null
@@ -0,0 +1,55 @@
+#ifndef __PATCH_H__
+#define __PATCH_H__
+
+
+#include "patch.decl.h"
+#include "md_config.h"
+
+
+class Patch : public CBase_Patch {
+
+  // Declare CkIndex_Patch as a friend so accelerated entry methods can access
+  //   the member variables of the object they execute on
+  friend class CkIndex_Patch;
+
+  private:
+
+    /// Member Variables ///
+    int remainingForceCheckIns;
+    int remainingIterations;
+
+    int numParticles;
+    float* particleX;  // x-coordinate
+    float* particleY;  // y-coordinate
+    float* particleZ;  // z-coordinate
+    float* particleQ;  // charge
+    float* particleM;  // mass
+    float* velocityX;  // velocity x-component
+    float* velocityY;  // velocity y-component
+    float* velocityZ;  // velocity z-component
+    float* forceSumX;  // Buffer to sum of force x components from computes
+    float* forceSumY;  // Buffer to sum of force y components from computes
+    float* forceSumZ;  // Buffer to sum of force z components from computes
+
+    /// Member Functions ///
+    void randomizeParticles();
+    float randf();
+    void startIteration_common(int numIters);
+
+  public:
+
+    /// Constructor(s) \ Destructor ///
+    Patch(int numParticles);
+    Patch(CkMigrateMessage* msg);
+    ~Patch();
+
+    /// Entry Methods ///
+    void startIteration();
+    void startIterations(int numIters);
+    void forceCheckIn_callback();
+    void integrate_callback();
+
+};
+
+
+#endif //__PATCH_H__
diff --git a/examples/charm++/cell/md/selfCompute.C b/examples/charm++/cell/md/selfCompute.C
new file mode 100644 (file)
index 0000000..e2852fc
--- /dev/null
@@ -0,0 +1,36 @@
+#include "selfCompute.h"
+#include "main.h"
+
+
+SelfCompute::SelfCompute(int numParticlesPerPatch) {
+
+  // Allocate buffers for force data
+  numParticles = numParticlesPerPatch;
+  forceX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+
+  // Check in with the main chare indicating that this object is ready for the simulation to start
+  mainProxy.proxyCheckIn();
+}
+
+
+SelfCompute::SelfCompute(CkMigrateMessage* msg) {
+  CkAbort("SelfCompute::SelfCompute(CkMigrateMessage *msg) not implemented yet");
+}
+
+
+SelfCompute::~SelfCompute() {
+  if (forceX != NULL) { free_aligned(forceX); forceX = NULL; }
+  if (forceY != NULL) { free_aligned(forceY); forceY = NULL; }
+  if (forceZ != NULL) { free_aligned(forceZ); forceZ = NULL; }
+  numParticles = 0;
+}
+
+
+void SelfCompute::doCalc_callback() {
+  patchArrayProxy(thisIndex.x, thisIndex.y, thisIndex.z).forceCheckIn(numParticles, forceX, forceY, forceZ);
+}
+
+
+#include "selfCompute.def.h"
diff --git a/examples/charm++/cell/md/selfCompute.ci b/examples/charm++/cell/md/selfCompute.ci
new file mode 100644 (file)
index 0000000..a373842
--- /dev/null
@@ -0,0 +1,141 @@
+module selfCompute {
+
+  // Include md_config.h on the accelerator (for physics constants)
+  accelblock { #include "md_config.h" };
+
+  array[3D] SelfCompute {
+
+    entry SelfCompute(int numParticlesPerPatch);
+
+    entry [accel] void doCalc(int numParticles,
+                              float particleX[numParticles],
+                              float particleY[numParticles],
+                              float particleZ[numParticles],
+                              float particleQ[numParticles]
+                             )[
+                               readonly : int thisIndex_x <impl_obj->thisIndex.x>,
+                               readonly : int thisIndex_y <impl_obj->thisIndex.y>,
+                              writeonly : float f0_x[numParticles] <impl_obj->forceX>,
+                              writeonly : float f0_y[numParticles] <impl_obj->forceY>,
+                              writeonly : float f0_z[numParticles] <impl_obj->forceZ>
+                             ] {
+
+      // Calculate the electrostatic force (coulumb's law) between the particles
+      //   F = (k_e * (q_0 * q_1)) / (r^2)
+
+      register vec4f* p0_x_vec = (vec4f*)particleX;
+      register vec4f* p0_y_vec = (vec4f*)particleY;
+      register vec4f* p0_z_vec = (vec4f*)particleZ;
+      register vec4f* p0_q_vec = (vec4f*)particleQ;
+      register vec4f* f0_x_vec = (vec4f*)f0_x;
+      register vec4f* f0_y_vec = (vec4f*)f0_y;
+      register vec4f* f0_z_vec = (vec4f*)f0_z;
+      register int i;
+      register int j;
+      register const int numParticlesByVecSize = numParticles / (sizeof(vec4f) / sizeof(float));
+
+      // Zero out the force output
+      for (j = 0; j < numParticlesByVecSize; j++) {
+        f0_x_vec[j] = vspread4f(0.0f);
+        f0_y_vec[j] = vspread4f(0.0f);
+        f0_z_vec[j] = vspread4f(0.0f);
+      }
+
+      // Spread coulumb's constant across a vector
+      vec4f coulomb_vec = vspread4f(COULOMBS_CONSTANT);
+
+      // Outer-loop
+      for (i = 0; i < numParticles; i++) {
+
+        // Interact with all particles (one-by-one) until the vector boundary
+        for (j = i + 1; ((j & 0x3) != 0x0) && (j < numParticles); j++) {
+
+          float p_x_diff = particleX[i] - particleX[j];
+          float p_y_diff = particleY[i] - particleY[j];
+          float p_z_diff = particleZ[i] - particleZ[j];
+          float r_2 = (p_x_diff * p_x_diff) + (p_y_diff * p_y_diff) + (p_z_diff * p_z_diff);
+          float r = sqrt(r_2);
+
+          float p_x_diff_norm = p_x_diff / r;
+          float p_y_diff_norm = p_y_diff / r;
+          float p_z_diff_norm = p_z_diff / r;
+
+          float p_q_prod = particleQ[i] * particleQ[j];
+          float f_mag = COULOMBS_CONSTANT * (p_q_prod / r_2);
+
+          float f_x = f_mag * p_x_diff_norm;
+          float f_y = f_mag * p_y_diff_norm;
+          float f_z = f_mag * p_z_diff_norm;
+
+          f0_x[i] += f_x;
+          f0_y[i] += f_y;
+          f0_z[i] += f_z;
+          f0_x[j] -= f_x;
+          f0_y[j] -= f_y;
+          f0_z[j] -= f_z;
+        }
+
+        // Spread this particle's (p0[i]) values out over vectors
+        vec4f p0_x_i_vec = vspread4f(particleX[i]);
+        vec4f p0_y_i_vec = vspread4f(particleY[i]);
+        vec4f p0_z_i_vec = vspread4f(particleZ[i]);
+        vec4f p0_q_i_vec = vspread4f(particleQ[i]);
+        vec4f f0_x_i_vec = vspread4f(0.0f);
+        vec4f f0_y_i_vec = vspread4f(0.0f);
+        vec4f f0_z_i_vec = vspread4f(0.0f);
+
+        // Switch to vectorized loop for the remaining elements in the particle array
+        for (j = j >> 2; j < numParticlesByVecSize; j++) {
+
+          // Load the particles' data
+          vec4f p0_x_j_vec = p0_x_vec[j];
+          vec4f p0_y_j_vec = p0_y_vec[j];
+          vec4f p0_z_j_vec = p0_z_vec[j];
+          vec4f p0_q_j_vec = p0_q_vec[j];
+
+          // Calculate the vector between the particles
+          vec4f p_x_diff_vec = p0_x_i_vec - p0_x_j_vec;
+          vec4f p_y_diff_vec = p0_y_i_vec - p0_y_j_vec;
+          vec4f p_z_diff_vec = p0_z_i_vec - p0_z_j_vec;
+
+          // Calculate r and r^2 between the particles
+          vec4f p_x_diff_2_vec = p_x_diff_vec * p_x_diff_vec;
+          vec4f p_y_diff_2_vec = p_y_diff_vec * p_y_diff_vec;
+          vec4f p_z_diff_2_vec = p_z_diff_vec * p_z_diff_vec;
+          vec4f r_2_vec = p_x_diff_2_vec + p_y_diff_2_vec + p_z_diff_2_vec;
+          vec4f r_vec = vsqrt4f(r_2_vec);
+
+          // Normalize the vector between the particles
+          vec4f p_x_diff_norm_vec = p_x_diff_vec / r_vec;
+          vec4f p_y_diff_norm_vec = p_y_diff_vec / r_vec;
+          vec4f p_z_diff_norm_vec = p_z_diff_vec / r_vec;
+
+          // Calculate the magnitude of the electrostatic force between the particles
+          vec4f f_mag_vec = coulomb_vec * ((p0_q_i_vec * p0_q_j_vec) / r_2_vec);
+
+          // Multiply the magnitude by the normalized postition difference vector to
+          //   create the force vector
+          vec4f f_x_vec = p_x_diff_norm_vec * f_mag_vec;
+          vec4f f_y_vec = p_y_diff_norm_vec * f_mag_vec;
+          vec4f f_z_vec = p_z_diff_norm_vec * f_mag_vec;
+
+          // Add the force to the outer-loop particle and subtract it from the inner-loop particles
+          f0_x_i_vec += f_x_vec;
+          f0_y_i_vec += f_y_vec;
+          f0_z_i_vec += f_z_vec;
+          f0_x_vec[j] -= f_x_vec;
+          f0_y_vec[j] -= f_y_vec;
+          f0_z_vec[j] -= f_z_vec;
+        }
+
+        // Add force values for p0[i] into f0
+        f0_x[i] += vextract4f(f0_x_i_vec, 0) + vextract4f(f0_x_i_vec, 1) + vextract4f(f0_x_i_vec, 2) + vextract4f(f0_x_i_vec, 3);
+        f0_y[i] += vextract4f(f0_y_i_vec, 0) + vextract4f(f0_y_i_vec, 1) + vextract4f(f0_y_i_vec, 2) + vextract4f(f0_y_i_vec, 3);
+        f0_z[i] += vextract4f(f0_z_i_vec, 0) + vextract4f(f0_z_i_vec, 1) + vextract4f(f0_z_i_vec, 2) + vextract4f(f0_z_i_vec, 3);
+      }
+
+    } doCalc_callback;
+
+  };
+
+};
\ No newline at end of file
diff --git a/examples/charm++/cell/md/selfCompute.h b/examples/charm++/cell/md/selfCompute.h
new file mode 100644 (file)
index 0000000..30e6983
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef __SELF_COMPUTE_H__
+#define __SELF_COMPUTE_H__
+
+
+#include "selfCompute.decl.h"
+#include "md_config.h"
+
+
+class SelfCompute : public CBase_SelfCompute {
+
+  // Declare CkIndex_SelfCompute as a friend so accelerated entry methods can access
+  //   the member variables of the object they execute on
+  friend class CkIndex_SelfCompute;
+
+  private:
+
+    /// Force Buffers ///
+    int numParticles;
+    float* forceX;
+    float* forceY;
+    float* forceZ;
+
+  public:
+
+    /// Constructor(s) \ Destructor ///
+    SelfCompute(int numParticlesPerPatch);
+    SelfCompute(CkMigrateMessage* msg);
+    ~SelfCompute();
+
+    /// Entry Methods ///
+    void doCalc_callback();
+
+};
+
+
+#endif //__SELF_COMPUTE_H__