Modified the code to use CmiMallocAligned and CmiFreeAligned.
author David Kunzman <kunzman2@illinois.edu>
Tue, 24 Mar 2009 01:44:17 +0000 (01:44 +0000)
committer David Kunzman <kunzman2@illinois.edu>
Tue, 24 Mar 2009 01:44:17 +0000 (01:44 +0000)
examples/charm++/cell/md/main.h
examples/charm++/cell/md/pairCompute.C
examples/charm++/cell/md/patch.C
examples/charm++/cell/md/selfCompute.C

index c254799ae0dc0bfff852766f1aec1a5f408d6f73..3653da6f43c5317e73981d568c1cd9b93a210446 100644 (file)
 //   net-linux-cell builds, but not general net-linux builds.  Declare them here
 //   if this is not a net-linux-cell build (for now, in this example program, it
 //   is not important if they do not actually align on non net-linux-cell builds).
-#if ((!(defined(CMK_CELL))) || (CMK_CELL == 0))
-  inline void* malloc_aligned(int size, int align) { return malloc(size); }
-  inline void free_aligned(void* ptr) { free(ptr); }
-#endif
+//#if ((!(defined(CMK_CELL))) || (CMK_CELL == 0))
+//  inline void* malloc_aligned(int size, int align) { return malloc(size); }
+//  inline void free_aligned(void* ptr) { free(ptr); }
+//#endif
 
 
 // Read-Only Variables
index ca3a696d211acf21c5c297013edf12a44005622c..28ff50e04a73123eb646ac3d51cac0c9bf9424ab 100644 (file)
@@ -23,21 +23,21 @@ PairCompute::PairCompute(CkMigrateMessage* msg) {
 PairCompute::~PairCompute() {
 
   #if USE_PROXY_PATCHES == 0
-    if (particleX[0] != NULL) { free_aligned(particleX[0]); particleX[0] = NULL; }
-    if (particleX[1] != NULL) { free_aligned(particleX[1]); particleX[1] = NULL; }
-    if (particleY[0] != NULL) { free_aligned(particleY[0]); particleY[0] = NULL; }
-    if (particleY[1] != NULL) { free_aligned(particleY[1]); particleY[1] = NULL; }
-    if (particleZ[0] != NULL) { free_aligned(particleZ[0]); particleZ[0] = NULL; }
-    if (particleZ[1] != NULL) { free_aligned(particleZ[1]); particleZ[1] = NULL; }
-    if (particleQ[0] != NULL) { free_aligned(particleQ[0]); particleQ[0] = NULL; }
-    if (particleQ[1] != NULL) { free_aligned(particleQ[1]); particleQ[1] = NULL; }
+    if (particleX[0] != NULL) { CmiFreeAligned(particleX[0]); particleX[0] = NULL; }
+    if (particleX[1] != NULL) { CmiFreeAligned(particleX[1]); particleX[1] = NULL; }
+    if (particleY[0] != NULL) { CmiFreeAligned(particleY[0]); particleY[0] = NULL; }
+    if (particleY[1] != NULL) { CmiFreeAligned(particleY[1]); particleY[1] = NULL; }
+    if (particleZ[0] != NULL) { CmiFreeAligned(particleZ[0]); particleZ[0] = NULL; }
+    if (particleZ[1] != NULL) { CmiFreeAligned(particleZ[1]); particleZ[1] = NULL; }
+    if (particleQ[0] != NULL) { CmiFreeAligned(particleQ[0]); particleQ[0] = NULL; }
+    if (particleQ[1] != NULL) { CmiFreeAligned(particleQ[1]); particleQ[1] = NULL; }
   #endif
-  if (forceX[0] != NULL) { free_aligned(forceX[0]); forceX[0] = NULL; }
-  if (forceX[1] != NULL) { free_aligned(forceX[1]); forceX[1] = NULL; }
-  if (forceY[0] != NULL) { free_aligned(forceY[0]); forceY[0] = NULL; }
-  if (forceY[1] != NULL) { free_aligned(forceY[1]); forceY[1] = NULL; }
-  if (forceZ[0] != NULL) { free_aligned(forceZ[0]); forceZ[0] = NULL; }
-  if (forceZ[1] != NULL) { free_aligned(forceZ[1]); forceZ[1] = NULL; }
+  if (forceX[0] != NULL) { CmiFreeAligned(forceX[0]); forceX[0] = NULL; }
+  if (forceX[1] != NULL) { CmiFreeAligned(forceX[1]); forceX[1] = NULL; }
+  if (forceY[0] != NULL) { CmiFreeAligned(forceY[0]); forceY[0] = NULL; }
+  if (forceY[1] != NULL) { CmiFreeAligned(forceY[1]); forceY[1] = NULL; }
+  if (forceZ[0] != NULL) { CmiFreeAligned(forceZ[0]); forceZ[0] = NULL; }
+  if (forceZ[1] != NULL) { CmiFreeAligned(forceZ[1]); forceZ[1] = NULL; }
   numParticles = -1;
 }
 
@@ -47,21 +47,21 @@ void PairCompute::init(int numParticlesPerPatch) {
   // Initialize the arrays
   numParticles = numParticlesPerPatch;
   #if USE_PROXY_PATCHES == 0
-    particleX[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleX[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleY[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleY[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleZ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleZ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleQ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleQ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+    particleX[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleX[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleY[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleY[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleZ[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleZ[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleQ[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleQ[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
   #endif
-  forceX[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceX[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceY[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceY[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceZ[0] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceZ[1] = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceX[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceX[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceY[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceY[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceZ[0] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceZ[1] = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
   patchDataCount = 0;
 
   // Check in with the main chare
index 8c420ae0f89bbafadff704b325a9a2cb974dee2f..5a487e412827be4285b990e74d1dc4e647d20d08 100644 (file)
@@ -41,17 +41,17 @@ Patch::Patch(CkMigrateMessage* msg) {
 
 
 Patch::~Patch() {
-  if (particleX != NULL) { free_aligned(particleX); particleX = NULL; }
-  if (particleY != NULL) { free_aligned(particleY); particleY = NULL; }
-  if (particleZ != NULL) { free_aligned(particleZ); particleZ = NULL; }
-  if (particleQ != NULL) { free_aligned(particleQ); particleQ = NULL; }
-  if (particleM != NULL) { free_aligned(particleM); particleM = NULL; }
-  if (forceSumX != NULL) { free_aligned(forceSumX); forceSumX = NULL; }
-  if (forceSumY != NULL) { free_aligned(forceSumY); forceSumY = NULL; }
-  if (forceSumZ != NULL) { free_aligned(forceSumZ); forceSumZ = NULL; }
-  if (velocityX != NULL) { free_aligned(velocityX); velocityX = NULL; }
-  if (velocityY != NULL) { free_aligned(velocityY); velocityY = NULL; }
-  if (velocityZ != NULL) { free_aligned(velocityZ); velocityZ = NULL; }
+  if (particleX != NULL) { CmiFreeAligned(particleX); particleX = NULL; }
+  if (particleY != NULL) { CmiFreeAligned(particleY); particleY = NULL; }
+  if (particleZ != NULL) { CmiFreeAligned(particleZ); particleZ = NULL; }
+  if (particleQ != NULL) { CmiFreeAligned(particleQ); particleQ = NULL; }
+  if (particleM != NULL) { CmiFreeAligned(particleM); particleM = NULL; }
+  if (forceSumX != NULL) { CmiFreeAligned(forceSumX); forceSumX = NULL; }
+  if (forceSumY != NULL) { CmiFreeAligned(forceSumY); forceSumY = NULL; }
+  if (forceSumZ != NULL) { CmiFreeAligned(forceSumZ); forceSumZ = NULL; }
+  if (velocityX != NULL) { CmiFreeAligned(velocityX); velocityX = NULL; }
+  if (velocityY != NULL) { CmiFreeAligned(velocityY); velocityY = NULL; }
+  if (velocityZ != NULL) { CmiFreeAligned(velocityZ); velocityZ = NULL; }
   numParticles = 0;
 }
 
@@ -69,17 +69,17 @@ void Patch::init(int numParticles) {
 
   // Allocate memory for the particles
   this->numParticles = numParticles;
-  particleX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleQ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleM = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  velocityX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  velocityY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  velocityZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleQ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleM = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  velocityX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  velocityY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  velocityZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
 
   // Initialize the particles
   randomizeParticles();
@@ -187,19 +187,27 @@ void Patch::forceCheckIn(int numParticles, float* forceX, float* forceY, float*
 void Patch::forceCheckIn(int numParticles, float* forceX, float* forceY, float* forceZ, int numForceCheckIns) {
 
   // Accumulate the force data
-  register vec4f* fsx = (vec4f*)forceSumX;
-  register vec4f* fsy = (vec4f*)forceSumY;
-  register vec4f* fsz = (vec4f*)forceSumZ;
-  register vec4f* fx = (vec4f*)forceX;
-  register vec4f* fy = (vec4f*)forceY;
-  register vec4f* fz = (vec4f*)forceZ;
-  register const int numParticles_vec = numParticles / (sizeof(vec4f) * sizeof(float));
-  register int i;
-  for (i = 0; i < numParticles_vec; i++) {
-    fsx[i] = vadd4f(fsx[i], fx[i]);
-    fsy[i] = vadd4f(fsy[i], fy[i]);
-    fsz[i] = vadd4f(fsz[i], fz[i]);
-  }
+  #if 0
+    register vec4f* fsx = (vec4f*)forceSumX;
+    register vec4f* fsy = (vec4f*)forceSumY;
+    register vec4f* fsz = (vec4f*)forceSumZ;
+    register vec4f* fx = (vec4f*)forceX;
+    register vec4f* fy = (vec4f*)forceY;
+    register vec4f* fz = (vec4f*)forceZ;
+    register const int numParticles_vec = numParticles / (sizeof(vec4f) * sizeof(float));
+    register int i;
+    for (i = 0; i < numParticles_vec; i++) {
+      fsx[i] = vadd4f(fsx[i], fx[i]);
+      fsy[i] = vadd4f(fsy[i], fy[i]);
+      fsz[i] = vadd4f(fsz[i], fz[i]);
+    }
+  #else
+    for (int i = 0; i < numParticles; i++) {
+      forceSumX[i] += forceX[i];
+      forceSumY[i] += forceY[i];
+      forceSumZ[i] += forceZ[i];
+    }
+  #endif
 
   // Count the incoming forced data and integrate if all force data has arrived
   remainingForceCheckIns -= numForceCheckIns;
@@ -253,13 +261,13 @@ ProxyPatch::ProxyPatch(CkMigrateMessage *msg) {
 
 
 ProxyPatch::~ProxyPatch() {
-  if (particleX != NULL) { free_aligned(particleX); particleX = NULL; }
-  if (particleY != NULL) { free_aligned(particleY); particleY = NULL; }
-  if (particleZ != NULL) { free_aligned(particleZ); particleZ = NULL; }
-  if (particleQ != NULL) { free_aligned(particleQ); particleQ = NULL; }
-  if (forceSumX != NULL) { free_aligned(forceSumX); forceSumX = NULL; }
-  if (forceSumY != NULL) { free_aligned(forceSumY); forceSumY = NULL; }
-  if (forceSumZ != NULL) { free_aligned(forceSumZ); forceSumZ = NULL; }
+  if (particleX != NULL) { CmiFreeAligned(particleX); particleX = NULL; }
+  if (particleY != NULL) { CmiFreeAligned(particleY); particleY = NULL; }
+  if (particleZ != NULL) { CmiFreeAligned(particleZ); particleZ = NULL; }
+  if (particleQ != NULL) { CmiFreeAligned(particleQ); particleQ = NULL; }
+  if (forceSumX != NULL) { CmiFreeAligned(forceSumX); forceSumX = NULL; }
+  if (forceSumY != NULL) { CmiFreeAligned(forceSumY); forceSumY = NULL; }
+  if (forceSumZ != NULL) { CmiFreeAligned(forceSumZ); forceSumZ = NULL; }
   numParticles = -1;
 }
 
@@ -268,13 +276,13 @@ void ProxyPatch::init(int numParticles) {
 
   // Allocate memory for the particles
   this->numParticles = numParticles;
-  particleX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  particleQ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceSumZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  particleX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  particleQ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceSumZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
 
   // Check in with the main proxy
   mainProxy.initCheckIn();
@@ -332,18 +340,26 @@ void ProxyPatch::patchData(int numParticles, float* particleX, float* particleY,
 void ProxyPatch::forceCheckIn(int numParticles, float* forceX, float* forceY, float* forceZ) {
 
   // Accumulate the force data
-  register vec4f* forceX_vec = (vec4f*)forceX;
-  register vec4f* forceY_vec = (vec4f*)forceY;
-  register vec4f* forceZ_vec = (vec4f*)forceZ;
-  register vec4f* forceSumX_vec = (vec4f*)forceSumX;
-  register vec4f* forceSumY_vec = (vec4f*)forceSumY;
-  register vec4f* forceSumZ_vec = (vec4f*)forceSumZ;
-  const int numParticles_vec = numParticles / (sizeof(vec4f) / sizeof(float));
-  for (int i = 0; i < numParticles_vec; i++) {
-    forceSumX_vec[i] += forceX_vec[i];
-    forceSumY_vec[i] += forceY_vec[i];
-    forceSumZ_vec[i] += forceZ_vec[i];
-  }
+  #if 0
+    register vec4f* forceX_vec = (vec4f*)forceX;
+    register vec4f* forceY_vec = (vec4f*)forceY;
+    register vec4f* forceZ_vec = (vec4f*)forceZ;
+    register vec4f* forceSumX_vec = (vec4f*)forceSumX;
+    register vec4f* forceSumY_vec = (vec4f*)forceSumY;
+    register vec4f* forceSumZ_vec = (vec4f*)forceSumZ;
+    const int numParticles_vec = numParticles / (sizeof(vec4f) / sizeof(float));
+    for (int i = 0; i < numParticles_vec; i++) {
+      forceSumX_vec[i] += forceX_vec[i];
+      forceSumY_vec[i] += forceY_vec[i];
+      forceSumZ_vec[i] += forceZ_vec[i];
+    }
+  #else
+    for (int i = 0; i < numParticles; i++) {
+      forceSumX[i] += forceX[i];
+      forceSumY[i] += forceY[i];
+      forceSumZ[i] += forceZ[i];
+    }
+  #endif
 
   // Once all computes this proxy called have contributed forces, send the data back to the patch itself
   checkInCount--;
index fea44f1accc449a3388a41b9b0b552c3ce7f1548..9a3f679da154a5c791fa4e09d7c26cdf88ff6a06 100644 (file)
@@ -21,14 +21,14 @@ SelfCompute::SelfCompute(CkMigrateMessage* msg) {
 
 SelfCompute::~SelfCompute() {
   #if USE_PROXY_PATCHES == 0
-    if (particleX != NULL) { free_aligned(particleX); particleX = NULL; }
-    if (particleY != NULL) { free_aligned(particleY); particleY = NULL; }
-    if (particleZ != NULL) { free_aligned(particleZ); particleZ = NULL; }
-    if (particleQ != NULL) { free_aligned(particleQ); particleQ = NULL; }
+    if (particleX != NULL) { CmiFreeAligned(particleX); particleX = NULL; }
+    if (particleY != NULL) { CmiFreeAligned(particleY); particleY = NULL; }
+    if (particleZ != NULL) { CmiFreeAligned(particleZ); particleZ = NULL; }
+    if (particleQ != NULL) { CmiFreeAligned(particleQ); particleQ = NULL; }
   #endif
-  if (forceX != NULL) { free_aligned(forceX); forceX = NULL; }
-  if (forceY != NULL) { free_aligned(forceY); forceY = NULL; }
-  if (forceZ != NULL) { free_aligned(forceZ); forceZ = NULL; }
+  if (forceX != NULL) { CmiFreeAligned(forceX); forceX = NULL; }
+  if (forceY != NULL) { CmiFreeAligned(forceY); forceY = NULL; }
+  if (forceZ != NULL) { CmiFreeAligned(forceZ); forceZ = NULL; }
   numParticles = -1;
 }
 
@@ -38,14 +38,14 @@ void SelfCompute::init(int numParticlesPerPatch) {
   // Allocate buffers for force data
   numParticles = numParticlesPerPatch;
   #if USE_PROXY_PATCHES == 0
-    particleX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-    particleQ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+    particleX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+    particleQ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
   #endif
-  forceX = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceY = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
-  forceZ = (float*)(malloc_aligned(numParticles * sizeof(float), 128));
+  forceX = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceY = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
+  forceZ = (float*)(CmiMallocAligned(numParticles * sizeof(float), 128));
 
   // Check in with the main chare
   mainProxy.initCheckIn();