Added user events and network progress calls for performance testing.
author David Kunzman <kunzman2@illinois.edu>
Wed, 4 Mar 2009 19:29:00 +0000 (19:29 +0000)
committer David Kunzman <kunzman2@illinois.edu>
Wed, 4 Mar 2009 19:29:00 +0000 (19:29 +0000)
examples/charm++/cell/md/Makefile
examples/charm++/cell/md/main.C
examples/charm++/cell/md/main.h
examples/charm++/cell/md/md_config.h
examples/charm++/cell/md/pairCompute.C
examples/charm++/cell/md/patch.C
examples/charm++/cell/md/selfCompute.C

index 4fa633fe7f29e66bce0f2b64f4b1ddebd911230f..639af0c17d22cd851ed5a2fc282a4da9082edba8 100644 (file)
@@ -2,21 +2,33 @@ CHARM_BASE_DIR = ../../../..
 CHARM_BIN_DIR = $(CHARM_BASE_DIR)/bin
 CHARMC = $(CHARM_BIN_DIR)/charmc $(OPTS)
 
-OBJS = main.o patch.o selfCompute.o pairCompute.o
 PGM = md
 
-
 default: all
 all: $(PGM)
 
+OBJS = main.o patch.o selfCompute.o pairCompute.o
+SPE_LIBS = -spu-lib m
+ACCEL_LIBS = $(SPE_LIBS)
+
 
-md: $(OBJS)
-       $(CHARMC) -language charm++ -o $(PGM) $(OBJS) -spu-lib m
+################################################################################
+## Linker Commands
 
-md_proj: $(OBJS)
-       $(CHARMC) -language charm++ -o $(PGM) $(OBJS) -spu-lib m -tracemode summary -tracemode projections
+# Standard Build (no projections)
+$(PGM): $(OBJS)
+       $(CHARMC) -language charm++ -o $(PGM) $(OBJS) $(ACCEL_LIBS)
 
+# Projections Build
+$(PGM)_proj: $(OBJS)
+       $(CHARMC) -language charm++ -o $(PGM)_proj $(OBJS) $(ACCEL_LIBS) -tracemode summary -tracemode projections
 
+
+################################################################################
+## Chare Classes
+
+
+### Main
 main.decl.h main.def.h: main.ci
        $(CHARMC) main.ci
 
@@ -24,6 +36,7 @@ main.o: main.h main.C main.decl.h patch.decl.h selfCompute.decl.h pairCompute.de
        $(CHARMC) -c main.C
 
 
+### Patch
 patch.decl.h patch.def.h: patch.ci
        $(CHARMC) patch.ci
 
@@ -31,6 +44,7 @@ patch.o: patch.h main.h patch.C patch.decl.h main.decl.h patch.def.h
        $(CHARMC) -c patch.C
 
 
+### SelfCompute
 selfCompute.decl.h selfCompute.def.h: selfCompute.ci
        $(CHARMC) selfCompute.ci
 
@@ -38,6 +52,7 @@ selfCompute.o: selfCompute.h main.h selfCompute.C selfCompute.decl.h patch.decl.
        $(CHARMC) -c selfCompute.C
 
 
+### PairCompute
 pairCompute.decl.h pairCompute.def.h: pairCompute.ci
        $(CHARMC) pairCompute.ci
 
@@ -45,5 +60,8 @@ pairCompute.o: pairCompute.h main.h pairCompute.C pairCompute.decl.h patch.decl.
        $(CHARMC) -c pairCompute.C
 
 
+################################################################################
+## Binary File, Object Files, etc. Cleanup
+
 clean:
        rm -f *.decl.h *.def.h conv-host *.o $(PGM) charmrun *genSPECode*
index 6891be350ea915cd690ba42dc30702b201b43f11..5d317407d9c2e649cb6bb0b6baaa16f01e2a13df 100644 (file)
@@ -5,7 +5,7 @@
 #include "md_config.h"
 
 
-// Read-Onlys
+// Read-Only Variables
 CProxy_Main mainProxy;
 CProxy_Patch patchArrayProxy;
 CProxy_SelfCompute selfComputeArrayProxy;
@@ -63,6 +63,15 @@ Main::Main(CkArgMsg* msg) {
            numStepsRemaining
          );
 
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    traceRegisterUserEvent("Patch::forceCheckIn_callback()", PROJ_USER_EVENT_PATCH_FORCECHECKIN_CALLBACK);
+    traceRegisterUserEvent("Patch::integrate_callback()", PROJ_USER_EVENT_PATCH_INTEGRATE_CALLBACK);
+    traceRegisterUserEvent("SelfCompute::doCalc_callback()", PROJ_USER_EVENT_SELFCOMPUTE_DOCALC_CALLBACK);
+    traceRegisterUserEvent("PairCompute::doCalc_callback()", PROJ_USER_EVENT_PAIRCOMPUTE_DOCALC_CALLBACK);
+    traceRegisterUserEvent("CmiMachineProgressImpl", PROJ_USER_EVENT_MACHINEPROGRESS);
+  #endif
+
   // Spread a proxy to this main chare object to all processors via a read-only
   mainProxy = thisProxy;
 
index 0604545768724c6748f23690d6d34af99b734a28..d6eae556af0037855268802be47b097ebbde9538 100644 (file)
@@ -6,14 +6,18 @@
 #include "md_config.h"
 
 
-// DMK - DEBUG
+// DMK - DEBUG - Until a general Charm++ API for aligned memory buffers is defined,
+//   simply use malloc_aligned and free_aligned.  These functions are defined for
+//   net-linux-cell builds, but not general net-linux builds.  Declare them here
+//   if this is not a net-linux-cell build (for now, in this example program, it
+//   is not important if they do not actually align on non net-linux-cell builds).
 #if ((!(defined(CMK_CELL))) || (CMK_CELL == 0))
   void* malloc_aligned(int size, int align) { return malloc(size); }
   void free_aligned(void* ptr) { free(ptr); }
 #endif
 
 
-// Read-Onlys
+// Read-Only Variables
 extern CProxy_Main mainProxy;
 extern CProxy_Patch patchArrayProxy;
 extern CProxy_SelfCompute selfComputeArrayProxy;
index 580e53d7f016e38920260d7e95c6b92a9832ec26..8ea511478383048923bf5660d6139ca9b9e6f4ff 100644 (file)
@@ -2,20 +2,21 @@
 #define __MD_CONFIG_H__
 
 
+////////////////////////////////////////////////////////////////////////////////
+// Default Simulation Parameters
+
 #define DEFAULT_NUM_PARTICLES_PER_PATCH  (128)
 
 #define DEFAULT_NUM_PATCHES_X              (2)
 #define DEFAULT_NUM_PATCHES_Y              (2)
 #define DEFAULT_NUM_PATCHES_Z              (2)
 
-#define DEFAULT_NUM_STEPS                  (2)
+#define DEFAULT_NUM_STEPS                 (16)
 #define STEPS_PER_PRINT                    (4)
 
-#define PATCH_XYZ_TO_I(x,y,z)  (((z)*numPatchesX*numPatchesY)+((y)*numPatchesX)+(x))
-#define PATCH_I_TO_X(i)        ((i)%numPatchesX)
-#define PATCH_I_TO_Y(i)        (((i)/numPatchesX)%numPatchesY)
-#define PATCH_I_TO_Z(i)        ((i)/(numPatchesX*numPatchesY))
 
+////////////////////////////////////////////////////////////////////////////////
+// Physics Constants
 
 #define TIME_PER_STEP       (1.0e-15f)           // Unit: s
 #define SIM_BOX_SIDE_LEN    (1.0e-7f)            // Unit: m (NOTE: 1 nm = 10A)
 #define ELECTRON_MASS       (9.109382154e-31f)   // Unit: kg
 
 
+////////////////////////////////////////////////////////////////////////////////
+// Misc. Helper Macros
+
+#define PATCH_XYZ_TO_I(x,y,z)  (((z)*numPatchesX*numPatchesY)+((y)*numPatchesX)+(x))
+#define PATCH_I_TO_X(i)        ((i)%numPatchesX)
+#define PATCH_I_TO_Y(i)        (((i)/numPatchesX)%numPatchesY)
+#define PATCH_I_TO_Z(i)        ((i)/(numPatchesX*numPatchesY))
+
+
+////////////////////////////////////////////////////////////////////////////////
+// Misc. Macros for Performance Testing
+
+// DMK - DEBUG
+#define ENABLE_USER_EVENTS        (0)
+#define PROJ_USER_EVENT_PATCH_FORCECHECKIN_CALLBACK  (1120)
+#define PROJ_USER_EVENT_PATCH_INTEGRATE_CALLBACK     (1121)
+#define PROJ_USER_EVENT_SELFCOMPUTE_DOCALC_CALLBACK  (1130)
+#define PROJ_USER_EVENT_PAIRCOMPUTE_DOCALC_CALLBACK  (1140)
+#define PROJ_USER_EVENT_MACHINEPROGRESS              (1150)
+
+// DMK - DEBUG
+#define ENABLE_NETWORK_PROGRESS   (0)
+#if ENABLE_NETWORK_PROGRESS != 0
+  #if ENABLE_USER_EVENTS != 0
+    #define NetworkProgress  \
+      {  \
+        double __start_time__ = CmiWallTimer();  \
+        CmiMachineProgressImpl();  \
+        traceUserBracketEvent(PROJ_USER_EVENT_MACHINEPROGRESS, __start_time__, CmiWallTimer());  \
+      }
+  #else
+    #define NetworkProgress  CmiMachineProgressImpl();
+  #endif
+#else
+  #define NetworkProgress
+#endif
+
+
 #endif //__MD_CONFIG_H__
index 2835cb3368ae173e770cc57918ee8e6ff1294078..b3ce263c468a50c673124d6fc519ec633d7b6c5d 100644 (file)
@@ -70,6 +70,14 @@ void PairCompute::patchData(int numParticles, float* particleX, float* particleY
 
 void PairCompute::doCalc_callback() {
 
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __start_time__ = CmiWallTimer();
+  #endif
+
+  // DMK - DEBUG
+  NetworkProgress;
+
   // Calculate the index of patch 0 and send force data back to it
   int p0Index = thisIndex.x;
   int p0IndexX = PATCH_I_TO_X(p0Index);
@@ -77,12 +85,24 @@ void PairCompute::doCalc_callback() {
   int p0IndexZ = PATCH_I_TO_Z(p0Index);
   patchArrayProxy(p0IndexX, p0IndexY, p0IndexZ).forceCheckIn(numParticles, forceX[0], forceY[0], forceZ[0]);
 
+  // DMK - DEBUG
+  NetworkProgress;
+
   // Calculate the index of patch 1 and send force data back to it
   int p1Index = thisIndex.y;
   int p1IndexX = PATCH_I_TO_X(p1Index);
   int p1IndexY = PATCH_I_TO_Y(p1Index);
   int p1IndexZ = PATCH_I_TO_Z(p1Index);
   patchArrayProxy(p1IndexX, p1IndexY, p1IndexZ).forceCheckIn(numParticles, forceX[1], forceY[1], forceZ[1]);
+
+  // DMK - DEBUG
+  NetworkProgress;
+
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __end_time__ = CmiWallTimer();
+    traceUserBracketEvent(PROJ_USER_EVENT_PAIRCOMPUTE_DOCALC_CALLBACK, __start_time__, __end_time__);
+  #endif
 }
 
 
index 5286689737e2729ea20d7d688523ace383f730fa..6987291624984d7010d002bf1757e3c76fb33647 100644 (file)
@@ -92,23 +92,45 @@ void Patch::startIteration_common(int numIters) {
     fsz[i] = zero_vec;
   }
 
+  // DMK - DEBUG
+  NetworkProgress
+
   // Send particle data to self computes
   selfComputeArrayProxy(thisIndex.x, thisIndex.y, thisIndex.z).doCalc(numParticles, particleX, particleY, particleZ, particleQ);
 
   // Send particle data to pair computes
   const int index = PATCH_XYZ_TO_I(thisIndex.x, thisIndex.y, thisIndex.z);
   for (int i = 0; i < index; i++) {
+
+    // DMK - DEBUG
+    NetworkProgress
+
     pairComputeArrayProxy(i, index).patchData(numParticles, particleX, particleY, particleZ, particleQ, 1);
   }
   const int numPatches = numPatchesX * numPatchesY * numPatchesZ;
   for (int i = index + 1; i < numPatches; i++) {
+
+    // DMK - DEBUG
+    NetworkProgress
+
     pairComputeArrayProxy(index, i).patchData(numParticles, particleX, particleY, particleZ, particleQ, 0);
   }
+
+  // DMK - DEBUG
+  NetworkProgress
 }
 
 
 void Patch::forceCheckIn_callback() {
 
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __start_time__ = CmiWallTimer();
+  #endif
+
+  // DMK - DEBUG
+  NetworkProgress
+
   // Decrement the counter containing the number of remaining computes that need to report forces
   //   back to this patch.  Once all computes have checked in, send a message to accelerated
   //   'integrate' entry method.
@@ -116,10 +138,27 @@ void Patch::forceCheckIn_callback() {
   if (remainingForceCheckIns <= 0) {
     thisProxy(thisIndex.x, thisIndex.y, thisIndex.z).integrate();
   }
+
+  // DMK - DEBUG
+  NetworkProgress
+
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __end_time__ = CmiWallTimer();
+    traceUserBracketEvent(PROJ_USER_EVENT_PATCH_FORCECHECKIN_CALLBACK, __start_time__, __end_time__);
+  #endif
 }
 
 void Patch::integrate_callback() {
 
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __start_time__ = CmiWallTimer();
+  #endif
+
+  // DMK - DEBUG
+  NetworkProgress
+
   // Decrement the counter containing the number of remaining iterations.  If there are
   //   more iterations, do another one, otherwise, check in with main
   remainingIterations--;
@@ -128,6 +167,15 @@ void Patch::integrate_callback() {
   } else {
     mainProxy.patchCheckIn();
   }
+
+  // DMK - DEBUG
+  NetworkProgress
+
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __end_time__ = CmiWallTimer();
+    traceUserBracketEvent(PROJ_USER_EVENT_PATCH_INTEGRATE_CALLBACK, __start_time__, __end_time__);
+  #endif
 }
 
 
index e2852fc59e441f5f915a798bd6b8f1bca9fded86..2fc8361f220ab4a7bc8b0c4d8ab8f59223ca079a 100644 (file)
@@ -29,7 +29,25 @@ SelfCompute::~SelfCompute() {
 
 
 void SelfCompute::doCalc_callback() {
+
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __start_time__ = CmiWallTimer();
+  #endif
+
+  // DMK - DEBUG
+  NetworkProgress;
+
   patchArrayProxy(thisIndex.x, thisIndex.y, thisIndex.z).forceCheckIn(numParticles, forceX, forceY, forceZ);
+
+  // DMK - DEBUG
+  NetworkProgress;
+
+  // DMK - DEBUG
+  #if ENABLE_USER_EVENTS != 0
+    double __end_time__ = CmiWallTimer();
+    traceUserBracketEvent(PROJ_USER_EVENT_SELFCOMPUTE_DOCALC_CALLBACK, __start_time__, __end_time__);
+  #endif
 }