Adding some BGP UPC preliminary performance counter support for use in
authorIsaac Dooley <idooley2@illinois.edu>
Wed, 2 Sep 2009 22:57:20 +0000 (22:57 +0000)
committerIsaac Dooley <idooley2@illinois.edu>
Wed, 2 Sep 2009 22:57:20 +0000 (22:57 +0000)
autotuning.

src/ck-cp/controlPoints.C
src/ck-perf/trace-controlPoints-BGP.C [new file with mode: 0644]
src/ck-perf/trace-controlPoints.C
src/ck-perf/trace-controlPoints.ci
src/ck-perf/trace-controlPointsBOC.h
src/scripts/Makefile

index 21682c950a7a9d3b10ab8bae1238be77f54427fe..3483d7f64165111be914c9b7d8655e55453c733a 100644 (file)
@@ -7,6 +7,7 @@
 #include "trace-projections.h"
 #include <pathHistory.h>
 
+
 /**
  *  \addtogroup ControlPointFramework
  *   @{
@@ -59,7 +60,7 @@ CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
   return CkReductionMsg::buildNew(3*sizeof(double),ret);   
 }
 /// An initcall that registers the idle time reducer idleTimeReduction()
-/*initcall*/ void registerIdleTimeReduction(void) {
+/*initproc*/ void registerIdleTimeReduction(void) {
   idleTimeReductionType=CkReduction::addReducer(idleTimeReduction);
 }
 
@@ -67,6 +68,9 @@ CkReductionMsg *idleTimeReduction(int nMsg,CkReductionMsg **msgs){
 
 
 
+
+
+
 /// Return an integer between 0 and num-1 inclusive
 /// If different seed, name, and random_seed values are provided, the returned values are pseudo-random
 unsigned int randInt(unsigned int num, const char* name, int seed=0){
@@ -1115,6 +1119,7 @@ void controlPointPriorityEntry(const char *name, int idx){
 
 
 
+
 /*! @} */
 
 
diff --git a/src/ck-perf/trace-controlPoints-BGP.C b/src/ck-perf/trace-controlPoints-BGP.C
new file mode 100644 (file)
index 0000000..9b61e9e
--- /dev/null
@@ -0,0 +1,100 @@
+#if CMK_BLUEGENEP
+
+#include <spi/UPC.h>
+#include <spi/UPC_Events.h>
+#include <iostream>
+
+
+/// Set up the BG/P UPC counters: initialize, configure (default mode, level-high trigger), zero, and start.
+void initBGP_UPC_Counters(void) {
+
+    // every process on the node calls BGP_UPC_Initialize()
+    BGP_UPC_Initialize();
+
+    // NOTE(review): the original note here said just one rank per node sets the
+    // counter config and zeros the counters, but no rank check is visible in this
+    // function, so every caller runs the steps below -- confirm that is intended.
+    // counter_mode = 0, 1, 2, 3 (plus some others … see UPC.h)
+    // counter_trigger = BGP_UPC_CFG_LEVEL_HIGH, BGP_UPC_CFG_EDGE_DEFAULT
+    
+    BGP_UPC_Mode_t counter_mode = BGP_UPC_MODE_DEFAULT;
+    BGP_UPC_Event_Edge_t counter_trigger = BGP_UPC_CFG_LEVEL_HIGH;
+    
+    BGP_UPC_Initialize_Counter_Config(counter_mode, counter_trigger);
+
+    BGP_UPC_Zero_Counter_Values();
+    BGP_UPC_Start(0);
+
+}
+
+
+
+
+
+/// Stop the UPC counters, print the torus PACKETS and 32BCHUNKS counters (xp, xm, yp, ym, zp, zm) to std::cout, then restart them.
+void printBGP_UPC_Counters(void) {
+    // Counting is stopped while the values are read and restarted at the end of this function.
+    BGP_UPC_Stop();
+   
+    // TODO: should also look at BGP_TORUS_XP_NO_TOKENS to determine if there is contention
+   
+    int64_t cxp = BGP_UPC_Read_Counter_Value(BGP_TORUS_XP_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus xp = " << cxp << std::endl;
+
+    int64_t cxm = BGP_UPC_Read_Counter_Value(BGP_TORUS_XM_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus xm = " << cxm << std::endl;
+
+    int64_t cyp = BGP_UPC_Read_Counter_Value(BGP_TORUS_YP_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus yp = " << cyp << std::endl;
+
+    int64_t cym = BGP_UPC_Read_Counter_Value(BGP_TORUS_YM_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus ym = " << cym << std::endl;
+
+    int64_t czp = BGP_UPC_Read_Counter_Value(BGP_TORUS_ZP_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus zp = " << czp << std::endl;
+
+    int64_t czm = BGP_UPC_Read_Counter_Value(BGP_TORUS_ZM_PACKETS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus zm = " << czm << std::endl;
+
+
+
+    int64_t cxpc = BGP_UPC_Read_Counter_Value(BGP_TORUS_XP_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus xp chunks = " << cxpc << std::endl;
+
+    int64_t cxmc = BGP_UPC_Read_Counter_Value(BGP_TORUS_XM_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus xm chunks = " << cxmc << std::endl;
+
+    int64_t cypc = BGP_UPC_Read_Counter_Value(BGP_TORUS_YP_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus yp chunks = " << cypc << std::endl;
+
+    int64_t cymc = BGP_UPC_Read_Counter_Value(BGP_TORUS_YM_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus ym chunks = " << cymc << std::endl;
+
+    int64_t czpc = BGP_UPC_Read_Counter_Value(BGP_TORUS_ZP_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus zp chunks = " << czpc << std::endl;
+
+    int64_t czmc = BGP_UPC_Read_Counter_Value(BGP_TORUS_ZM_32BCHUNKS, BGP_UPC_READ_EXCLUSIVE);
+    std::cout << "BGP_UPC_Read_Counter_Value returned torus zm chunks = " << czmc << std::endl;
+
+
+    // Nothing is saved here; the values read above are only printed. Resume counting.
+    BGP_UPC_Start(0);
+    
+}
+#else
+
+
+void printBGP_UPC_Counters(void){
+// No-op stub compiled when CMK_BLUEGENEP is false: not on BGP, so there are no UPC counters to print.
+}
+
+
+
+void initBGP_UPC_Counters(void){
+// No-op stub compiled when CMK_BLUEGENEP is false: not on BGP, so there are no UPC counters to set up.
+}
+
+
+
+#endif
index 776eb320f7df42e2a2778a2b4d5b77321a13f801..562274fe83ff03bd766867690966cb57a23cf163 100644 (file)
@@ -13,7 +13,8 @@
 CkpvStaticDeclare(TraceControlPoints*, _trace);
 
 // This global variable is required for any post-execution 
-// parallel analysis or activities the trace module might wish to perform.
+// parallel analysis or parallel activities the trace module 
+// might wish to perform.
 CkGroupID traceControlPointsGID;
 
 /**
@@ -145,11 +146,26 @@ void TraceControlPoints::free(void *where, int size) {
 
 void TraceControlPoints::traceClose(void)
 {
+  // Print out some performance counters on BG/P (called directly on the ckLocalBranch() pointer, before tracing is shut down below)
+  CProxy_TraceControlPointsBOC myProxy(traceControlPointsGID);
+  myProxy.ckLocalBranch()->printBGP_UPC_CountersBOC();
+
+    
   CkpvAccess(_trace)->endComputation();
   // remove myself from traceArray so that no tracing will be called.
   CkpvAccess(_traces)->removeTrace(this);
 }
 
+void printBGP_UPC_Counters(void);
+
+void TraceControlPointsBOC::printBGP_UPC_CountersBOC(void) {
+  // The CkMyPe() == 0 guard is commented out, so the call below is unconditional:
+       printBGP_UPC_Counters();
+  //    if(CkMyPe() == 0){ ... }
+}
+
+
+
 void TraceControlPoints::resetTimings(){
   totalIdleTime = 0.0;
   totalEntryMethodTime = 0.0;
index 08b364c728a68673c872ca0787d639061ad21f2a..4091af46aa55fb18e2d7da6d2278deebf84d5d0c 100644 (file)
@@ -9,7 +9,8 @@ module TraceControlPoints {
 
   group [migratable] TraceControlPointsBOC {
     entry TraceControlPointsBOC(void);
-  };
+    entry void printBGP_UPC_CountersBOC(void);
+                                     };
 
 };
 
index 4c9335ba6a8af22ebf604337efc1b12606fc7465..47e63159863e363581b3487cf92d98bc9b5ac13d 100644 (file)
@@ -2,6 +2,8 @@
 
 extern CkGroupID traceControlPointsGID;
 
+void initBGP_UPC_Counters(void);
+
 // We typically declare parallel object classes here for the purposes of
 // performing parallel operations for the trace module after the main
 // application has completed execution (and calls CkExit()).
@@ -18,14 +20,21 @@ class TraceControlPointsInit : public Chare {
   TraceControlPointsInit(CkArgMsg*) {
     traceControlPointsGID = CProxy_TraceControlPointsBOC::ckNew();
     CProxy_TraceControlPointsBOC controlPointsProxy(traceControlPointsGID);
+    CkPrintf("Initializing counters on pe %d\n", CkMyPe());
+   
   }
   TraceControlPointsInit(CkMigrateMessage *m):Chare(m) {}
 };
 
 class TraceControlPointsBOC : public CBase_TraceControlPointsBOC {
 public:
-  TraceControlPointsBOC(void) {};
+  TraceControlPointsBOC(void) {
+      initBGP_UPC_Counters();
+  };
   TraceControlPointsBOC(CkMigrateMessage *m) {};
+
+  void printBGP_UPC_CountersBOC(void);
+
 };
 
 
index ca60ce8ff533bcd7f8b10504f78a8c42af0cef85..c454d3669bae324b668f67f3c7fb1b1887813cec 100644 (file)
@@ -830,7 +830,11 @@ tau_selective.o: $(TAUROOT)/utils/tau_selective.cpp
 $(L)/libtrace-Tau.a: $(TAU_TRACE_OBJ) tau_selective.o
        $(CHARMC) $(TAU_LIBS) -o $@ $(TAU_TRACE_OBJ) tau_selective.o $(TAU_TRACE_OBJ)
 
-LIBTRACE_CONTROLPOINTS=trace-controlPoints.o
+# The BGP performance counter library UPC must be compiled with gcc:
+trace-controlPoints-BGP.o : trace-controlPoints-BGP.C
+       g++ -c trace-controlPoints-BGP.C -o trace-controlPoints-BGP.o  -I/bgsys/drivers/ppcfloor/arch/include/ $(OPTS) -g
+
+LIBTRACE_CONTROLPOINTS=trace-controlPoints.o trace-controlPoints-BGP.o
 $(L)/libtrace-controlPoints.a: $(LIBTRACE_CONTROLPOINTS)
        $(CHARMC) -o $@ $(LIBTRACE_CONTROLPOINTS)