add PAPI performance counter in autoPerf module
author Yanhua Sun <sun51@illinois.edu>
Sat, 14 Sep 2013 01:16:46 +0000 (20:16 -0500)
committer Yanhua Sun <sun51@illinois.edu>
Sat, 14 Sep 2013 01:16:46 +0000 (20:16 -0500)
src/ck-tune/trace-autoPerf.C
src/ck-tune/trace-autoPerf.h
src/ck-tune/trace-perf.C

index 1def57131b029497c92b656509c3c4cca8d4c796..d4b651b55f5cdf05e4d78e96c4a96829e92ea6c0 100644 (file)
@@ -66,6 +66,12 @@ CkReductionMsg *perfDataReduction(int nMsg,CkReductionMsg **msgs){
         //Total invocations
         ret->numInvocations += m->numInvocations;
         ret->objLoadMax = max(ret->objLoadMax, m->objLoadMax);
+#if CMK_HAS_COUNTER_PAPI
+        for(int i=0; i<NUMPAPIEVENTS; i++)
+        {
+            ret->papiValues[i] += m->papiValues[i]; 
+        }
+#endif
     }  
     CkReductionMsg *msg= CkReductionMsg::buildNew(sizeof(perfData),ret); 
     return msg;
@@ -169,6 +175,11 @@ void TraceAutoPerfBOC::globalPerfAnalyze(CkReductionMsg *msg )
     CkPrintf("Overhead(%):     \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) time:%f \n", data->overheadMin*100, data->overheadMax*100, overheadPercentage*100, data->overheadTotalTime);
     CkPrintf("Grainsize(ms):\t(avg:max)\t: (%.3f:    %.3f) \n", data->utilTotalTime/data->numInvocations*1000, data->grainsizeMax*1000);
     CkPrintf("Invocations:  \t%lld\n", data->numInvocations);
+#if CMK_HAS_COUNTER_PAPI
+    // Print one line per hardware counter from the reduced data.
+    // This must stay inside the PAPI guard: perfData::papiValues, papiEvents
+    // and the PAPI_* names only exist when CMK_HAS_COUNTER_PAPI is set.
+    char eventName[PAPI_MAX_STR_LEN];
+    for (int i=0;i<NUMPAPIEVENTS;i++) {
+        PAPI_event_code_to_name(papiEvents[i], eventName);
+        CkPrintf(" EVENT  %s   counter   %lld \n", eventName, data->papiValues[i]);
+    }
+#endif
     //)
    
     // --- time step measurement 
index d5b4f578314abdcc937dd0f7c1cd21fc7bcde717..1d0f998fc7557838f2959f2f4b8cd8f9cf122d71 100644 (file)
 #include <list>
 
 
+#if CMK_HAS_COUNTER_PAPI
+#include <papi.h>
+#ifdef USE_SPP_PAPI
+#define NUMPAPIEVENTS 8  // SPP build: the TraceAutoPerf constructor fills papiEvents[0..7] at runtime
+#else
+#define NUMPAPIEVENTS 9  // default build: matches the 9 preset events listed in the papiEvents[] initializer
+#endif
+#endif  // CMK_HAS_COUNTER_PAPI
+
+
+
 using namespace std;
 
 extern CkGroupID traceAutoPerfGID;
@@ -176,6 +187,10 @@ public:
     double  commTime;
     double  objLoadMax;
 
+#if CMK_HAS_COUNTER_PAPI
+    LONG_LONG_PAPI papiValues[NUMPAPIEVENTS];  // one value per PAPI event; set by getSummary() and summed element-wise in perfDataReduction()
+#endif
+
     // functions
     perfData(){}
 };
@@ -248,6 +263,11 @@ class TraceAutoPerf : public Trace {
 
 public:
 
+#if CMK_HAS_COUNTER_PAPI
+    int papiEventSet;  // PAPI event-set handle; created in the constructor with PAPI_create_eventset
+    LONG_LONG_PAPI papiValues[NUMPAPIEVENTS];  // latest counter readings; written by PAPI_read (readPAPI) and PAPI_stop (endComputation)
+    LONG_LONG_PAPI previous_papiValues[NUMPAPIEVENTS];  // copy of papiValues taken in resetAll(); getSummary() reports papiValues minus this
+#endif
     double  lastBeginExecuteTime;
     int     lastbeginMessageSize;
     int     lastEvent;
@@ -438,6 +458,15 @@ public:
       return totalEntryMethodInvocations;
   }
 
+#if CMK_HAS_COUNTER_PAPI
+  // Samples the current values of this object's PAPI event set into
+  // papiValues. A failed read is fatal and aborts through CmiAbort.
+  inline void readPAPI()
+  {
+      const int readStatus = PAPI_read(papiEventSet, papiValues);
+      if (readStatus == PAPI_OK) {
+          return;
+      }
+      CmiAbort("PAPI failed to read at begin execute!\n");
+  }
+#endif
+
   perfData* getSummary()
   {
       currentSummary->idleMin = currentSummary->idleMax= idleRatio(); 
@@ -449,6 +478,13 @@ public:
       currentSummary->grainsizeAvg = grainSize();
       currentSummary->grainsizeMax = maxGrainSize();
       currentSummary->numInvocations = totalEntryMethodInvocations;
+#if CMK_HAS_COUNTER_PAPI
+      readPAPI();
+      for(int i=0; i<NUMPAPIEVENTS; i++)
+      {
+          currentSummary->papiValues[i] = (papiValues[i] - previous_papiValues[i]);
+      }
+#endif
       return currentSummary;
   }
 
index 8bbe06ef0a334571e7f5d1e96c705a856b4683d0..77eef7b3f72caa0c4191d98c29e458c1a2b4eb84 100644 (file)
@@ -1,3 +1,12 @@
+#if CMK_HAS_COUNTER_PAPI
+#ifdef USE_SPP_PAPI
+int papiEvents[NUMPAPIEVENTS];  // event codes are filled in at runtime by the TraceAutoPerf constructor
+#else
+int papiEvents[NUMPAPIEVENTS] = { PAPI_L1_DCM, PAPI_L2_DCM, PAPI_L3_DCM, PAPI_TLB_DM, PAPI_L1_DCH, PAPI_L2_DCH, PAPI_L3_DCH, PAPI_FP_OPS, PAPI_TOT_CYC };  // fixed preset list passed to PAPI_add_events
+#endif
+#endif // CMK_HAS_COUNTER_PAPI
+
+
 CkpvStaticDeclare(TraceAutoPerf*, _trace);
 //-------- group information ---------------------------
 
@@ -14,6 +23,115 @@ TraceAutoPerf::TraceAutoPerf(char **argv)
     resetTimings();
     nesting_level = 0;
     whenStoppedTracing = 0; 
+
+    //PAPI related 
+#if CMK_HAS_COUNTER_PAPI
+  // We initialize and create the event sets for use with PAPI here.
+  int papiRetValue;
+  if(CkMyRank()==0){
+    papiRetValue = PAPI_library_init(PAPI_VER_CURRENT);
+    if (papiRetValue != PAPI_VER_CURRENT) {
+      CmiAbort("PAPI Library initialization failure!\n");
+    }
+#if CMK_SMP
+    if(PAPI_thread_init(pthread_self) != PAPI_OK){
+      CmiAbort("PAPI could not be initialized in SMP mode!\n");
+    }
+#endif
+  }
+
+#if CMK_SMP
+  //PAPI_thread_init has to finish before calling PAPI_create_eventset
+  #if CMK_SMP_TRACE_COMMTHREAD
+      CmiNodeAllBarrier();
+  #else
+      CmiNodeBarrier();
+  #endif
+#endif
+  // PAPI 3 mandates the initialization of the set to PAPI_NULL
+  papiEventSet = PAPI_NULL; 
+  if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
+    CmiAbort("PAPI failed to create event set!\n");
+  }
+#ifdef USE_SPP_PAPI
+  //  CmiPrintf("Using SPP counters for PAPI\n");
+  if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
+    papiEvents[0] = PAPI_FP_OPS;
+  }else{
+    if(CmiMyPe()==0){
+      CmiAbort("WARNING: PAPI_FP_OPS doesn't exist on this platform!");
+    }
+  }
+  if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
+    papiEvents[1] = PAPI_TOT_INS;
+  }else{
+    CmiAbort("WARNING: PAPI_TOT_INS doesn't exist on this platform!");
+  }
+  // The next two events are native events that have to be looked up by name.
+  // EventCode is only meaningful when the lookup itself returns PAPI_OK, so
+  // that result is checked before the code is queried; otherwise an
+  // uninitialized value (or the code left over from the previous lookup)
+  // would be tested and possibly registered.
+  int EventCode = PAPI_NULL;
+  int ret;
+  ret=PAPI_event_name_to_code("perf::PERF_COUNT_HW_CACHE_LL:ACCESS",&EventCode);
+  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
+    papiEvents[2] = EventCode;
+  }else{
+    CmiAbort("WARNING: perf::PERF_COUNT_HW_CACHE_LL:ACCESS doesn't exist on this platform!");
+  }
+  ret=PAPI_event_name_to_code("DATA_PREFETCHER:ALL",&EventCode);
+  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
+    papiEvents[3] = EventCode;
+  }else{
+    CmiAbort("WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
+  }
+  if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
+    papiEvents[4] = PAPI_L1_DCM;
+  }else{
+    CmiAbort("WARNING: PAPI_L1_DCM doesn't exist on this platform!");
+  }
+  if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
+    papiEvents[5] = PAPI_TOT_CYC;
+  }else{
+    CmiAbort("WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
+  }
+  if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
+    papiEvents[6] = PAPI_L2_DCM;
+  }else{
+    CmiAbort("WARNING: PAPI_L2_DCM doesn't exist on this platform!");
+  }
+  if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
+    papiEvents[7] = PAPI_L1_DCA;
+  }else{
+    CmiAbort("WARNING: PAPI_L1_DCA doesn't exist on this platform!");
+  }
+#else
+  // papiEvents[] is already statically initialized with its NUMPAPIEVENTS preset events; nothing to fill in here
+#endif
+  // Register every event with the event set. PAPI_add_events returns PAPI_OK
+  // on success, a negative PAPI error code on failure, or a positive number
+  // giving how many leading events were added before one was rejected. Any
+  // result other than PAPI_OK leaves the set incomplete, so all are fatal.
+  papiRetValue = PAPI_add_events(papiEventSet, papiEvents, NUMPAPIEVENTS);
+  if (papiRetValue != PAPI_OK) {
+    if (papiRetValue == PAPI_ECNFLCT) {
+      CmiAbort("PAPI events conflict! Please re-assign event types!\n");
+    } else if (papiRetValue > 0) {
+      // Partial success: the return value is a count, not an error code.
+      CmiPrintf("PAPI added only the first %d of %d events\n",papiRetValue,NUMPAPIEVENTS);
+      CmiAbort("PAPI failed to add designated events!\n");
+    } else {
+      // PAPI_strerror maps the error code to text; guard against a null
+      // result so the %s below never dereferences an invalid pointer.
+      const char *errorText = PAPI_strerror(papiRetValue);
+      CmiPrintf("PAPI failed with error %s val %d\n",errorText ? errorText : "(unknown)",papiRetValue);
+      CmiAbort("PAPI failed to add designated events!\n");
+    }
+  }
+  if(CkMyPe()==0)
+    {
+      CmiPrintf("Registered %d PAPI counters:",NUMPAPIEVENTS);
+      char nameBuf[PAPI_MAX_STR_LEN];
+      for(int i=0;i<NUMPAPIEVENTS;i++)
+       {
+         PAPI_event_code_to_name(papiEvents[i], nameBuf);
+         CmiPrintf("%s ",nameBuf);
+       }
+      CmiPrintf("\n");
+    }
+  memset(papiValues, 0, NUMPAPIEVENTS*sizeof(LONG_LONG_PAPI));
+  memset(previous_papiValues, 0, NUMPAPIEVENTS*sizeof(LONG_LONG_PAPI));
+#endif
+
+
     if (CkpvAccess(traceOnPe) == 0) return;
 }
 
@@ -48,6 +166,9 @@ void TraceAutoPerf::resetAll(){
     currentSummary->numBytes = 0;
     currentSummary->commTime = 0;
     currentSummary->objLoadMax = 0;
+#if CMK_HAS_COUNTER_PAPI
+    memcpy(previous_papiValues, papiValues, sizeof(LONG_LONG_PAPI)*NUMPAPIEVENTS);
+#endif
 }
 
 void TraceAutoPerf::traceBegin(void){
@@ -147,8 +268,36 @@ void TraceAutoPerf::endIdle(double curWallTime) {
     lastEvent =  -1;
 }
 
-void TraceAutoPerf::beginComputation(void) { }
-void TraceAutoPerf::endComputation(void) { }
+// Start-of-computation trace hook. In PAPI builds it starts counting on
+// papiEventSet and aborts if PAPI refuses; otherwise it does nothing.
+void TraceAutoPerf::beginComputation(void)
+{
+#if CMK_HAS_COUNTER_PAPI
+  const int startStatus = PAPI_start(papiEventSet);
+  if (startStatus != PAPI_OK) {
+    CmiAbort("PAPI failed to start designated counters!\n");
+  }
+#endif
+}
+
+// End-of-computation trace hook. In PAPI builds it stops the counters on
+// papiEventSet, leaving their final values in papiValues; otherwise it does
+// nothing.
+void TraceAutoPerf::endComputation(void)
+{
+#if CMK_HAS_COUNTER_PAPI
+  // A failed stop is only reported, never fatal: we are already at the end
+  // of the program.
+  const int stopStatus = PAPI_stop(papiEventSet, papiValues);
+  if (stopStatus != PAPI_OK) {
+    CkPrintf("Warning: PAPI failed to stop correctly!\n");
+  }
+  // NOTE: We should not do a complete close of PAPI until after the
+  // sts writer is done.
+#endif
+}
 
 void TraceAutoPerf::malloc(void *where, int size, void **stack, int stackSize)
 {