PICS framework for performance analysis report 95/895/25
author Yanhua Sun <sun51@illinois.edu>
Mon, 2 Nov 2015 18:13:48 +0000 (12:13 -0600)
committer Sam White <white67@illinois.edu>
Fri, 14 Oct 2016 18:28:22 +0000 (13:28 -0500)
The PICS framework automatically collects performance statistics and
analyzes them using a decision tree. The results of this automatic
performance analysis are available as a report at the end of the run. To
enable this feature, pass the link-time option: -tracemode perfReport

Change-Id: Iaa8cd9ebde16a897bd91e5eb7d53e4685fee485f

32 files changed:
examples/charm++/PICS/ping/Makefile [new file with mode: 0644]
examples/charm++/PICS/ping/ping.C [new file with mode: 0644]
examples/charm++/PICS/ping/ping.ci [new file with mode: 0644]
src/ck-core/init.C
src/ck-core/register.C
src/ck-core/register.h
src/ck-perf/trace-all.C
src/ck-perf/trace-common.C
src/ck-perf/trace-common.h
src/ck-perf/trace-projections.h
src/ck-perf/trace-projector.C
src/ck-perf/trace.h
src/ck-pics/picsautoperf.C [new file with mode: 0644]
src/ck-pics/picsautoperf.ci [new file with mode: 0644]
src/ck-pics/picsautoperf.h [new file with mode: 0644]
src/ck-pics/picsautoperfAPI.C [new file with mode: 0644]
src/ck-pics/picsautoperfAPI.h [new file with mode: 0644]
src/ck-pics/picsautoperfAPIC.h [new file with mode: 0644]
src/ck-pics/picsdecisiontree.C [new file with mode: 0644]
src/ck-pics/picsdecisiontree.h [new file with mode: 0644]
src/ck-pics/picsdefs.h [new file with mode: 0644]
src/ck-pics/picsdefscpp.h [new file with mode: 0644]
src/ck-pics/picstreenode.C [new file with mode: 0644]
src/ck-pics/picstreenode.h [new file with mode: 0644]
src/ck-pics/trace-perf.C [new file with mode: 0644]
src/ck-pics/trace-perf.h [new file with mode: 0644]
src/conv-core/conv-trace.h
src/scripts/Make.cidepends
src/scripts/Makefile
src/scripts/charmc
src/util/fuzzytree.txt [new file with mode: 0644]
src/util/tree.txt [new file with mode: 0644]

diff --git a/examples/charm++/PICS/ping/Makefile b/examples/charm++/PICS/ping/Makefile
new file mode 100644 (file)
index 0000000..66b7f27
--- /dev/null
@@ -0,0 +1,35 @@
+# Shared examples makefile; presumably the source of the `run` macro that the
+# test targets below rely on (nothing in this file defines it). The leading
+# '-' makes a missing file non-fatal. NOTE(review): confirm the relative path.
+-include ../../../common.mk
+
+SRC=../../../src
+CHARMC=../../../../bin/charmc $(OPTS) -g
+
+OBJS = ping.o
+
+all:   pgm
+
+# Default build: link with the PICS performance-report trace module.
+pgm: $(OBJS)
+       $(CHARMC) -language charm++ -o pgm $(OBJS) -tracemode perfReport
+
+# Variant that additionally links the projections trace module.
+pgm.prj: $(OBJS)
+       $(CHARMC) -tracemode projections -language charm++ -o pgm.prj $(OBJS) -tracemode perfReport
+
+# Run charmc on the interface file; `cifiles` is a stamp file.
+cifiles: ping.ci
+       $(CHARMC)  ping.ci
+       touch cifiles
+
+# pgm.prj is removed as well so that both link variants are cleaned up.
+clean:
+       rm -f *.decl.h *.def.h conv-host *.o pgm pgm.prj charmrun cifiles pgm.exe pgm.pdb pgm.ilk
+
+ping.o: ping.C cifiles
+       $(CHARMC) -c ping.C
+
+test: all
+       @echo "Intra-processor ping.."
+       $(call run, +p1 ./pgm )
+       @echo "Inter-processor ping.."
+       $(call run, +p2 ./pgm )
+
+bgtest: all
+       @echo "Intra-processor ping.."
+       $(call run, +p1 ./pgm +x1 +y1 +z1 )
+       @echo "Inter-processor ping.."
+       $(call run, +p2 ./pgm +x2 +y1 +z1 )
+
diff --git a/examples/charm++/PICS/ping/ping.C b/examples/charm++/PICS/ping/ping.C
new file mode 100644 (file)
index 0000000..e851a65
--- /dev/null
@@ -0,0 +1,126 @@
+#include <string.h> // for strlen, and strcmp
+#include <charm++.h>
+#include "picsautoperfAPI.h"
+//#include "picsautotunerAPI.h"
+#define NITER 20
+#define PAYLOAD 1024
+#define TUNE_FREQ 4
+
+#include "ping.decl.h"
+// Message exchanged between Ping1 elements; the CMessage_PingMsg base comes
+// from the generated ping.decl.h.
+class PingMsg : public CMessage_PingMsg
+{
+public:
+  char *x;  // payload buffer; its length is chosen at allocation via `new (size) PingMsg()`
+};
+
+
+CProxy_Main mainProxy;
+int maxIter;
+int payload;
+int workLoad;
+
+class Main: public CBase_Main{
+
+  Main_SDAG_CODE
+
+  double startTimer;
+  int totalCnt;
+  CProxy_Ping1 arr1;
+  int iter;
+  int dv, minv, maxv;
+
+public:
+  Main(CkMigrateMessage *m) {}
+  Main(CkArgMsg* m) {
+    if(CkNumPes()>2) {
+      CkAbort("Run this program on 1 or 2 processors only.\n");
+    }
+
+    iter = 0;
+    maxIter=NITER;
+    payload=PAYLOAD;
+    workLoad = 1024;
+    dv = 1;
+    minv = 1;
+    maxv = 32;
+    if(m->argc>1)
+      payload=atoi(m->argv[1]);
+    if(m->argc>2)
+      maxIter=atoi(m->argv[2]);
+    if(m->argc>3)
+      workLoad =atoi(m->argv[3]);
+    if(m->argc>4)
+      dv=atoi(m->argv[4]);
+    CkPrintf("ping with payload: %d workload:%d maxIter: %d\n", payload, workLoad, maxIter);
+    mainProxy = thishandle;
+    arr1 = CProxy_Ping1::ckNew(2);
+    delete m;
+    thisProxy.prepare();
+  };
+
+  void prepare() {
+    char *names[] = {"PING"};
+    PICS_setNumOfPhases(true, 1, names);
+    //PICS_registerTunableParameterFields("PIPELINE_NUM", TP_INT, dv, minv, maxv, 1, PICS_EFF_GRAINSIZE, -1, OP_ADD, TS_SIMPLE, 1);
+    thisProxy.run();
+  }
+
+};
+
+// Synthetic compute kernel: adds up the integer expression i/3*(i-7)/11 for
+// every i in [0, cnt) and returns the total as a double.
+double doWork(int cnt) {
+  double total = 0;
+  int idx = 0;
+  while(idx < cnt)
+  {
+    const int term = idx/3*(idx-7)/11;  // evaluated entirely in int arithmetic
+    total += term;
+    ++idx;
+  }
+  return total;
+}
+
+class Ping1 : public CBase_Ping1
+{
+  int cnt;
+  double sum;
+public:
+  Ping1() {
+    cnt = 0;
+    sum = 0;
+  }
+
+  Ping1(CkMigrateMessage *m) {}
+
+  void start() {
+    int valid = 0;
+    int frags = 1;
+    //int frags = (int)PICS_getTunedParameter("PIPELINE_NUM", &valid);
+    CkPrintf("getTunned is    %d\n", frags);
+    if(!valid)
+      frags = 1;
+    int size = payload/frags;
+    for(int i=0; i<frags; i++)
+    {
+      sum += doWork(workLoad/frags);
+      PingMsg *msg = new (size) PingMsg();
+      memset(msg->x, sum, sizeof(double));
+      thisProxy[1].recv(msg);
+    }
+  }
+
+  void recv(PingMsg *msg)
+  {
+    int valid = 0;
+    int frags = 1;
+    //(int)PICS_getTunedParameter("PIPELINE_NUM", &valid);
+    if(!valid)
+      frags = 1;
+    cnt++;
+    sum += doWork(workLoad/frags);
+    if(cnt == frags)
+    {
+      mainProxy.report(sum);
+      cnt = 0;
+    }
+    delete msg;
+  }
+};
+
+#include "ping.def.h"
diff --git a/examples/charm++/PICS/ping/ping.ci b/examples/charm++/PICS/ping/ping.ci
new file mode 100644 (file)
index 0000000..effc2cb
--- /dev/null
@@ -0,0 +1,43 @@
+// Charm++ interface for the PICS ping example.
+mainmodule ping {
+  readonly CProxy_Main mainProxy;
+  readonly int payload;
+  readonly int workLoad;
+
+  mainchare Main{
+    entry Main(CkArgMsg *);
+    entry void prepare();
+    entry  void report(double d);
+    // Driver loop: each iteration is one PICS step containing one phase.
+    entry void run() {
+      for(iter = 0; iter<maxIter; iter++)
+      {
+        // Open the step and phase 0, then start the ping on element 0.
+        serial{
+          startTimer = CkWallTimer();
+          PICS_startStep(true);
+          PICS_startPhase(true, 0);
+          arr1[0].start();
+        }
+        // Runs when report() is delivered: close the phase, then end the
+        // step, passing a resume-thread callback.
+        when report(double sum) serial {
+          PICS_endPhase(true);
+          CkPrintf("End Phase\n");
+          PICS_endStepResumeCb(true, CkCallbackResumeThread());
+          CkPrintf("End Step and Resume CB\n");
+        }
+      }
+
+       
+      serial {
+        CkPrintf("------------------in exiting \n");
+        CkExit();
+      }
+    };
+  };
+
+  // Variable-length message: x is sized at allocation time.
+  message PingMsg{char x[];};
+
+  array [1D] Ping1 {
+    entry Ping1();
+    entry void start();
+    entry void recv(PingMsg *);
+  };
+
+};
index a6b54c18e1174ba0c9c28ec8a274241a81dcdaea..e29a0d4588a847faeaf46d2187bcdaaef03826ec 100644 (file)
@@ -1261,27 +1261,6 @@ void _initCharm(int unused_argc, char **argv)
                _registerCkMemCheckpoint();
 #endif
 
-
-               /*
-                 Setup Control Point Automatic Tuning Framework.
-
-                 By default it is enabled as a part of charm, 
-                 however it won't enable its tracing module 
-                 unless a +CPEnableMeasurements command line argument
-                 is specified. See trace-common.C for more info
-
-                 Thus there should be no noticable overhead to 
-                 always having the control point framework linked
-                 in.
-                 
-               */
-#if CMK_WITH_CONTROLPOINT
-               _registerPathHistory();
-               _registerControlPoints();
-               _registerTraceControlPoints();
-#endif
-
-
                /**
                  CkRegisterMainModule is generated by the (unique)
                  "mainmodule" .ci file.  It will include calls to 
index 7a9110d0af148702e037cea142e3e4d32a33dc52..fb4fa85707c227a21a31d41b2d5567c23c69420f 100644 (file)
@@ -105,6 +105,14 @@ void CkRegisterBase(int derivedIdx, int baseIdx)
     _chareTable[derivedIdx]->addBase(baseIdx);
 }
 
+// Linear search of _chareTable for the first entry whose name equals `name`.
+// Returns that entry's index, or -1 when no entry matches.
+int CkGetChareIdx(const char *name){
+  int found = -1;
+  for(int idx=0; found<0 && idx<_chareTable.size(); idx++){
+    if(!strcmp(name, _chareTable[idx]->name))
+      found = idx;
+  }
+  return found;
+}
+
 extern "C"
 void CkRegisterReadonly(const char *name,const char *type,
        size_t size, void *ptr,CkPupReadonlyFnPtr pup_fn)
index e680c3f91d256f45cd42ecc15ed158f30cdbd759..841790a0feb1aea74246c065b7a9805630d04c86 100644 (file)
@@ -303,5 +303,7 @@ extern CkRegisteredInfo<ReadonlyMsgInfo> _readonlyMsgs;
 extern void _registerInit(void);
 extern void _registerDone(void);
 
+extern int CkGetChareIdx(const char *name);
+
 /*@}*/
 #endif
index 74b271b681d25bd4872cd636ea8d4d53ff04a3bc..c79b0910b54987589cded1c612ebc16c7bb3698a 100644 (file)
@@ -1,13 +1,13 @@
 extern void _createTraceprojections(char **argv);
 extern void _createTracesummary(char **argv);
 extern void _createTraceprojector(char **argv);
-extern void _createTraceautoPerf(char **argv);
+extern void _createTraceperfReport(char **argv);
 
 void _createTraceall(char **argv)
 {
   _createTraceprojections(argv);
   _createTracesummary(argv);
-  _createTraceautoPerf(argv);
+  _createTraceperfReport(argv);
   _createTraceprojector(argv);
 }
 
index e1ca112443fa631e0e8099ca6b8445b343114688..d10a9dba2f72787c444fdb3714b069f908f82343 100644 (file)
@@ -34,6 +34,7 @@ class TraceBluegene;
 CkpvDeclare(TraceBluegene*, _tracebg);
 int traceBluegeneLinked=0;                     // if trace-bluegene is linked
 
+CkpvDeclare(bool,   dumpData);
 CkpvDeclare(double, traceInitTime);
 CkpvDeclare(double, traceInitCpuTime);
 CpvDeclare(int, traceOn);
@@ -43,7 +44,6 @@ CkpvDeclare(char*, partitionRoot);
 CkpvDeclare(int, traceRootBaseLength);
 CkpvDeclare(char*, selective);
 CkpvDeclare(bool, verbose);
-
 bool outlierAutomatic;
 bool findOutliers;
 int numKSeeds;
@@ -73,6 +73,8 @@ static void traceCommonInit(char **argv)
   DEBUGF(("[%d] in traceCommonInit.\n", CkMyPe()));
   CkpvInitialize(double, traceInitTime);
   CkpvAccess(traceInitTime) = CmiStartTimer();
+  CkpvInitialize(bool, dumpData);
+  CkpvAccess(dumpData) = true;
   CkpvInitialize(double, traceInitCpuTime);
   CkpvAccess(traceInitCpuTime) = TRACE_CPUTIMER();
   CpvInitialize(int, traceOn);
@@ -399,6 +401,13 @@ static int checkTraceOnPe(char **argv)
     CkListString procList(procs);
     traceOnPE = procList.includes(CkMyPe());
   }
+
+  if (CmiGetArgFlagDesc(argv, "+traceselective", " Whether only dump data for PEs based on perfReport"))
+  {
+      if(CkMyPe() !=0)
+          CkpvAccess(dumpData) = false;
+  }
+
   // must include pe 0, otherwise sts file is not generated
   if (CkMyPe()==0) traceOnPE = 1;
 #if !CMK_TRACE_IN_CHARM
@@ -567,6 +576,40 @@ void endAppWork()
 #endif
 }
 
+// C entry point: forwards countNewChare() to the trace array, but only while
+// tracing is on and a trace array exists. Empty without CMK_TRACE_ENABLED.
+extern "C"
+void countNewChare()
+{
+#if CMK_TRACE_ENABLED
+    if (!CpvAccess(traceOn) || !CkpvAccess(_traces))
+        return;
+    CkpvAccess(_traces)->countNewChare();
+#endif
+}
+
+
+// C entry point: forwards beginTuneOverhead() to the trace array, but only
+// while tracing is on and a trace array exists. Empty without
+// CMK_TRACE_ENABLED.
+extern "C"
+void beginTuneOverhead()
+{
+#if CMK_TRACE_ENABLED
+    if (!CpvAccess(traceOn) || !CkpvAccess(_traces))
+        return;
+    CkpvAccess(_traces)->beginTuneOverhead();
+#endif
+}
+
+// C entry point: forwards endTuneOverhead() to the trace array, but only
+// while tracing is on and a trace array exists. Empty without
+// CMK_TRACE_ENABLED.
+extern "C"
+void endTuneOverhead()
+{
+#if CMK_TRACE_ENABLED
+    if (!CpvAccess(traceOn) || !CkpvAccess(_traces))
+        return;
+    CkpvAccess(_traces)->endTuneOverhead();
+#endif
+}
+
 extern "C"
 void traceUserBracketEvent(int e, double beginT, double endT)
 {
@@ -898,7 +941,7 @@ CkpvDeclare(int, papiStopped);
 #ifdef USE_SPP_PAPI
 int papiEvents[NUMPAPIEVENTS];
 #else
-int papiEvents[NUMPAPIEVENTS] = { PAPI_L2_DCM, PAPI_FP_OPS };
+int papiEvents[NUMPAPIEVENTS] = { PAPI_L1_TCM, PAPI_L1_TCA, PAPI_L2_TCM, PAPI_L2_TCA};
 #endif
 #endif // CMK_HAS_COUNTER_PAPI
 
@@ -987,9 +1030,8 @@ void initPAPI() {
       CmiAbort("PAPI events conflict! Please re-assign event types!\n");
     } else {
       char error_str[PAPI_MAX_STR_LEN];
-      PAPI_perror(error_str);
+      //PAPI_perror(error_str);
       //PAPI_perror(papiRetValue,error_str,PAPI_MAX_STR_LEN);
-      CmiPrintf("PAPI failed with error %s val %d\n",error_str,papiRetValue);
       CmiAbort("PAPI failed to add designated events!\n");
     }
   }
@@ -1011,4 +1053,22 @@ void initPAPI() {
 }
 #endif
 
+// C entry point: passes a message-send event (envelope, destination pe,
+// size) to the trace array while tracing is on and a trace array exists.
+extern "C"
+void traceSend(void *env, int pe, int size)
+{
+#if CMK_TRACE_ENABLED
+  if (!CpvAccess(traceOn) || !CkpvAccess(_traces))
+      return;
+  CkpvAccess(_traces)->messageSend(env, pe, size);
+#endif
+}
+
+// C entry point: passes a message-receive event (envelope, size) to the
+// trace array while tracing is on and a trace array exists.
+extern "C"
+void traceRecv(void *env , int size)
+{
+#if CMK_TRACE_ENABLED
+  if (!CpvAccess(traceOn) || !CkpvAccess(_traces))
+      return;
+  CkpvAccess(_traces)->messageRecv(env, size);
+#endif
+}
+
 /*@}*/
index 15233620692b82495f6a737940a426b95bcbaddb..e4df30a02b745033797c15a69befd7d7781712f8 100644 (file)
@@ -125,7 +125,7 @@ extern "C" void (*registerMachineUserEvents())();
 #ifdef USE_SPP_PAPI
 #define NUMPAPIEVENTS 6
 #else
-#define NUMPAPIEVENTS 2
+#define NUMPAPIEVENTS 4
 #endif
 CkpvExtern(int, papiEventSet);
 CkpvExtern(LONG_LONG_PAPI*, papiValues);
index 5dfe5a935f2af070f59b28de8e479a27321adccd..e5ba517f75b415811f874558b9d55d3893154448 100644 (file)
@@ -20,7 +20,7 @@
 
 #include "pup.h"
 
-#define PROJECTION_VERSION  "7.0"
+#define PROJECTION_VERSION  "8.0"
 
 #define PROJ_ANALYSIS 1
 
index 7e3b3be9073686b56a328ffaf36dae6cc890c675..f28ed63d80a2a7d520f46786d78b442d5931c685 100644 (file)
@@ -13,10 +13,8 @@ public:
   char *str;
   UsrEvent(int _e, char* _s): e(_e),str(_s) {}
 };
-typedef CkVec<UsrEvent *>   UsrEventVec;
-CkpvStaticDeclare(UsrEventVec, usrEvents);
-
 
+CkpvStaticDeclare(CkVec<UsrEvent *>, usrEventsProjector);
 #if ! CMK_TRACE_ENABLED
 static int warned=0;
 #define OPTIMIZED_VERSION      \
@@ -34,7 +32,7 @@ void _createTraceprojector(char **argv)
 {
   DEBUGF(("%d createTraceProjector\n", CkMyPe()));
   CkpvInitialize(Trace*, _traceproj);
-  CkpvInitialize(CkVec<UsrEvent *>, usrEvents);
+  CkpvInitialize(CkVec<UsrEvent *>, usrEventsProjector);
   CkpvAccess(_traceproj) = new  TraceProjector(argv);
   CkpvAccess(_traces)->addTrace(CkpvAccess(_traceproj));
 }
@@ -54,8 +52,8 @@ CkAssert(e==-1 || e>=0);
   CkAssert(evt != NULL);
   int event;
   int biggest = -1;
-  for (int i=0; i<CkpvAccess(usrEvents).length(); i++) {
-    int cur = CkpvAccess(usrEvents)[i]->e;
+  for (int i=0; i<CkpvAccess(usrEventsProjector).length(); i++) {
+    int cur = CkpvAccess(usrEventsProjector)[i]->e;
     if (cur == e) 
       CmiAbort("UserEvent double registered!");
     if (cur > biggest) biggest = cur;
@@ -64,7 +62,7 @@ CkAssert(e==-1 || e>=0);
   // hence automatically assigned events will start from id of 0.
   if (e==-1) event = biggest+1; // automatically assign new event id
   else event = e;
-  CkpvAccess(usrEvents).push_back(new UsrEvent(event,(char *)evt));
+  CkpvAccess(usrEventsProjector).push_back(new UsrEvent(event,(char *)evt));
   return event;
 }
 
@@ -88,10 +86,10 @@ extern "C" void writeSts(){
        delete[] fname;
                    
         fprintf(stsfp, "VERSION %s\n", PROJECTION_VERSION);
-        traceWriteSTS(stsfp,CkpvAccess(usrEvents).length());
+        traceWriteSTS(stsfp,CkpvAccess(usrEventsProjector).length());
         int i;
-        for(i=0;i<CkpvAccess(usrEvents).length();i++)
-             fprintf(stsfp, "EVENT %d %s\n", CkpvAccess(usrEvents)[i]->e, CkpvAccess(usrEvents)[i]->str);
+        for(i=0;i<CkpvAccess(usrEventsProjector).length();i++)
+             fprintf(stsfp, "EVENT %d %s\n", CkpvAccess(usrEventsProjector)[i]->e, CkpvAccess(usrEventsProjector)[i]->str);
         fprintf(stsfp, "END\n");
        fclose(stsfp);
                             
index 665825493e37bea8e664c8db2486cf606e8d07c4..ef245a0cdb32a6542bcdc480861fb8bc449401a1 100644 (file)
@@ -99,6 +99,9 @@ protected:
      //interact with application
      virtual void beginAppWork() {}
      virtual void endAppWork() {}
+     virtual void countNewChare() {}
+     virtual void beginTuneOverhead() {}
+     virtual void endTuneOverhead() {}
 
      // a user supplied integer value(likely a timestep)
      virtual void userSuppliedData(int e) { (void)e; }
@@ -125,7 +128,8 @@ protected:
      }
      virtual void creationDone(int num=1) { (void)num; }
      // ???
-     virtual void messageRecv(char *env, int pe) { (void)env; (void)pe; }
+     virtual void messageRecv(void *env, int size) { (void)env; (void)size; }
+     virtual void messageSend(void *env, int pe, int size) { (void)env; (void)pe; (void)size; }
      virtual void beginSDAGBlock(
        int event,   // event type defined in trace-common.h
        int msgType, // message type
@@ -254,6 +258,9 @@ public:
     
     inline void beginAppWork() { ALLDO(beginAppWork());}
     inline void endAppWork() { ALLDO(endAppWork());}
+    inline void countNewChare() { ALLDO(countNewChare());}
+    inline void beginTuneOverhead() { ALLDO(beginTuneOverhead());}
+    inline void endTuneOverhead() { ALLDO(endTuneOverhead());}
 
        inline void userSuppliedData(int d) { ALLDO(userSuppliedData(d));}
 
@@ -284,7 +291,8 @@ public:
     inline void endExecute(void) {ALLREVERSEDO(endExecute());}
     inline void endExecute(char *msg) {ALLREVERSEDO(endExecute(msg));}
     inline void changeLastEntryTimestamp(double ts) {ALLDO(changeLastEntryTimestamp(ts));}
-    inline void messageRecv(char *env, int pe) {ALLDO(messageRecv(env, pe));}
+    inline void messageRecv(void *env, int size) {ALLDO(messageRecv(env, size));}
+    inline void messageSend(void *env, int pe, int size) {ALLDO(messageSend(env, pe, size));}
     inline void beginPack(void) {ALLDO(beginPack());}
     inline void endPack(void) {ALLDO(endPack());}
     inline void beginUnpack(void) {ALLDO(beginUnpack());}
@@ -383,6 +391,9 @@ extern "C" {
 #define _TRACE_USER_EVENT_BRACKET(x,bt,et) _TRACE_ONLY(CkpvAccess(_traces)->userBracketEvent(x,bt,et))
 #define _TRACE_BEGIN_APPWORK() _TRACE_ONLY(CkpvAccess(_traces)->beginAppWork())
 #define _TRACE_END_APPWORK() _TRACE_ONLY(CkpvAccess(_traces)->endAppWork())
+#define _TRACE_NEW_CHARE()  _TRACE_ONLY(CkpvAccess(_traces)->countNewChare())
+#define _TRACE_BEGIN_TUNEOVERHEAD() _TRACE_ONLY(CkpvAccess(_traces)->beginTuneOverhead())
+#define _TRACE_END_TUNEOVERHEAD() _TRACE_ONLY(CkpvAccess(_traces)->endTuneOverhead())
 #define _TRACE_CREATION_1(env) _TRACE_ONLY(CkpvAccess(_traces)->creation(env,env->getEpIdx()))
 #define _TRACE_CREATION_DETAILED(env,ep) _TRACE_ONLY(CkpvAccess(_traces)->creation(env,ep))
 #define _TRACE_CREATION_N(env, num) _TRACE_ONLY(CkpvAccess(_traces)->creation(env, env->getEpIdx(), num))
@@ -393,7 +404,8 @@ extern "C" {
 #define _TRACE_BEGIN_EXECUTE(env, obj) _TRACE_ONLY(CkpvAccess(_traces)->beginExecute(env, obj))
 #define _TRACE_BEGIN_EXECUTE_DETAILED(evt,typ,ep,src,mlen,idx, obj) _TRACE_ONLY(CkpvAccess(_traces)->beginExecute(evt,typ,ep,src,mlen,idx, obj))
 #define _TRACE_END_EXECUTE() _TRACE_ONLY(CkpvAccess(_traces)->endExecute())
-#define _TRACE_MESSAGE_RECV(env, pe) _TRACE_ONLY(CkpvAccess(_traces)->messageRecv(env, pe))
+#define _TRACE_MESSAGE_RECV(env, size) _TRACE_ONLY(CkpvAccess(_traces)->messageRecv(env, size))
+#define _TRACE_MESSAGE_SEND(env, pe, size) _TRACE_ONLY(CkpvAccess(_traces)->messageSend(env, pe, size))
 #define _TRACE_BEGIN_PACK() _TRACE_ONLY(CkpvAccess(_traces)->beginPack())
 #define _TRACE_END_PACK() _TRACE_ONLY(CkpvAccess(_traces)->endPack())
 #define _TRACE_BEGIN_UNPACK() _TRACE_ONLY(CkpvAccess(_traces)->beginUnpack())
diff --git a/src/ck-pics/picsautoperf.C b/src/ck-pics/picsautoperf.C
new file mode 100644 (file)
index 0000000..24825d9
--- /dev/null
@@ -0,0 +1,800 @@
+#include  <stdlib.h>
+#include <stdio.h>
+#include "charm++.h"
+#include "pathHistory.h"
+#include "TopoManager.h"
+#include "picsdefs.h"
+#include "picsdefscpp.h"
+#include "TraceAutoPerf.decl.h"
+#include "picsautoperf.h"
+#include <algorithm>
+#include <math.h>
+#include "trace-perf.h"
+
+#define PICS_CODE  15848
+#define TRACE_START(id)
+#define TRACE_END(step, id)
+
+#define TRIGGER_PERF_IDLE_PERCENTAGE 0.1 
+
+int user_call = 0;
+int WARMUP_STEP;
+int PAUSE_STEP;
+#define   CP_PERIOD  200
+
+#define TIMESTEP_RATIO_THRESHOLD 0
+
+#define DEBUG_PRINT(x) 
+
+#define NumOfSetConfigs   1
+
+//ldb related quick hack
+CkpvDeclare(double, timeForLdb);
+CkpvDeclare(double, timeBeforeLdb);
+CkpvDeclare(double, currentTimeStep);
+CkpvDeclare(int, cntAfterLdb);
+//scalable tree analysis
+CkpvDeclare(int, myParent);
+CkpvDeclare(int, myInterGroupParent);
+CkpvDeclare(int, numChildren);
+
+#if USE_MIRROR
+extern CProxy_MirrorUpdate MirrorProxy;
+#endif
+CkpvDeclare(int, numOfPhases);
+CkpvDeclare(std::vector<char*>, phaseNames);
+CkpvExtern(bool, dumpData);
+CkpvDeclare(bool,   isExit);
+CkpvDeclare(SavedPerfDatabase*, perfDatabase);
+CkpvDeclare(Database<CkReductionMsg*>*, summaryPerfDatabase);
+CkpvDeclare(DecisionTree*, learnTree);
+CkpvExtern(int, availAnalyzeCP);
+CkpvExtern(int, hasPendingAnalysis);
+CkpvExtern(CkCallback, callBackAutoPerfDone);
+CkGroupID traceAutoPerfGID;
+CProxy_TraceAutoPerfBOC autoPerfProxy;
+extern void setNoPendingAnalysis();
+extern void startAnalysisonIdle();
+extern void startAnalysis();
+extern void autoPerfReset();
+
+
+int PICS_collection_mode;
+int PICS_evaluation_mode;
+
+bool isPeriodicalAnalysis;
+int treeGroupSize;
+int numGroups;
+int treeBranchFactor;
+bool isIdleAnalysis;
+bool isPerfDumpOn;
+CkpvDeclare(FILE*, fpSummary);
+
+
+// Allocates the two "best" records, marks both indices as unset (-1) and
+// clears every slot of the history ring.
+SavedPerfDatabase::SavedPerfDatabase() {
+  best = new PerfData();
+  secondbest = new PerfData();
+  curIdx = -1;
+  prevIdx = -1;
+  int slot = 0;
+  while(slot < ENTRIES_SAVED) {
+    perfList[slot] = NULL;
+    ++slot;
+  }
+}
+
+// Releases every buffer held in the history ring.
+// NOTE(review): `best` and `secondbest`, allocated with new in the
+// constructor, are not released here -- confirm whether that is intended.
+SavedPerfDatabase::~SavedPerfDatabase() {
+  int slot = 0;
+  while(slot < ENTRIES_SAVED) {
+    free (perfList[slot]);   // free(NULL) is a no-op, so empty slots are fine
+    ++slot;
+  }
+}
+
+// Starts a new step: records the start time, remembers the previous slot and
+// moves to the next slot of the ring, allocating a zeroed buffer for it on
+// first use.
+void SavedPerfDatabase::advanceStep() {
+  startTimer = CkWallTimer();
+  prevIdx = (curIdx >= 0) ? curIdx : 0;
+  curIdx = (curIdx + 1) % ENTRIES_SAVED;
+  if(perfList[curIdx] != NULL)
+    return;   // slot already has a buffer; it is reused as-is
+  const int bufBytes = sizeof(PerfData) * CkpvAccess(numOfPhases) * PERIOD_PERF;
+  perfList[curIdx] = (PerfData*) malloc(bufBytes);
+  memset(perfList[curIdx], 0, bufBytes);
+}
+
+// Stores the wall time elapsed since advanceStep() in the first record of
+// the current slot. Does nothing if no step has been started yet (curIdx is
+// still -1) or the slot has no buffer, instead of indexing out of bounds or
+// dereferencing NULL.
+void SavedPerfDatabase::endCurrent( ) {
+  if(curIdx < 0 || perfList[curIdx] == NULL)
+    return;
+  perfList[curIdx]->timeStep = CkWallTimer() - startTimer ;
+}
+
+// Returns the buffer of the current slot; an unset index (-1) is first
+// normalised to slot 0. The result may be NULL if the slot has no buffer.
+PerfData* SavedPerfDatabase::getCurrentPerfData(){
+  curIdx = (curIdx < 0) ? 0 : curIdx;
+  PerfData *current = perfList[curIdx];
+  return current;
+}
+
+// Returns the buffer of the previous slot, or NULL when no previous step
+// exists yet. prevIdx starts at -1, which would otherwise read
+// perfList[-1].
+PerfData* SavedPerfDatabase::getPrevPerfData(){
+  if(prevIdx < 0)
+    return NULL;
+  return perfList[prevIdx];
+}
+
+// Copies `num` PerfData records from `source` into the current slot's
+// buffer. The slot must already have a buffer large enough for them.
+void SavedPerfDatabase::copyData(PerfData *source, int num) {
+  PerfData *dest = perfList[curIdx];
+  memcpy(dest, source, num * sizeof(PerfData));
+}
+
+// Makes the current slot point at `source`.
+// NOTE(review): the pointer previously stored in the slot is overwritten
+// without being freed, and the destructor later calls free() on whatever is
+// stored here -- confirm callers pass malloc-compatible memory.
+void SavedPerfDatabase::setData(PerfData *source) {
+  perfList[curIdx] = source;
+}
+
+void combinePerfData(PerfData *ret, PerfData *source) {
+  int k;
+  CkAssert(ret!=NULL);
+  CkAssert(source!=NULL);
+  for(k=0; k<NUM_AVG; k++) {
+    ret->data[k] += source->data[k];
+  }
+  if(ret->data[MAX_EntryMethodDuration] < source->data[MAX_EntryMethodDuration])
+    ret->data[MaxEntryPE] = source->data[MaxEntryPE];
+  for(;k<NUM_AVG+NUM_MAX; k++) {
+    if(ret->data[k] < source->data[k]){
+      ret->data[k] = source->data[k];
+      k++;
+      ret->data[k] = source->data[k];
+    }
+    else
+    {
+        k++;
+    }
+  }
+  for(;k<NUM_AVG+NUM_MAX+NUM_MIN; k++) {
+    ret->data[k] = std::min(ret->data[k], source->data[k]);
+  }
+}
+
+void TraceAutoPerfBOC::gatherSummary(CkReductionMsg *msg){
+  recvChildren++;
+  PerfData *myCurrent;
+  double *data;
+  if(redMsg==NULL)
+  {
+    redMsg = msg;
+  }else
+  {
+    PerfData *fromChild = (PerfData*)msg->getData();
+    myCurrent = (PerfData*)redMsg->getData();
+    combinePerfData(myCurrent, fromChild);
+    delete msg;
+  }
+  if(recvChildren == CkpvAccess(numChildren)+1) {
+    if(CkpvAccess(myParent) == -1)
+    {
+     autoPerfProxy[CkMyPe()].globalPerfAnalyze(redMsg);
+     redMsg = NULL;
+    }
+    else{
+      autoPerfProxy[CkpvAccess(myParent)].gatherSummary(redMsg);
+      redMsg = NULL;
+    }
+    recvChildren = 0;
+  }
+}
+
+CkpvDeclare(CkReduction::reducerType, PerfDataReductionType);
+
+// Custom reducer: merges record j of every message into record j of
+// msgs[0], for all numOfPhases*PERIOD_PERF records, and returns a new
+// reduction message built from the merged msgs[0] data.
+CkReductionMsg *PerfDataReduction(int nMsg,CkReductionMsg **msgs){
+  const int numRecords = CkpvAccess(numOfPhases)*PERIOD_PERF;
+  for(int rec=0; rec<numRecords; rec++) {
+    for(int src=1; src<nMsg; src++) {
+      PerfData *accum = (PerfData*)(msgs[0]->getData())+rec;
+      PerfData *other = (PerfData*)(msgs[src]->getData())+rec;
+      combinePerfData(accum, other);
+    }
+  }
+  PerfData *merged = (PerfData*)msgs[0]->getData();
+  return CkReductionMsg::buildNew(sizeof(PerfData)*CkpvAccess(numOfPhases)*PERIOD_PERF,merged);
+}
+
+// Resolves the branch object to act on: `data` cast to TraceAutoPerfBOC*
+// when non-NULL, otherwise the local branch of autoPerfProxy. The resolved
+// pointer is currently not used for anything further.
+void  TraceAutoPerfBOC::staticAtSync(void *data) {
+  TraceAutoPerfBOC *me = (data != NULL)
+      ? (TraceAutoPerfBOC*)(data)
+      : autoPerfProxy.ckLocalBranch();
+  (void)me;
+}
+
+// Marks the start of phase `phaseId`: records it in the per-PE perf database
+// and tells the local tracer, together with the sub-step index inside the
+// current analysis period.
+void TraceAutoPerfBOC::startPhase(int phaseId) {
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+  CkpvAccess(perfDatabase)->setPhase(phaseId);
+  const int subStep = picsStep%PERIOD_PERF;
+  tracer->startPhase(subStep, phaseId);
+}
+
+// Marks the end of the current phase on the local tracer.
+void TraceAutoPerfBOC::endPhase() {
+  localAutoPerfTracingInstance()->endPhase();
+}
+
+// Marks the start of a step on the local tracer. The flag passed on is true
+// when this step opens a new analysis period (picsStep is a multiple of
+// PERIOD_PERF).
+void TraceAutoPerfBOC::startStep() {
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+
+  /* Resets the data to the initial values */
+  if(user_call == 1)
+    tracer->resetAll();
+
+  const bool opensPeriod = (picsStep % PERIOD_PERF == 0);
+  tracer->startStep(opensPeriod);
+}
+
+// Marks the end of a step: advances the application step counter by
+// `incSteps` and the PICS step counter by one, then tells the local tracer
+// whether this step closed an analysis period.
+void TraceAutoPerfBOC::endStep(bool fromGlobal, int fromPE, int incSteps) {
+  endStepTimer = CkWallTimer();
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+  currentAppStep += incSteps;
+  ++picsStep;
+  const bool closesPeriod = (picsStep % PERIOD_PERF == 0);
+  tracer->endStep(closesPeriod);
+}
+
+// Ends the current step on the local tracer, stores `cb` as the callback to
+// fire when analysis is done, and then calls run().
+// NOTE(review): unlike endStep() and endPhaseAndStep(), this function does
+// not advance picsStep or currentAppStep -- confirm that a caller does so.
+void TraceAutoPerfBOC::endStepResumeCb(bool fromGlobal, int fromPE, CkCallback cb) {
+  endStepTimer = CkWallTimer();
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+  const bool closesPeriod = (picsStep % PERIOD_PERF == 0);
+  tracer->endStep(closesPeriod);
+  setAutoPerfDoneCallback(cb);
+  run(fromGlobal, fromPE); 
+}
+
+// Combined call: closes the current phase and step, advances both step
+// counters, starts data collection when an analysis period has just closed,
+// and immediately opens the next step with phase 0.
+void TraceAutoPerfBOC::endPhaseAndStep(bool fromGlobal, int fromPE) {
+  endStepTimer = CkWallTimer();
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+  tracer->endPhase();
+  ++currentAppStep;
+  ++picsStep;
+  const bool closesPeriod = (picsStep % PERIOD_PERF == 0);
+  tracer->endStep(closesPeriod);
+  if(closesPeriod)
+    getPerfData(0, CkCallback::ignore );
+  startStep();
+  startPhase(0);
+}
+
+// Schedules startAnalysis on this PE via CcdCallFnAfterOnPE, using CP_PERIOD
+// as the delay argument.
+void TraceAutoPerfBOC::startTimeNextStep(){
+  CcdCallFnAfterOnPE((CcdVoidFn)startAnalysis, NULL, CP_PERIOD, CkMyPe());
+}
+
+// Fires the given callback.
+void TraceAutoPerfBOC::resume( CkCallback cb) {
+  cb.send();
+}
+// Fires the per-PE callBackAutoPerfDone callback (set by
+// setAutoPerfDoneCallback / setCbAndRun).
+void TraceAutoPerfBOC::resume( ) {
+  CkpvAccess(callBackAutoPerfDone).send();
+}
+
+// At the end of an analysis period, starts collection of the perf data.
+// Otherwise resumes the application straight away: for a non-global call the
+// stored callback is fired on every PE, for a global call only on `fromPE`.
+void TraceAutoPerfBOC::run(bool fromGlobal, int fromPE)
+{
+  TraceAutoPerf *tracer = localAutoPerfTracingInstance();
+  (void)tracer;
+  if(picsStep % PERIOD_PERF == 0 ) {
+    getPerfData(0, CkCallback::ignore );
+    return;
+  }
+  if(!fromGlobal) {
+    resume( CkpvAccess(callBackAutoPerfDone));
+    return;
+  }
+  if(CkMyPe() == fromPE)
+    resume();
+}
+
+// Records the wall time at which load balancing starts. `appStep` is unused.
+void TraceAutoPerfBOC::PICS_markLDBStart(int appStep) {
+  startLdbTimer = CkWallTimer(); 
+}
+
+// Records the end of load balancing: stores how long it took, the time per
+// step measured before it, and resets the after-LDB analysis counter to -1.
+void TraceAutoPerfBOC::PICS_markLDBEnd() {
+  endLdbTimer = CkWallTimer();
+  CkpvAccess(cntAfterLdb) = -1;
+  CkpvAccess(timeBeforeLdb) = currentTimeStep;
+  CkpvAccess(timeForLdb) = endLdbTimer - startLdbTimer;
+}
+
+
+// Placeholder: currently does nothing with `goalIndex` (the assignment
+// below is commented out).
+void TraceAutoPerfBOC::registerPerfGoal(int goalIndex) {
+  //CkpvAccess(perfGoal) = goalIndex;
+}
+
+// Placeholder: currently ignores `value`.
+void TraceAutoPerfBOC::setUserDefinedGoal(double value) { }
+
+// Sets the number of phases and stores a private copy of each phase name.
+// `names` is read as `num` consecutive fixed-width records of 40 bytes each.
+// NOTE(review): names stored by an earlier call are dropped by clear()
+// without being freed -- confirm ownership before releasing them here.
+void TraceAutoPerfBOC::setNumOfPhases(int num, char names[]) {
+  const int nameSlotBytes = 40;   // width of one packed name record
+  CkpvAccess(numOfPhases) = num;
+  CkpvAccess(phaseNames).clear();
+  CkpvAccess(phaseNames).resize(num);
+  for(int i=0; i<num; i++)
+  {
+    char *name = (char*)malloc(nameSlotBytes);
+    if(name == NULL)
+      CkAbort("PICS: out of memory while storing phase names.\n");
+    // Bounded copy: a record without a terminating NUL inside its 40 bytes
+    // can no longer overrun the destination buffer.
+    strncpy(name, names + i*nameSlotBytes, nameSlotBytes-1);
+    name[nameSlotBytes-1] = '\0';
+    CkpvAccess(phaseNames)[i] = name;
+  }
+}
+
+// Set the callback that is invoked after auto perf is done; it is stored in
+// the per-PE variable callBackAutoPerfDone and fired by resume().
+void TraceAutoPerfBOC::setAutoPerfDoneCallback(CkCallback cb) {
+  CkpvAccess(callBackAutoPerfDone) = cb;
+}
+
+// Stores `cb` as the analysis-done callback and then calls run().
+void TraceAutoPerfBOC::setCbAndRun(bool fromGlobal, int fromPE, CkCallback cb) {
+  setAutoPerfDoneCallback(cb);
+  run(fromGlobal, fromPE); 
+}
+
+// Converts one reduced PerfData record in place from raw values into
+// per-PE / per-step / per-object metrics, then prints a one-line summary.
+// `subStep` and `phaseID` are not used in the body.
+// The statement order matters: several fields are read as raw values by
+// later lines before they are themselves normalised.
+// NOTE(review): `steps` and the invocation/object counts are used as
+// divisors without a zero check -- confirm they cannot be zero here.
+void TraceAutoPerfBOC::formatPerfData(PerfData *perfdata, int subStep, int phaseID) {
+  double *data = perfdata->data;
+  int numpes = numPesInGroup;
+  double totaltime = data[AVG_TotalTime]/numpes;
+  // Application steps elapsed since the previous analysis.
+  int steps = currentAppStep-lastAnalyzeStep;
+
+  //derive metrics from raw performance data
+  data[AVG_LoadPerPE] = data[AVG_UtilizationPercentage]/numpes * totaltime/steps;
+  data[AVG_UtilizationPercentage] /= numpes; 
+  data[AVG_IdlePercentage] /= numpes; 
+  data[AVG_OverheadPercentage] /= numpes; 
+  data[MAX_LoadPerPE] = data[MAX_UtilizationPercentage]*totaltime/steps;
+  // These three read AVG_BytesPerObject / AVG_NumMsgsPerObject before those
+  // fields are divided by the object count further down.
+  data[AVG_BytesPerMsg] = data[AVG_BytesPerObject]/data[AVG_NumMsgsPerObject];
+  data[AVG_NumMsgPerPE] = (data[AVG_NumMsgsPerObject]/numpes)/steps;
+  data[AVG_BytesPerPE] = data[AVG_BytesPerObject]/numpes/steps;
+  data[AVG_CacheMissRate] = data[AVG_CacheMissRate]/numpes/steps;
+
+  data[AVG_NumMsgRecv] = data[AVG_NumMsgRecv]/numpes/steps;
+  data[AVG_BytesMsgRecv] = data[AVG_BytesMsgRecv]/numpes/steps;
+
+  // Durations are divided by the raw invocation counts first; the counts
+  // themselves are normalised only afterwards.
+  data[AVG_EntryMethodDuration] /= data[AVG_NumInvocations];
+  data[AVG_EntryMethodDuration_1] /= data[AVG_NumInvocations_1];
+  data[AVG_EntryMethodDuration_2] /= data[AVG_NumInvocations_2];
+  data[AVG_NumInvocations] = data[AVG_NumInvocations]/numpes/steps;
+  data[AVG_NumInvocations_1] = data[AVG_NumInvocations_1]/numpes/steps;
+  data[AVG_NumInvocations_2] = data[AVG_NumInvocations_2]/numpes/steps;
+
+  // Per-object metrics use the raw object count, which is normalised last.
+  data[AVG_LoadPerObject] /= data[AVG_NumObjectsPerPE];
+  data[AVG_NumMsgsPerObject] /= data[AVG_NumObjectsPerPE];
+  data[AVG_BytesPerObject] /= data[AVG_NumObjectsPerPE];
+
+  data[AVG_NumObjectsPerPE] = data[AVG_NumObjectsPerPE]/numpes/steps;
+
+  CkPrintf("format data :  PE %d PEs in group %d [IDLE, OVERHEAD, UTIL, ENTRY ] %.2f, %.2f, %.2f %f \n", CkMyPe(), numpes, data[AVG_IdlePercentage], data[AVG_OverheadPercentage], data[AVG_UtilizationPercentage], data[AVG_EntryMethodDuration]);
+}
+
+void TraceAutoPerfBOC::getPerfData(int reductionPE, CkCallback cb) {
+  TraceAutoPerf *t = localAutoPerfTracingInstance();
+  if(t->getTraceOn()) {
+    if(treeBranchFactor < 0) {
+      PerfData *data = CkpvAccess(perfDatabase)->getCurrentPerfData();
+      CkCallback *cb1 = new CkCallback(CkIndex_TraceAutoPerfBOC::globalPerfAnalyze(NULL), thisProxy[reductionPE]);
+      contribute(sizeof(PerfData)*CkpvAccess(numOfPhases)*PERIOD_PERF,data, CkpvAccess(PerfDataReductionType), *cb1);
+      }
+    else 
+    {
+      PerfData *data = CkpvAccess(perfDatabase)->getCurrentPerfData();
+      CkReductionMsg *redMsgP = CkReductionMsg::buildNew(sizeof(PerfData)*CkpvAccess(numOfPhases)*PERIOD_PERF, data);
+      if(CkpvAccess(myParent) != -1 && CkpvAccess(numChildren) == 0)  //leaves of the tree, partial collection
+      {
+        autoPerfProxy[CkpvAccess(myParent)].gatherSummary(redMsgP);
+      }
+      else{
+        gatherSummary(redMsgP);
+      }
+    }
+  }
+}
+
+//perf data from all processors within a group is collected at the root of that
+//group and the data is output to a file.
+void TraceAutoPerfBOC::globalPerfAnalyze(CkReductionMsg *msg )
+{
+  double now = CkWallTimer();
+  double timestep = now-lastAnalyzeTimer;
+  double totaltimestep = now-endStepTimer;
+  lastAnalyzeTimer = now;
+  CkpvAccess(cntAfterLdb)++;
+  int numpes = numPesInGroup;
+  if(analyzeStep == 0)
+  {
+    //autoTunerProxy.ckLocalBranch()->printCPNameToFile(CkpvAccess(fpSummary)); 
+  }
+  analyzeStep++;
+  PerfData *data=(PerfData*) msg->getData();
+  if(CkpvAccess(isExit) || analyzeStep<= WARMUP_STEP || analyzeStep >= PAUSE_STEP) {
+    autoPerfProxy[CkpvAccess(myInterGroupParent)].tuneDone();
+  }
+  if(analyzeStep<= WARMUP_STEP || analyzeStep >= PAUSE_STEP){
+    if(isPeriodicalAnalysis && CkMyPe()== 0)
+      CcdCallFnAfterOnPE((CcdVoidFn)startAnalysis, NULL, CP_PERIOD, CkMyPe());
+    if(analyzeStep < WARMUP_STEP){
+      delete msg;
+    }
+    else 
+    {
+      for(int j=0; j<CkpvAccess(numOfPhases)*PERIOD_PERF; j++)
+      {
+        formatPerfData(data, j/CkpvAccess(numOfPhases), j%CkpvAccess(numOfPhases));
+        data++;
+      }
+      CkpvAccess(summaryPerfDatabase)->add(msg);
+    }
+    lastAnalyzeStep = currentAppStep;
+    return;
+  }
+
+  TRACE_START(PICS_CODE);
+  fprintf(CkpvAccess(fpSummary), "NEWITER %d %d %d %lld %d\n", analyzeStep, CkMyPe(), CkpvAccess(numOfPhases)*PERIOD_PERF, (CMK_TYPEDEF_UINT8)(CkWallTimer()*1000000), currentAppStep); 
+  for(int j=0; j<CkpvAccess(numOfPhases)*PERIOD_PERF; j++)
+  {
+    formatPerfData(data, j/CkpvAccess(numOfPhases), j%CkpvAccess(numOfPhases));
+    data->printMe(CkpvAccess(fpSummary), "format");
+  }
+  //autoTunerProxy.ckLocalBranch()->printCPToFile(CkpvAccess(fpSummary));
+  data=(PerfData*) msg->getData();
+  //save results to database TODO
+  if(bestTimeStep == -1 || bestTimeStep > timestep)
+  {
+    isBest = true;
+    bestTimeStep = timestep;
+  }
+  else
+    isBest = false;
+  currentTimeStep = data->timeStep = timestep/(currentAppStep-lastAnalyzeStep);
+  CkPrintf("-------------------- current timestep is %f step %d after ldb %d \n", currentTimeStep, currentAppStep, CkpvAccess(cntAfterLdb));
+  if(CkpvAccess(cntAfterLdb) == 1)
+    CkpvAccess(currentTimeStep) = currentTimeStep;
+  lastAnalyzeStep = currentAppStep;
+  CkReductionMsg *oldData = CkpvAccess(summaryPerfDatabase)->add(msg);
+  if(oldData != NULL) {
+    delete oldData;
+  }
+
+  if( analyzeStep%NumOfSetConfigs == 0) {
+    //pack results and reduce to PE0 and decide group with best performance metrics to choose best, average utilization percentage
+    autoPerfProxy[CkpvAccess(myInterGroupParent)].globalDecision(data->data[AVG_UtilizationPercentage], CkMyPe());
+  }
+  
+  TRACE_END(currentAppStep, PICS_CODE);
+}
+
+/**
+ * Collect one group's metric and, once the round is complete, ask the best
+ * group to analyze and tune.
+ *
+ * A larger metric is considered better.  In FULL collection mode the round is
+ * complete after numGroups reports; in any other mode every report completes
+ * a round.
+ *
+ * @param metrics Metric value reported by a group.
+ * @param source  PE that reported it.
+ */
+void TraceAutoPerfBOC::globalDecision(double metrics, int source) {
+  // The first report of a round always becomes the reference; later ones
+  // replace it only when strictly better.
+  const bool firstReport = (recvGroupCnt == 0);
+  if(firstReport || bestMetrics < metrics)
+  {
+    bestMetrics = metrics;
+    bestSource = source;
+  }
+
+  ++recvGroupCnt;
+  const bool stillWaiting = (recvGroupCnt < numGroups) && (PICS_collection_mode==FULL);
+  if(stillWaiting)
+    return;
+
+  // Round finished: reset the counter and trigger the winner.
+  recvGroupCnt = 0;
+  autoPerfProxy[bestSource].analyzeAndTune();
+  if(isPeriodicalAnalysis) {
+    autoPerfProxy[0].startTimeNextStep();
+  }
+}
+
+void TraceAutoPerfBOC::analyzeAndTune(){
+  problemProcList.clear();
+  solutions[0].clear();
+  solutions[1].clear();
+  perfProblems.clear();
+
+  CkReductionMsg *msg = CkpvAccess(summaryPerfDatabase)->getCurrent();
+  PerfData *data=(PerfData*) msg->getData();
+  CkReductionMsg *prevMsg = CkpvAccess(summaryPerfDatabase)->getData(0);
+  PerfData *prevSummaryData = (PerfData*)(prevMsg->getData());
+  for(int j=0; j<CkpvAccess(numOfPhases)*PERIOD_PERF; j++)
+  {
+    analyzePerfData(data, j/CkpvAccess(numOfPhases), j%CkpvAccess(numOfPhases));
+    comparePerfData(prevSummaryData, data, j/CkpvAccess(numOfPhases), j%CkpvAccess(numOfPhases));
+    prevSummaryData++;
+    data++;
+  }
+  //combine all solutions in one map, lower priority first and then higher priority
+  int numOfSets;
+  if(PICS_collection_mode == PARTIAL)
+    numOfSets = 1;
+  else
+    numOfSets = numGroups;
+  autoPerfProxy[CkpvAccess(myInterGroupParent)].tuneDone();
+  //output results to screen or files
+  for(int idx=0; idx<solutions.size(); idx++)
+  {
+    fprintf(stdout, "\nnumber of solutions is %d \n", solutions[idx].size());
+      for(IntDoubleMap::iterator iter=solutions[idx].begin(); iter!=solutions[idx].end(); iter++){
+          int effect = iter->first;
+          int value = effect >0 ? effect : -effect;
+          fprintf(stdout, "%s %s \n", effect>0?"UP":"DOWN", EffectName[value]); 
+      }
+  }
+}
+
+/**
+ * Walk the priority tree and the fuzzy tree over one PerfData entry.
+ *
+ * Solutions are accumulated in solutions[0] (priority tree) and solutions[1]
+ * (fuzzy tree); the conditions reported by both walks are inserted at the
+ * front of perfProblems.
+ *
+ * @param perfdata Entry whose data array is analyzed.
+ * @param subStep  Not used by this implementation.
+ * @param phaseID  Not used by this implementation.
+ */
+void TraceAutoPerfBOC::analyzePerfData(PerfData *perfdata, int subStep, int phaseID) {
+  std::vector<Condition*> found;
+  double *metrics = perfdata->data;
+
+  priorityTree->DFS(metrics, solutions, 0, found, CkpvAccess(fpSummary));
+  fuzzyTree->DFS(metrics, solutions, 1, found, CkpvAccess(fpSummary));
+
+  // Range insertion at the front keeps the conditions in discovery order.
+  perfProblems.insert(perfProblems.begin(), found.begin(), found.end());
+}
+
+/**
+ * Compute, metric by metric, the ratio between the current and the previous
+ * performance data (0 where the previous value is 0).
+ *
+ * The ratios are not consumed yet.  They live in automatic storage; the
+ * original heap array was never released and leaked on every call.
+ *
+ * @param prevData Entry of the previous step.
+ * @param perfData Entry of the current step.
+ * @param subStep  Not used by this implementation.
+ * @param phaseID  Not used by this implementation.
+ */
+void TraceAutoPerfBOC::comparePerfData(PerfData *prevData, PerfData *perfData, int subStep, int phaseID) {
+  //compare data of this step with previous step, phase by phase compare
+  const double *current = perfData->data;
+  const double *prev = prevData->data;
+  double ratios[NUM_NODES];
+  for(int i=0; i<NUM_NODES; i++)
+  {
+    if(prev[i] != 0)
+      ratios[i] = current[i]/prev[i];
+    else
+      ratios[i] = 0;
+  }
+  (void)ratios;  // computed for future use; silences unused-variable warnings
+}
+
+/**
+ * Count one finished group.  When all numGroups groups have reported, reset
+ * the counter and either exit (isExit set) or resume().
+ */
+void TraceAutoPerfBOC::tuneDone() {
+  ++recvGroups;
+  if(recvGroups != numGroups)
+    return;
+
+  recvGroups = 0;
+  if(!CkpvAccess(isExit))
+  {
+    resume();
+  }
+  else
+  {
+    CkExit();
+  }
+}
+
+/**
+ * Placeholder receiver for a global summary; the data is not processed yet.
+ *
+ * @param msg Received message.  It is released here, as the other receivers
+ *            in this file do with messages they do not store, so it is not
+ *            leaked.
+ */
+void TraceAutoPerfBOC::recvGlobalSummary(CkReductionMsg *msg)
+{
+  delete msg;
+}
+
+/**
+ * Estimate communication time with a linear latency/bandwidth model:
+ * msgs * alpha + bytes * beta.
+ *
+ * @param msgs  Number of messages.
+ * @param bytes Number of bytes transferred.
+ * @return Modelled time in seconds.
+ */
+double TraceAutoPerfBOC::getModelNetworkTime(int msgs, long bytes){
+  // alpha: per-message latency, 2us.
+  const double latencyPerMsg = 0.000002;
+  // beta: per-byte time.  NOTE(review): the original comment says
+  // "4GBytes/sec", but 0.25us per byte corresponds to 4 MBytes/sec -- confirm
+  // which of the two is intended.
+  const double timePerByte = 0.00000025;
+  return msgs* latencyPerMsg + timePerByte * bytes; 
+}
+
+/** Set the per-PE dumpData flag to true. */
+void TraceAutoPerfBOC::setProjectionsOutput()
+{
+  CkpvAccess(dumpData) = true;
+}
+
+TraceAutoPerfBOC::TraceAutoPerfBOC() {
+  picsStep = 0;
+  lastAnalyzeStep = 0;
+  lastCriticalPathLength = 0;
+  currentAppStep = 0;
+  analyzeStep = 0;
+  isBest = false;
+  bestTimeStep = -1;
+  currentTimeStep = -1;
+  priorityTree = new DecisionTree();
+  priorityTree->build("tree.txt");
+  fuzzyTree = new DecisionTree();
+  fuzzyTree->build("fuzzytree.txt");
+  numGroups = 1;
+  recvGroups = 0;
+  numPesInGroup = CkNumPes();
+  recvChildren = 0;
+  redMsg = NULL;
+  solutions.resize(2);
+  //scalable tree structure analysis
+  if(treeBranchFactor > 0) {
+    int treeGroupID = CkMyPe()/treeGroupSize;
+    int idInTree = CkMyPe()%treeGroupSize;
+    int start = treeGroupID * treeGroupSize;
+    int upperBoundPE= (treeGroupID+1) * treeGroupSize;
+    int upperBound;
+    int child;
+
+    recvChildren = 0;
+    CkpvAccess(numChildren) = 0;
+    numGroups = (CkNumPes()-1)/treeGroupSize+1;
+    if(idInTree == 0)
+      CkpvAccess(myParent) = -1;
+    else
+    {
+      CkpvAccess(myParent) = (idInTree-1)/treeBranchFactor + start;
+    }
+    for(int i=0; i<treeBranchFactor; i++)
+    {
+      child = idInTree*treeBranchFactor+1+i+start;
+      if(child < upperBoundPE && child<CkNumPes())
+        CkpvAccess(numChildren)++;
+    }
+    if(upperBoundPE <= CkNumPes())
+      numPesInGroup = treeGroupSize;
+    else
+      numPesInGroup = CkNumPes() - start; 
+  }
+  else{
+    if(CkMyPe()==0)
+      CkpvAccess(myParent) = -1;
+    else
+      CkpvAccess(myParent) = 0;
+  }
+  CkpvAccess(myInterGroupParent) = 0;
+  recvGroupCnt = 0;
+  TraceAutoPerf *t = localAutoPerfTracingInstance();
+  if(PICS_collection_mode == PARTIAL)
+  {
+    numPesCollection = CkNumPes()>numPesInGroup?numPesInGroup:CkNumPes();
+  }
+  else
+    numPesCollection = CkNumPes();
+
+  if(CkMyPe() >= numPesCollection)
+  {
+    t->setTraceOn(false);
+  }else
+  {
+    t->setTraceOn(true);
+  }
+
+  if((isPeriodicalAnalysis))
+  {
+    setNumOfPhases(1, "Default");
+    startStep();
+    startPhase(0);
+    if(CkMyPe() == 0)
+    {
+      CcdCallFnAfterOnPE((CcdVoidFn)startAnalysis, NULL, 100, CkMyPe());
+    }
+  }
+  //--------- Projections output
+  if(CkpvAccess(myParent)==-1){
+    char filename[50];
+    sprintf(filename, "output.%d.pics", CkMyPe());
+    if(CkMyPe()==0)
+      CkpvAccess(fpSummary) = fopen(filename, "w+");
+    else if(PICS_collection_mode == FULL)
+      CkpvAccess(fpSummary) = fopen(filename, "w+");
+  }
+}
+
+TraceAutoPerfBOC::~TraceAutoPerfBOC() { }  // NOTE(review): priorityTree and fuzzyTree allocated by the default constructor are not released here -- confirm whether that is intended
+
+TraceAutoPerfInit::TraceAutoPerfInit(CkArgMsg* args)
+{
+  printf("Charm++ - PICS > Enabled pics autoPerf ......\n");
+  char **argv = args->argv;
+  isPeriodicalAnalysis = CmiGetArgFlagDesc(argv,"+auto-pics","start performance analysis periodically");
+  isIdleAnalysis = CmiGetArgFlagDesc(argv,"+idleAnalysis","start performance analysis when idle");
+  if(isIdleAnalysis){
+    CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)startAnalysisonIdle, NULL);
+    CcdCallFnAfterOnPE((CcdVoidFn)autoPerfReset, NULL, 10, CmiMyPe());
+  }
+  isPerfDumpOn = true; 
+  CkpvAccess(fpSummary) = NULL;
+  if(CmiGetArgIntDesc(argv,"+picsGroupSize", &treeGroupSize,"number of processors within a PICS group ")) {
+    treeBranchFactor = 2;
+    CkPrintf("Charm++ - PICS >>>>>>>>> set scalable tree branch factor %d  group is %d \n", treeBranchFactor, treeGroupSize);
+  }
+  else
+  {
+    treeGroupSize = CkNumPes();
+    treeBranchFactor = 2;
+  }
+
+  if(CmiGetArgIntDesc(argv,"+picsCollectionMode", &PICS_collection_mode, "Collection mode (0 full, 1 partial")) {
+    CkPrintf("Charm++ -PICS >>>>>>>>> set scalable collection mode %d \n", PICS_collection_mode);
+  }else{
+    PICS_collection_mode = FULL;
+  }
+
+  if(CmiGetArgIntDesc(argv,"+picsEvaluationMode", &PICS_evaluation_mode, "Evaluation mode (0 SEQ, 1 PARALLEL")) {
+    CkPrintf("Charm++ - PICS >>>>>>>>> set scalable evaluation mode %d \n", PICS_evaluation_mode);
+  }else
+  {
+    PICS_evaluation_mode = SEQUENTIAL;
+  }
+
+  traceAutoPerfGID = autoPerfProxy = CProxy_TraceAutoPerfBOC::ckNew();
+  /* Starts a new phase without user call */
+  autoPerfProxy.startStep();
+  autoPerfProxy.startPhase(0);
+  autoPerfProxy.setNumOfPhases(1, "program");
+}
+
+/**
+ * Exit hook: finish the current phase and step on behalf of the user when the
+ * user never called the step API, request a last round of performance data
+ * with isExit set, close the summary file and exit.
+ */
+extern "C" void traceAutoPerfExitFunction() {
+  /* Starts copying of data */
+  // Do not call them by default if the user is calling them
+  const bool frameworkDrivesSteps = (user_call == 0);
+  if(frameworkDrivesSteps){
+    autoPerfProxy.endPhase();
+    autoPerfProxy.endStepResumeCb(true, CkMyPe(), CkCallbackResumeThread());
+  }
+
+  CkpvAccess(isExit) = true;
+  autoPerfProxy.getPerfData(0, CkCallback::ignore );
+
+  FILE *summary = CkpvAccess(fpSummary);
+  if(summary != NULL){
+    fflush(summary);
+    fclose(summary);
+  }
+  CkExit();
+}
+
+void _initTraceAutoPerfBOC()
+{
+  WARMUP_STEP = 0;
+  PAUSE_STEP = 1000;
+  CkpvInitialize(CkReduction::reducerType, PerfDataReductionType);
+  CkpvAccess(PerfDataReductionType)=CkReduction::addReducer(PerfDataReduction);
+  CkpvInitialize(int, hasPendingAnalysis);
+  CkpvAccess(hasPendingAnalysis) = 0;
+  CkpvInitialize(CkCallback, callBackAutoPerfDone);
+  CkpvAccess(callBackAutoPerfDone) = CkCallback::ignore; 
+  CkpvInitialize(bool,   isExit);
+  CkpvAccess(isExit) = false;
+//  CkpvInitialize(int, perfGoal);
+//  CkpvAccess(perfGoal) = BestTimeStep;
+  CkpvInitialize(int, myParent);
+  CkpvAccess(myParent) = -1;
+  CkpvInitialize(int, myInterGroupParent);
+  CkpvAccess(myInterGroupParent) = -1;
+  CkpvInitialize(int, numChildren);
+  CkpvAccess(numChildren) = -1;
+  CkpvInitialize(int, numOfPhases);
+  CkpvAccess(numOfPhases) = 1;
+  CkpvInitialize(std::vector<char*>, phaseNames);
+  CkpvAccess(phaseNames).resize(1);
+  CkpvAccess(phaseNames)[0] = "default";
+  isPeriodicalAnalysis = false;
+  CkpvInitialize(double, timeForLdb);
+  CkpvAccess(timeForLdb) = 0;
+  CkpvInitialize(double, timeBeforeLdb);
+  CkpvAccess(timeBeforeLdb) = -1;
+  CkpvInitialize(double, currentTimeStep);
+  CkpvAccess(currentTimeStep) = -1;
+  CkpvInitialize(int, cntAfterLdb);
+  CkpvAccess(cntAfterLdb) = 4;
+  CkpvInitialize(FILE*, fpSummary);
+  CkpvAccess(fpSummary) = NULL;
+  #ifdef __BIGSIM__
+  if (BgNodeRank()==0) {
+#else               
+    if (CkMyRank() == 0) {
+#endif
+      registerExitFn(traceAutoPerfExitFunction);
+    }
+    CkpvInitialize(SavedPerfDatabase*, perfDatabase);
+    CkpvAccess(perfDatabase) = new SavedPerfDatabase();
+    CkpvInitialize(Database<CkReductionMsg*>*, summaryPerfDatabase);
+    CkpvAccess(summaryPerfDatabase) = new Database<CkReductionMsg*>();
+    CkpvInitialize(DecisionTree*, learnTree);
+    CkpvAccess(learnTree) = new DecisionTree();
+}
+
+//------------ C function ----------
+/** Store m in the global PICS_collection_mode. */
+void setCollectionMode(int m)
+{
+  PICS_collection_mode = m;
+}
+
+/** Store m in the global PICS_evaluation_mode. */
+void setEvaluationMode(int m)
+{
+  PICS_evaluation_mode = m;
+}
+
+#include "TraceAutoPerf.def.h"
+
diff --git a/src/ck-pics/picsautoperf.ci b/src/ck-pics/picsautoperf.ci
new file mode 100644 (file)
index 0000000..9e92966
--- /dev/null
@@ -0,0 +1,51 @@
+module TraceAutoPerf {  // Charm++ interface of the PICS automatic performance analysis framework
+  mainchare TraceAutoPerfInit {  // its constructor parses the PICS command-line options and creates the group below
+    entry TraceAutoPerfInit(CkArgMsg *m);
+  };
+
+  initproc void _initTraceAutoPerfBOC();  // per-PE initialization of the Ckpv variables
+  readonly CProxy_TraceAutoPerfBOC autoPerfProxy;  // proxy to the analysis group
+
+  readonly bool isPeriodicalAnalysis;  // set from the +auto-pics flag
+  readonly int  treeGroupSize;  // set from +picsGroupSize, defaults to the number of PEs
+  readonly int  numGroups;
+  readonly int  treeBranchFactor;
+  readonly bool isIdleAnalysis;  // set from the +idleAnalysis flag
+  readonly bool isPerfDumpOn;
+
+  readonly int WARMUP_STEP;  // analysis steps <= this value are treated as warm-up
+  readonly int PAUSE_STEP;  // analysis steps >= this value are no longer analyzed
+
+  readonly int PICS_collection_mode;  // set from +picsCollectionMode (0 full, 1 partial)
+  readonly int PICS_evaluation_mode;  // set from +picsEvaluationMode (0 SEQ, 1 PARALLEL)
+
+  group [migratable] TraceAutoPerfBOC {  // one branch per PE: collects and analyzes the performance data
+    entry TraceAutoPerfBOC(void);
+    entry void registerPerfGoal(int goalIndex);
+    entry void setAutoPerfDoneCallback(CkCallback cb);
+    entry void setUserDefinedGoal(double value);
+    entry void setNumOfPhases(int num, char names[num*40]);  // names carries num fixed-size slots of 40 bytes
+    entry [expedited,inline] void startStep();
+    entry [expedited,inline] void endStep(bool fromGlobal, int pe, int incSteps);
+    entry [expedited,inline] void endPhaseAndStep(bool fromGlobal, int pe);
+    entry void resume();
+    entry [expedited,inline] void endStepResumeCb(bool fromGlobal, int pe, CkCallback cb);
+    entry void run(bool fromGlobal, int fromPE);
+    entry void setCbAndRun(bool fromGlobal, int fromPE, CkCallback cb) ;
+    entry [expedited,inline] void startPhase(int phaseId);
+    entry [expedited,inline] void endPhase();
+    entry [expedited,inline] void getPerfData(int reductionPE, CkCallback cb);  // starts the collection of the current data
+    entry [expedited,inline] void gatherSummary(CkReductionMsg *msg);
+    entry [expedited,inline] void globalPerfAnalyze(CkReductionMsg *msg);  // receives the collected data of a group
+    entry void setProjectionsOutput();
+    entry void recvGlobalSummary(CkReductionMsg *msg);
+    entry [expedited,inline] void tuneDone();  // counts finished groups
+    entry [expedited,inline] void globalDecision(double metrics, int source);  // picks the group with the highest metric
+    entry [expedited,inline] void analyzeAndTune();
+    entry void startTimeNextStep();
+
+    entry void PICS_markLDBStart(int appStep);
+    entry void PICS_markLDBEnd();
+  };
+};
+
diff --git a/src/ck-pics/picsautoperf.h b/src/ck-pics/picsautoperf.h
new file mode 100644 (file)
index 0000000..5f68512
--- /dev/null
@@ -0,0 +1,321 @@
+#ifndef  TRACE__AUTOPERF__H__
+#define  TRACE__AUTOPERF__H__
+#define _VERBOSE_H
+
+#include "picstreenode.h"
+#include "picsdecisiontree.h"
+#include "picsautoperfAPI.h"
+#include <errno.h>
+#include "charm++.h"
+#include "trace.h"
+#include "envelope.h"
+#include "register.h"
+#include "trace-common.h"
+#include "TraceAutoPerf.decl.h"
+#include "trace-projections.h"
+#include <vector>
+#include <map>
+#include <list>
+#include <fstream>
+#include <iostream>
+#include <utility>
+
+
+CkpvExtern(int, numOfPhases);
+class SavedPerfDatabase;
+CkpvExtern(SavedPerfDatabase*, perfDatabase);
+CkpvExtern(DecisionTree*, learnTree);
+CkpvExtern(int, perfGoal);
+
+//scalable tree analysis
+CkpvExtern(int, myParent);
+CkpvExtern(int, myInterGroupParent);
+CkpvExtern(int, numChildren);
+
+
+extern CkGroupID traceAutoPerfGID;
+extern CProxy_TraceAutoPerfBOC autoPerfProxy;
+extern int treeBranchFactor;
+extern int numGroups;
+extern int treeGroupSize;
+
+/*
+ * raw performance summary data
+ */
+class PerfData {
+public:
+  double data[NUM_NODES];
+  double timeStep;
+  double energy;
+  double utilPercentage;
+  double overheadPercentage;
+  double idlePercentage;
+  double userMetrics;
+
+  PerfData() {}
+
+  PerfData(double step, double util, double idle, double overhead)
+  {
+    timeStep = step;
+    idlePercentage = idle;
+    overheadPercentage = overhead;
+    utilPercentage = util;
+  }
+
+  void copy(PerfData *src)
+  {
+    timeStep = src->timeStep;
+    energy = src->energy;
+    utilPercentage = src->utilPercentage;
+    overheadPercentage = src->overheadPercentage;
+    idlePercentage = src->idlePercentage;
+    userMetrics = src->userMetrics;
+  }
+
+  void printMe(FILE *fp, char *str) {
+    for(int i=0; i<NUM_NODES; i++)
+    {
+      if (i == AVG_IdlePercentage || i == AVG_OverheadPercentage ||
+          i==AVG_UtilizationPercentage || i==AVG_AppPercentage ||
+          i == MAX_IdlePercentage || i == MAX_OverheadPercentage ||
+          i == MAX_UtilizationPercentage || i == MAX_AppPercentage ||
+          i == MIN_IdlePercentage || i == MIN_OverheadPercentage ||
+          i == MIN_UtilizationPercentage)
+        fprintf(fp, "%d %s %.1f\n", i, FieldName[i], 100*data[i]);
+      else
+        fprintf(fp, "%d %s %f\n", i, FieldName[i], data[i]);
+    }
+  }
+};
+
+/*
+ * a set of history performance summary data
+ */
+template <class DataType> class Database{
+private:
+  std::vector<DataType> array;
+  int curIdx;
+  int prevIdx;
+
+public:
+  Database() {
+    prevIdx = curIdx = -1;
+    array.resize(10);
+    for(int i=0; i<array.size(); i++)
+      array[i] = NULL;
+  }
+
+  Database(int s) {
+    prevIdx = curIdx = -1;
+    array.resize(s);
+  }
+
+  DataType add(DataType source) {
+    DataType oldData;
+    prevIdx = curIdx;
+    curIdx = (curIdx+1)%array.size();
+    oldData = array[curIdx];
+    array[curIdx] = source;
+    if(prevIdx == -1) {
+      prevIdx = 0;
+    }
+    return oldData;
+  }
+
+  DataType getCurrent() {
+    return array[curIdx];
+  }
+
+  DataType getPrevious() {
+    return array[prevIdx];
+  }
+
+  //relative position index
+  DataType getData(int index) {
+    int i = (curIdx+index+array.size())%array.size();
+    return array[i];
+  }
+
+};
+
+CkpvExtern(Database<CkReductionMsg*>*, summaryPerfDatabase);
+#define ENTRIES_SAVED       10
+class SavedPerfDatabase {
+private:
+  PerfData    *perfList[ENTRIES_SAVED];
+  PerfData    *best, *secondbest;
+  int         currentPhase;
+  double      startTimer;
+  int         curIdx; //current available
+  int         prevIdx;
+public:
+
+  SavedPerfDatabase(void) ;
+  ~SavedPerfDatabase(void);
+  void advanceStep(void);
+  PerfData* getCurrentPerfData(void);
+  PerfData* getPrevPerfData(void);
+
+  void setUserDefinedMetrics(double v) { perfList[curIdx]->userMetrics = v; }
+  void setPhase(int phaseId) { currentPhase = phaseId; }
+  void endCurrent(void) ;
+  void copyData(PerfData *source, int num);   //copy data from source
+  void setData(PerfData *source);
+  bool timeStepLonger() { return true;}
+  double getCurrentTimestepDuration() { return perfList[curIdx]->timeStep; }
+  double getTimestepRatio() { return perfList[curIdx]->timeStep/perfList[prevIdx]->timeStep; }
+  double getUtilRatio() { return perfList[curIdx]->utilPercentage/perfList[prevIdx]->utilPercentage; }
+  double getEnergyRatio() { return 0; }
+  double getCurrentIdlePercentage() { return perfList[curIdx]->idlePercentage; }
+  double getPreviousIdlePercentage() { return perfList[prevIdx]->idlePercentage; }
+  double getIdleRatio() { return  perfList[curIdx]->idlePercentage/perfList[prevIdx]->idlePercentage; }
+  double getCurrentOverheadPercentage() { return perfList[curIdx]->overheadPercentage; }
+  double getPreviousOverheadPercentage() { return perfList[prevIdx]->overheadPercentage; }
+  double getOverheadRatio() { return perfList[curIdx]->overheadPercentage/perfList[prevIdx]->overheadPercentage; }
+};
+
+/*
+ * Mainchare of the TraceAutoPerf module; the argument-taking constructor is
+ * defined in picsautoperf.C.
+ */
+class TraceAutoPerfInit : public Chare
+{
+public:
+  TraceAutoPerfInit(CkArgMsg*);
+
+  // Migration constructor: only forwards to the base class.
+  TraceAutoPerfInit(CkMigrateMessage *m) : Chare(m) { }
+};
+
+/*
+ * class to perform collection, analysis
+ */
+class TraceAutoPerfBOC : public CBase_TraceAutoPerfBOC {
+private:
+  int         numPesCollection;
+  int         recvChildren;
+  int         recvGroups;
+  CkReductionMsg *redMsg;
+  int         numPesInGroup;
+
+  int         picsStep;
+  bool        isBest;
+  double      bestTimeStep;
+  double      currentTimeStep;
+
+  int         lastAnalyzeStep;
+  int         currentAppStep;
+  int         analyzeStep;
+  double      endStepTimer;
+  double      lastCriticalPathLength;
+  double      lastAnalyzeTimer;
+  LBDatabase  *theLbdb;
+  std::vector<IntDoubleMap> solutions;
+  std::vector<Condition*>    perfProblems;
+  std::vector<int>            problemProcList;
+  DecisionTree* priorityTree;
+  DecisionTree* fuzzyTree;
+
+  int     recvGroupCnt;
+  double  bestMetrics;
+  int     bestSource;
+
+  double startLdbTimer;
+  double endLdbTimer;
+
+public:
+  TraceAutoPerfBOC() ;
+  TraceAutoPerfBOC(CkMigrateMessage *m) : CBase_TraceAutoPerfBOC(m) {};
+  ~TraceAutoPerfBOC();
+
+  void pup(PUP::er &p) {
+    CBase_TraceAutoPerfBOC::pup(p);
+  }
+  void registerPerfGoal(int goalIndex);
+  void setUserDefinedGoal(double value);
+  void setAutoPerfDoneCallback(CkCallback cb); 
+  static void staticAtSync(void *data);
+
+  void resume();
+  void resume(CkCallback cb);
+  void startPhase(int phaseId);
+  void endPhase();
+  void startStep();
+  void endStep(bool fromGlobal, int pe, int incSteps);
+  void endPhaseAndStep(bool fromGlobal, int pe);
+  void endStepResumeCb(bool fromGlobal, int pe, CkCallback cb);
+  void getPerfData(int reductionPE, CkCallback cb);
+  void run(bool fromGlobal, int fromPE);
+  void setCbAndRun(bool fromGlobal, int fromPE, CkCallback cb) ;
+  void PICS_markLDBStart(int appStep) ;
+  void PICS_markLDBEnd() ;
+
+  void setNumOfPhases(int num, char names[]);
+  void setProjectionsOutput();
+  void recvGlobalSummary(CkReductionMsg *msg);
+
+  void tuneDone();
+
+  //scalable analysis, global decision making
+  void globalDecision(double metrics, int source);
+  void analyzeAndTune();
+  void startTimeNextStep();
+  void gatherSummary(CkReductionMsg *msg);
+  void globalPerfAnalyze(CkReductionMsg *msg);
+
+  void formatPerfData(PerfData *data, int step, int phase);
+  void analyzePerfData(PerfData *data, int step, int phase);
+
+  void comparePerfData(PerfData *prevData, PerfData *data, int step, int phase);
+
+  double getModelNetworkTime(int msgs, long bytes) ;
+  
+  inline bool isCurrentBest() {
+    return isBest;
+  }
+
+  inline void setCurrentBest(bool b ) {
+    isBest = b;
+  }
+
+  inline double getCurrentBestRatio() {
+    return currentTimeStep/bestTimeStep;
+  } 
+};
+
+/* Identifies an object by its address. */
+class ObjIdentifier {
+public:
+  void *objPtr;
+
+  ObjIdentifier(void *p) : objPtr(p) { }
+
+  // a and i are accepted but not stored.
+  ObjIdentifier(int a, int i, void *p) : objPtr(p) { }
+};
+
+/* Execution time and message statistics recorded for one object. */
+class ObjInfo 
+{
+public:
+  double executeTime;
+  long msgCount;
+  long msgSize;
+
+  ObjInfo(double e, long mc, long ms)
+    : executeTime(e), msgCount(mc), msgSize(ms) { }
+};
+
+/* Strict ordering of object addresses, used as the key comparator of
+ * ObjectLoadMap_t. */
+class compare {
+public:
+  bool operator () (const void *x, const void *y) const {
+    return y > x;
+  }
+};
+
+typedef std::map<void*, ObjInfo*, compare> ObjectLoadMap_t;
+
+
+void setCollectionMode(int m) ;
+void setEvaluationMode(int m) ;
+void setConfigMode(int m) ;
+
+#endif
+
diff --git a/src/ck-pics/picsautoperfAPI.C b/src/ck-pics/picsautoperfAPI.C
new file mode 100644 (file)
index 0000000..778168a
--- /dev/null
@@ -0,0 +1,169 @@
+#include "picsdefs.h"
+#include "picsdefscpp.h"
+#include "picsautoperf.h"
+#include "picsautoperfAPI.h"
+#include "picsautoperfAPIC.h"
+#define PERF_FREQUENCY 1
+#define   CP_PERIOD  100
+
+extern int user_call;
+extern int WARMUP_STEP;
+extern int PAUSE_STEP;
+CkpvDeclare(int, currentStep);
+CkpvDeclare(int, availAnalyzeCP);
+CksvDeclare(int, availAnalyzeNodeCP);
+CkpvDeclare(int, hasPendingAnalysis);
+CkpvDeclare(CkCallback, callBackAutoPerfDone);
+
+/**
+ * Broadcast the "analysis done" callback to every branch.  Must be called on
+ * PE 0 (asserted).
+ *
+ * @param cb                          Callback to install.
+ * @param frameworkShouldAdvancePhase Not used by this implementation.
+ */
+void PICS_registerAutoPerfDone(CkCallback cb, int frameworkShouldAdvancePhase)
+{
+  CkAssert(CkMyPe() == 0);
+  autoPerfProxy.setAutoPerfDoneCallback(cb);
+}
+
+/**
+ * Declare the phases of the application.
+ *
+ * The names are packed into consecutive 40-byte, zero-padded slots, which is
+ * the layout setNumOfPhases() expects.  Names longer than 39 characters are
+ * truncated; they used to overflow their slot.
+ *
+ * @param fromGlobal true to broadcast to every branch, false to update only
+ *                   the local branch.
+ * @param num        Number of phases.
+ * @param names      num C strings with the phase names.
+ */
+void PICS_setNumOfPhases(bool fromGlobal, int num, char *names[]) {
+  const int slot = 40;
+  // std::vector replaces the variable-length array (not standard C++) and
+  // starts zero-filled, so no uninitialized bytes are sent.  At least one
+  // byte is allocated so that &seqNames[0] is always valid.
+  std::vector<char> seqNames(num > 0 ? num*slot : 1, '\0');
+  for(int i=0; i<num; i++)
+  {
+    // slot-1 keeps the final byte of each slot as the terminator.
+    strncpy(&seqNames[0]+i*slot, names[i], slot-1);
+  }
+  if(fromGlobal)
+    autoPerfProxy.setNumOfPhases(num, &seqNames[0]);
+  else
+    autoPerfProxy.ckLocalBranch()->setNumOfPhases(num, &seqNames[0]);
+}
+
+/**
+ * Start phase phaseId, on every branch (fromGlobal) or only on the local
+ * branch.
+ */
+void PICS_startPhase( bool fromGlobal, int phaseId)
+{
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->startPhase(phaseId);
+    return;
+  }
+  autoPerfProxy.startPhase(phaseId);
+}
+
+/**
+ * End the current phase, on every branch (fromGlobal) or only on the local
+ * branch.
+ */
+void PICS_endPhase( bool fromGlobal)
+{
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->endPhase();
+    return;
+  }
+  autoPerfProxy.endPhase();
+}
+
+/**
+ * Start a step, on every branch (fromGlobal) or only on the local branch.
+ * Also records that the user drives the step API.
+ */
+void PICS_startStep(bool fromGlobal)
+{
+  //Sets call flag to 1 whenever this is called by the user
+  user_call = 1;
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->startStep();
+    return;
+  }
+  autoPerfProxy.startStep();
+}
+
+/**
+ * End the current step, advancing the step count by one, on every branch
+ * (fromGlobal) or only on the local branch.  Also records that the user
+ * drives the step API.
+ */
+void PICS_endStep(bool fromGlobal )
+{
+  user_call = 1;
+  const int thisPe = CkMyPe();
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->endStep(fromGlobal, thisPe, 1);
+    return;
+  }
+  autoPerfProxy.endStep(fromGlobal, thisPe, 1);
+}
+
+/**
+ * End the current step, advancing the step count by incSteps, on every branch
+ * (fromGlobal) or only on the local branch.
+ *
+ * NOTE(review): unlike PICS_endStep() this does not set user_call -- confirm
+ * whether that is intended.
+ */
+void PICS_endStepInc(bool fromGlobal, int incSteps  )
+{
+  const int thisPe = CkMyPe();
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->endStep(fromGlobal, thisPe, incSteps);
+    return;
+  }
+  autoPerfProxy.endStep(fromGlobal, thisPe, incSteps);
+}
+
+
+
+/**
+ * End the current step and pass cb along to endStepResumeCb(), on every
+ * branch (fromGlobal) or only on the local branch.
+ */
+void PICS_endStepResumeCb( bool fromGlobal, CkCallback cb)
+{
+  if(!fromGlobal) {
+    autoPerfProxy.ckLocalBranch()->endStepResumeCb(false, CkMyPe(), cb);
+    return;
+  }
+  autoPerfProxy.endStepResumeCb(true, CkMyPe(), cb);
+}
+
+/** Broadcast run() to every branch, naming this PE as the origin. */
+void PICS_autoPerfRun( )
+{
+  const int originPe = CkMyPe();
+  autoPerfProxy.run(true, originPe);
+}
+
+/** Broadcast setCbAndRun() with cb to every branch, naming this PE as the origin. */
+void PICS_autoPerfRunResumeCb(CkCallback cb )
+{
+  const int originPe = CkMyPe();
+  autoPerfProxy.setCbAndRun(true, originPe, cb);
+}
+
+/** Call run() on the local branch only. */
+void PICS_localAutoPerfRun( )
+{
+  const int originPe = CkMyPe();
+  autoPerfProxy.ckLocalBranch()->run(false, originPe);
+}
+
+//called by PE0
+/** Timer callback: broadcast endPhaseAndStep() to every branch. */
+void startAnalysis()
+{
+  const int originPe = CkMyPe();
+  autoPerfProxy.endPhaseAndStep(true, originPe);
+}
+
+/** Schedule startAnalysis() on this PE with a delay argument of 100. */
+void PICS_SetAutoTimer()
+{
+  CcdCallFnAfterOnPE((CcdVoidFn)startAnalysis, NULL, 100, CkMyPe());
+}
+
+/**
+ * Idle callback.  Once both group ids are non-zero, the node-level
+ * availability flag is set and no analysis is pending, clear the flag and
+ * schedule autoPerfReset() after CP_PERIOD to set it again.
+ */
+void startAnalysisonIdle()
+{
+  // Guards are evaluated in the same order as the original conjunction.
+  if (traceAutoPerfGID.idx == 0)
+    return;
+  if (((CkGroupID)autoPerfProxy).idx == 0)
+    return;
+  if (CksvAccess(availAnalyzeNodeCP) != 1)
+    return;
+  if (CkpvAccess(hasPendingAnalysis) != 0)
+    return;
+
+  CksvAccess(availAnalyzeNodeCP) = 0;
+  CcdCallFnAfterOnPE((CcdVoidFn)autoPerfReset, NULL, CP_PERIOD, CkMyPe());
+}
+
+/** Set the node-level availability flag availAnalyzeNodeCP to 1. */
+void autoPerfReset() {
+  CksvAccess(availAnalyzeNodeCP) = 1;
+}
+
+/** Clear this PE's hasPendingAnalysis flag. */
+void setNoPendingAnalysis() {
+  CkpvAccess(hasPendingAnalysis) = 0;
+}
+
+/** Broadcast registerPerfGoal(goalIndex) to every branch. */
+void registerPerfGoal(int goalIndex) {
+  autoPerfProxy.registerPerfGoal(goalIndex);
+}
+
+/** Broadcast setUserDefinedGoal(value) to every branch. */
+void setUserDefinedGoal(double value) {
+  autoPerfProxy.setUserDefinedGoal(value);
+}
+
+/** Public wrapper around setCollectionMode(). */
+void PICS_setCollectionMode(int m)
+{
+  setCollectionMode(m);
+}
+
+/** Public wrapper around setEvaluationMode(). */
+void PICS_setEvaluationMode(int m)
+{
+  setEvaluationMode(m);
+}
+
+/** Broadcast PICS_markLDBStart(appStep) to every branch. */
+void PICS_markLDBStart(int appStep)
+{
+  autoPerfProxy.PICS_markLDBStart(appStep);
+}
+
+/** Broadcast PICS_markLDBEnd() to every branch. */
+void PICS_markLDBEnd()
+{
+  autoPerfProxy.PICS_markLDBEnd();
+}
+
+/** Store steps in the global WARMUP_STEP. */
+void PICS_setWarmUpSteps(int steps)
+{
+  WARMUP_STEP = steps;
+}
+
+
+/** Store steps in the global PAUSE_STEP. */
+void PICS_setPauseSteps(int steps)
+{
+  PAUSE_STEP = steps;
+}
diff --git a/src/ck-pics/picsautoperfAPI.h b/src/ck-pics/picsautoperfAPI.h
new file mode 100644 (file)
index 0000000..5abf7ad
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __AUTOPERFAPI__H__
+#define __AUTOPERFAPI__H__
+
+#include "picsdefs.h"
+#include "charm++.h"
+#ifdef __cplusplus 
+    extern "C" {
+#endif
+
+void PICS_registerAutoPerfDone(CkCallback cb, int frameworkShouldAdvancePhase);
+void PICS_setNumOfPhases(bool fromGlobal, int num, char *names[]);
+
+void PICS_startStep(bool fromGlobal);
+void PICS_endStep(bool fromGlobal);
+void PICS_endStepInc(bool fromGlobal, int incSteps);
+void PICS_endStepResumeCb(bool fromGlobal,  CkCallback cb);
+void PICS_startPhase(bool fromGlobal, int phaseId);
+void PICS_endPhase(bool fromGlobal);
+
+void PICS_localAutoPerfRun( );
+
+void PICS_autoPerfRun();
+void PICS_autoPerfRunResumeCb(CkCallback cb);
+
+void PICS_SetAutoTimer();
+
+void PICS_setCollectionMode(int m) ;
+
+void PICS_setEvaluationMode(int m) ;
+
+void PICS_markLDBStart(int appStep);
+
+void PICS_markLDBEnd();
+
+void PICS_setWarmUpSteps(int steps);
+
+void PICS_setPauseSteps(int steps);
+
+#ifdef __cplusplus 
+    }
+#endif
+#endif
diff --git a/src/ck-pics/picsautoperfAPIC.h b/src/ck-pics/picsautoperfAPIC.h
new file mode 100644 (file)
index 0000000..8bf3534
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef __AUTOPERFAPIC__H__
+#define __AUTOPERFAPIC__H__
+
+//three types of applications to start analysis
+//global barrier for each time step
+
+//local time step
+
+//no timestep, analysis starts when idle
+void startAnalysisonIdle();
+void autoPerfReset();
+
+void registerTuneGoal(int goalIndex);
+
+void setUserDefinedGoal(double value);
+
+#endif
diff --git a/src/ck-pics/picsdecisiontree.C b/src/ck-pics/picsdecisiontree.C
new file mode 100644 (file)
index 0000000..f7e2478
--- /dev/null
@@ -0,0 +1,357 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <string>
+#include <vector>
+#include <queue>
+#include <stack>
+#include <map>
+#include "picsdefs.h"
+#include "picsdecisiontree.h"
+
+#define NAME_LENGTH 30
+
+// Creates an empty tree; build() installs the root later.
+DecisionTree::DecisionTree() : root(NULL) {
+}
+
+// Wraps an already constructed node as the root of this tree.
+DecisionTree::DecisionTree(TreeNode *p) : root(p) {
+}
+
+typedef std::map<std::string, int> keyid_map;
+typedef std::map<std::string, TreeNode*> tree_map;
+
+// Returns the node registered under `name`, or NULL when there is none.
+// Unlike tree_map::operator[], this never inserts a NULL placeholder.
+static TreeNode *findTreeNode(tree_map &nodes, const char *name) {
+  tree_map::iterator pos = nodes.find(name);
+  return (pos == nodes.end()) ? NULL : pos->second;
+}
+
+// Reports a tree-file line that could not be parsed or attached to the tree.
+static void reportBadTreeLine(const char *reason, const char *line) {
+  fprintf(stderr, "DecisionTree::build: %s, skipping line: %s", reason, line);
+}
+
+// Builds the decision tree from the text file `filename`.
+//
+// Every line starts with "<nodeType> <key> <type> <field>":
+//   -1  root           creates the node registered as "Root"
+//    0  internal node  "<op> <flag>" follows; flag -1 compares against a
+//                      constant base, flag 0 against another field
+//    1  leaf/solution  "<numOfParents> <parent> [<parent> ...]" follows
+// Lines starting with '#' are comments. Lines that cannot be parsed, or that
+// name a parent which has not been defined yet, are reported on stderr and
+// skipped. If the file cannot be opened a message is printed and the tree is
+// left unchanged.
+//
+// Names that are missing from the lookup tables (field, operator, symbol,
+// direction, effect) still map to index 0, as before.
+void DecisionTree::build(char *filename) {
+
+  keyid_map fieldMap;
+  keyid_map updownMap;
+  keyid_map effectMap;
+  keyid_map symbolMap;
+  keyid_map opMap;
+  //setup map
+  for(int i=0; i<NUM_NODES; i++) { fieldMap[FieldName[i]] = i; }
+  for(int i=0; i<PICS_NUM_EFFECTS; i++) { effectMap[EffectName[i]] = i; }
+
+  updownMap["UP"] = UP;
+  updownMap["DOWN"] = DOWN;
+
+  symbolMap["IS"] = 0;
+  symbolMap["LT"] = 1;
+  symbolMap["GT"] = 2;
+  symbolMap["NLT"] = 3;
+  symbolMap["NGT"] = 4;
+  symbolMap["NOTIS"] = 5;
+
+  opMap["ADD"] = 0;
+  opMap["SUB"] = 1;
+  opMap["MUL"] = 2;
+  opMap["DIV"] = 3;
+
+  if(filename == NULL) {
+    fprintf(stderr, "DecisionTree::build: no tree file name given\n");
+    return;
+  }
+  FILE *fp = fopen(filename, "r");
+  if(fp == NULL) {
+    fprintf(stderr, "DecisionTree::build: cannot open tree file %s\n", filename);
+    return;
+  }
+
+  // The %29s / %9s widths below are the buffer sizes minus the terminator.
+  int nodeType;
+  char keyStr[NAME_LENGTH];
+  char fieldTypeName[NAME_LENGTH];
+  char symbol[NAME_LENGTH];
+  char parentName[NAME_LENGTH];
+  char typeStr[10];
+  char baseFieldType[NAME_LENGTH];
+  char op[10];
+  int  fieldType;
+  int  flag;
+  char avgMinMax[10];
+  size_t len = 0;
+  char *line = NULL;
+  tree_map nodemap;
+  TreeNode *node;
+  TreeNode *parent;
+  int numOfParents;
+  double threshold;
+  float base;
+  Condition *cond;
+  Solution *sol;
+  int sumbytes;
+  int bytes;
+  int fields;
+
+  while( getline(&line, &len, fp) != -1) {
+    if(line[0] == '#')
+      continue;
+    sumbytes = 0;
+    bytes = 0;   // %n is only written when every preceding conversion matched
+    fields = sscanf(line, "%d %29s  %9s  %29s %n", &nodeType, keyStr, typeStr, fieldTypeName, &bytes);
+    if(fields < 1)
+      continue;  // blank line or no leading node type: nothing to do
+    sumbytes += bytes;
+
+    switch(nodeType) {
+    case -1:        //root
+      root = nodemap["Root"]= new TreeNode(NULL, new Condition());
+      break;
+
+    case 0:     //internal node
+    {
+      if(fields < 4) {
+        reportBadTreeLine("incomplete internal node", line);
+        break;
+      }
+      const std::string fieldKey = std::string(typeStr) + "_" + fieldTypeName;
+      fieldType = fieldMap[fieldKey];
+      bytes = 0;
+      if(sscanf(line+sumbytes, "%9s %n", op, &bytes) != 1) {
+        reportBadTreeLine("missing operator", line);
+        break;
+      }
+      sumbytes += bytes;
+      bytes = 0;
+      if(sscanf(line+sumbytes, "%d %n", &flag, &bytes) != 1) {
+        reportBadTreeLine("missing base flag", line);
+        break;
+      }
+      sumbytes += bytes;
+      if(flag == -1)
+      {
+        //compare (field op constant) with the default threshold
+        if(sscanf(line+sumbytes, "%f    %29s  %d %29s %n", &base, symbol, &numOfParents, parentName, &bytes) != 4) {
+          reportBadTreeLine("incomplete constant-base condition", line);
+          break;
+        }
+        parent = findTreeNode(nodemap, parentName);
+        if(parent == NULL) {
+          reportBadTreeLine("unknown parent node", line);
+          break;
+        }
+        cond = new Condition(keyStr, fieldType, (Operator)opMap[op], base, (CompareSymbol)(symbolMap[symbol]));
+        node = new TreeNode(parent, cond);
+        nodemap[keyStr] = node;
+        parent->addChild(node);
+      }else if(flag == 0)
+      {
+        //compare (field op otherField) with an explicit threshold
+        if(sscanf(line+sumbytes, "%9s    %29s  %29s  %lf  %d %29s %n", avgMinMax, baseFieldType, symbol, &threshold, &numOfParents, parentName, &bytes) != 6) {
+          reportBadTreeLine("incomplete field-base condition", line);
+          break;
+        }
+        parent = findTreeNode(nodemap, parentName);
+        if(parent == NULL) {
+          reportBadTreeLine("unknown parent node", line);
+          break;
+        }
+        const std::string baseKey = std::string(avgMinMax) + "_" + baseFieldType;
+        cond = new Condition(keyStr, fieldType, (Operator)opMap[op], fieldMap[baseKey], threshold,  (CompareSymbol)symbolMap[symbol]);
+        node =  new TreeNode(parent, cond);
+        nodemap[keyStr] = node;
+        parent->addChild(node);
+      }
+      break;
+    }
+
+    case 1:     //leaf
+      if(fields < 4) {
+        reportBadTreeLine("incomplete solution node", line);
+        break;
+      }
+      bytes = 0;
+      if(sscanf(line+sumbytes,"%d  %29s %n", &numOfParents, parentName, &bytes) != 2) {
+        reportBadTreeLine("missing parent list", line);
+        break;
+      }
+      sumbytes += bytes;
+      //one Solution object is shared by the leaf nodes of all listed parents
+      sol = new Solution( (Direction)updownMap[typeStr], (Effect)effectMap[fieldTypeName]);
+      parent = findTreeNode(nodemap, parentName);
+      if(parent != NULL) {
+        node = new TreeNode(parent, sol);
+        parent->addChild(node);
+      } else {
+        reportBadTreeLine("unknown parent node", line);
+      }
+      for(int i=1; i<numOfParents; i++) {
+        bytes = 0;
+        if(sscanf(line+sumbytes, "%29s %n", parentName, &bytes) != 1) {
+          reportBadTreeLine("fewer parents than announced", line);
+          break;
+        }
+        sumbytes += bytes;
+        parent = findTreeNode(nodemap, parentName);
+        if(parent == NULL) {
+          reportBadTreeLine("unknown parent node", line);
+          continue;
+        }
+        node = new TreeNode(parent, sol);
+        parent->addChild(node);
+      }
+      break;
+    }
+  }
+
+  free(line);   // buffer allocated by getline()
+  fclose(fp);
+}
+
+// Intentionally empty: nodes are currently only added by build().
+void DecisionTree::addNodes() {
+}
+
+// Prints the tree in breadth-first order. Each node is written as
+// "{<parent><node>}" on its own line, using the nodes' printMe().
+// Does nothing when the tree has no root yet.
+void DecisionTree::BFS() {
+  if(root == NULL)
+    return;
+
+  std::queue<TreeNode*> pending;
+  pending.push(root);
+  while(!pending.empty())
+  {
+    TreeNode *current = pending.front();
+    pending.pop();
+    printf("{");
+    if(current->getParent()!=NULL)
+      current->getParent()->printMe();
+    current->printMe();
+    printf("}\n");
+    for(current->beginChild(); !(current->isEndChild()); current->nextChild())
+    {
+      pending.push(current->getCurrentChild());
+    }
+  }
+}
+
+// Walks the tree depth-first (explicit stack) starting at the root and
+// collects the solutions reachable through conditions that hold for `input`.
+//
+//   input     - measured values, indexed by FieldIndex_t
+//   solutions - one effect -> potential-improvement map per priority level;
+//               only solutions[level] is written, the entries below `level`
+//               are consulted for conflicts
+//   level     - priority level being filled
+//   problems  - receives the Condition of every internal node whose test passed
+//   fp        - report file that accepted nodes are written to
+//
+// Effects are signed: a negated effect id denotes the opposite direction.
+// NOTE(review): root is dereferenced without a NULL check and solutions[level]
+// is indexed without a bounds check - callers must guarantee both.
+void DecisionTree::DFS( double *input, std::vector<IntDoubleMap>& solutions,
+    int level, std::vector<Condition*>& problems, FILE *fp) {
+  std::stack<TreeNode*> mystack;
+  TreeNode *current = NULL;
+  TreeNode *child = NULL;
+  mystack.push(root);
+  while(!mystack.empty()) {
+    current = mystack.top();
+    mystack.pop();
+    for(current->beginChild(); !(current->isEndChild()); current->nextChild())
+    {
+      child = current->getCurrentChild();
+      if(child->isSolution())
+      {
+        int effect = child->getSolutionValue();
+        int ignore = 0;
+        //check higher priority solution for conflicts
+        //(the same effect or its reverse already chosen at a higher priority)
+        for(int higher=0; higher<level; higher++){
+            if(solutions[higher].count(-effect) > 0 || solutions[higher].count(effect) > 0)
+            {
+                ignore = 1;
+                break;
+            }
+        }
+        if(!ignore) {
+            if(solutions[level].count(-effect) > 0){
+                //reverse effect exist, keep the one with larger performance improvement
+                if(current->getPotentialImprove() > solutions[level][-effect])
+                {
+                    printf("\n-----detected conflict effects------ reverse %d %f \n", -effect, solutions[level][-effect]);
+                    solutions[level].erase(-effect);
+                    solutions[level][effect] = current->getPotentialImprove();
+                    child->printDataToFile(input, fp);
+                }
+            }
+            else
+            {
+                //the improvement recorded is the one of the parent condition
+                solutions[level][child->getSolutionValue()] = current->getPotentialImprove();
+                child->printDataToFile(input, fp);
+            }
+        }
+      }
+      else
+      {
+        if(child->test(input))
+        {
+          mystack.push(child);
+          problems.push_back(child->getValue().condition);
+          //-100 means the condition has no estimate of its own: inherit the parent's
+          if(child->getPotentialImprove()==-100)
+            child->setPotentialImprove(current->getPotentialImprove());
+          child->printDataToFile(input, fp);
+        }
+      }
+    }
+  };
+}
+
+//keep the ones without conflict, keep the conflicted one for the problem without other solutions
+//(A,B,C) (B) --> (A,C) (B)
+//
+// Runs sub_DFS() on every child of the root whose condition holds for `input`
+// (one "problem category" per child) and merges the per-category solution
+// sets into solutions[level]:
+//   1. a category with exactly one solution keeps it; otherwise a solution is
+//      kept when no category proposes the reverse effect, or when the same
+//      effect was already chosen at a higher priority level;
+//   2. every category still without a solution gets the first of its
+//      solutions that is already in the result or does not contradict it;
+//   3. all remaining solutions that do not contradict the result are added.
+// Effects are signed: -eff is the reverse of eff.
+//
+//   input     - measured values, indexed by FieldIndex_t
+//   solutions - per-priority-level result maps; solutions[level] is written
+//   level     - priority level being filled
+//   problems  - receives the conditions that tested true (filled by sub_DFS)
+//   fp        - report file
+void DecisionTree::DFS_3( double *input, std::vector<IntDoubleMap>& solutions, int level, std::vector<Condition*>& problems, FILE *fp) {
+  std::vector<IntDoubleMap> rawSolutions;
+  for(root->beginChild(); !(root->isEndChild()); root->nextChild())
+  {
+    TreeNode *child = root->getCurrentChild();
+    if(child->test(input))
+    {
+      //perform here
+      child->printDataToFile(input, fp);
+      rawSolutions.push_back(sub_DFS(input, child, problems, fp, solutions, level));
+    }
+  }
+
+  //keep the ones without conflict
+  std::vector<int> hasSolutions(rawSolutions.size(), 0);
+  IntDoubleMap &results = solutions[level];
+  for(size_t i=0; i<rawSolutions.size(); i++){
+    //if there is only one solution, keep it
+    if(rawSolutions[i].size() == 1)
+    {
+      IntDoubleMap::iterator iter=rawSolutions[i].begin();
+      results[iter->first] = iter->second;
+      hasSolutions[i] = 1;
+      continue;
+    }
+    //keep the solutions without conflict
+    for(IntDoubleMap::iterator iter=rawSolutions[i].begin(); iter!= rawSolutions[i].end(); ++iter)
+    {
+      int eff = iter->first;
+      bool hasConflict = false;
+      for(size_t j=0; j<rawSolutions.size(); j++){
+        if(rawSolutions[j].count(-eff))
+        {
+          hasConflict = true;
+          break;
+        }
+      }
+      if(!hasConflict)
+      {
+        results[eff] = iter->second;
+        hasSolutions[i] = 1;
+      }
+      else{
+        //check whether it exists in the high level solution set, if it does, keep it
+        //(the level index must not reuse `i`: hasSolutions is indexed by category)
+        for(int higher=0; higher<level; higher++){
+          if(solutions[higher].count(eff)>0){
+            results[eff] = iter->second;
+            hasSolutions[i] = 1;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  //try to assign at least one solution to a problem category
+  for(size_t i=0; i<rawSolutions.size(); i++){
+    if(hasSolutions[i])
+      continue;
+    for(IntDoubleMap::iterator iter=rawSolutions[i].begin(); iter!= rawSolutions[i].end(); ++iter)
+    {
+      int eff = iter->first;
+      if(results.count(eff)>0)
+      {
+        hasSolutions[i]= 1;
+        break;
+      }
+      else if(results.count(-eff)==0){
+          results[eff] = iter->second;
+          hasSolutions[i]= 1;
+          break;
+      }
+    }
+  }
+
+  //phase 3, for the other solutions, so long as there is no conflict, keep them
+  for(size_t i=0; i<rawSolutions.size(); i++){
+    for(IntDoubleMap::iterator iter=rawSolutions[i].begin(); iter!= rawSolutions[i].end(); ++iter)
+    {
+      int eff = iter->first;
+      if(results.count(-eff)==0){
+        results[eff] = iter->second;
+      }
+    }
+  }
+
+  //result has the solutions
+}
+
+// Depth-first walk of the subtree below `root` (note: this parameter hides
+// the member of the same name). Returns the effect -> potential-improvement
+// map of every solution reachable through conditions that hold for `input`.
+//
+// A solution is dropped when its reverse effect (-effect) is present in any
+// of the first `level` entries of highPriorSolutions. Conditions that test
+// true are appended to `problems`, and accepted nodes are written to `fp`.
+IntDoubleMap DecisionTree::sub_DFS(double *input, TreeNode *root,
+    std::vector<Condition*>& problems, FILE *fp,
+    std::vector<IntDoubleMap>& highPriorSolutions, int level) {
+  TreeNode *current = NULL;
+  TreeNode *child = NULL;
+  std::stack<TreeNode*> mystack;
+  IntDoubleMap solutions;
+
+  mystack.push(root);
+  while(!mystack.empty()) {
+    current = mystack.top();
+    mystack.pop();
+    for(current->beginChild(); !(current->isEndChild()); current->nextChild())
+    {
+      child = current->getCurrentChild();
+      if(child->isSolution())
+      {
+        int effect = child->getSolutionValue();
+        int ignore = 0;
+        //check higher priority solution for conflicts
+        for(int higher=0; higher<level; higher++){
+            if(highPriorSolutions[higher].count(-effect) > 0 )
+            {
+                ignore = 1;
+                break;
+            }
+        }
+        if(!ignore){
+          //the improvement recorded is the one of the parent condition
+          solutions[effect] = current->getPotentialImprove();
+          child->printDataToFile(input, fp);
+        }
+      }
+      else
+      {
+        if(child->test(input))
+        {
+          mystack.push(child);
+          problems.push_back(child->getValue().condition);
+          //-100 means the condition has no estimate of its own: inherit the parent's
+          if(child->getPotentialImprove()==-100)
+            child->setPotentialImprove(current->getPotentialImprove());
+          child->printDataToFile(input, fp);
+        }
+      }
+    }
+  };
+  return solutions;
+}
+
+
diff --git a/src/ck-pics/picsdecisiontree.h b/src/ck-pics/picsdecisiontree.h
new file mode 100644 (file)
index 0000000..657ade3
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef __DECISIONTREE__H__
+#define __DECISIONTREE__H__
+
+#include <vector>
+#include "picstreenode.h"
+
+// Decision tree whose internal nodes are Conditions over the collected
+// performance fields and whose leaves are Solutions (tuning effects).
+class DecisionTree {
+
+  // Top of the tree; NULL until set by the constructor or by build().
+  TreeNode *root;
+
+public:
+
+  DecisionTree() ;
+  DecisionTree(TreeNode*) ;
+
+  // Reads the tree description from a text file.
+  void build(char *filename);
+
+  // Prints every node, breadth first.
+  void BFS();
+  // Collects into the map at index `level` the solutions whose conditions
+  // hold for `input`.
+  void DFS(double *input, std::vector<IntDoubleMap>&, int level,
+      std::vector<Condition*>&, FILE *fp);
+  // Like DFS, but resolves conflicts between the root's children.
+  void DFS_3(double *input, std::vector<IntDoubleMap>&, int level,
+      std::vector<Condition*>&, FILE *fp);
+  // Helper of DFS_3: collects the solutions below one subtree.
+  IntDoubleMap sub_DFS(double *input, TreeNode *root,
+      std::vector<Condition*>& problems, FILE *fp,
+      std::vector<IntDoubleMap>& highPriorSolutions, int level) ;
+  // Currently a no-op.
+  void addNodes();
+};
+
+#endif
diff --git a/src/ck-pics/picsdefs.h b/src/ck-pics/picsdefs.h
new file mode 100644 (file)
index 0000000..b0fb975
--- /dev/null
@@ -0,0 +1,128 @@
+#ifndef __PICSDEFS__H__
+#define  __PICSDEFS__H__
+
+extern char FieldName[][30];
+extern char EffectName[][30];
+
+// Sizes of the three prefix groups of FieldIndex_t. NUM_NODES is the total
+// number of enumerators: NUM_AVG + NUM_MAX + NUM_MIN plus the two trailing
+// entries MinIdlePE and MaxEntryPE.
+#define NUM_AVG 28
+#define NUM_MIN 9
+#define NUM_MAX 40 
+#define NUM_NODES   79
+// Index of every collected performance field. The order must match the
+// FieldName table, which is indexed by these values.
+// Note the layout is AVG_*, then MAX_*, then MIN_*.
+enum FieldIndex_t {
+  // AVG_* group: indices 0 .. NUM_AVG-1
+  AVG_TotalTime=0,
+  AVG_IdlePercentage,
+  AVG_OverheadPercentage,
+  AVG_UtilizationPercentage,
+  AVG_AppPercentage,
+  AVG_EntryMethodDuration,
+  AVG_EntryMethodDuration_1,
+  AVG_EntryMethodDuration_2,
+  AVG_NumInvocations,
+  AVG_NumInvocations_1,
+  AVG_NumInvocations_2,
+  AVG_LoadPerObject,
+  AVG_LoadPerPE,
+  AVG_NumObjectsPerPE,
+  AVG_BytesPerMsg,
+  AVG_BytesPerObject,
+  AVG_NumMsgsPerObject,
+  AVG_NumMsgPerPE,
+  AVG_CacheMissRate,
+  AVG_BytesPerPE,
+  AVG_ExternalBytePerPE,
+  AVG_CompressTime,
+  AVG_CompressSourceBytes,
+  AVG_CompressDestBytes,
+  AVG_NumMsgRecv,
+  AVG_BytesMsgRecv,
+  AVG_MsgTimeCost,
+  AVG_TuningOverhead,
+  // MAX_* group: indices NUM_AVG .. NUM_AVG+NUM_MAX-1. Each value is followed
+  // by a companion entry; Condition::printDataToFile prints input[index+1]
+  // as an integer for fields in this range.
+  MAX_IdlePercentage,
+  MAX_IdlePE,
+  MAX_OverheadPercentage,
+  MAX_OverheadPE,
+  MAX_UtilizationPercentage,
+  MAX_UtilPE,
+  MAX_AppPercentage,
+  MAX_AppPE,
+  MAX_NumInvocations,
+  MAX_NumInvocPE,
+  MAX_LoadPerObject,
+  MAX_ObjID,
+  MAX_LoadPerPE,
+  MAX_LoadPE,
+  MAX_BytesPerMsg,
+  MAX_BytesEntryID,
+  MAX_BytesPerObject,
+  MAX_ByteObjID,
+  MAX_NumMsgsPerObject,
+  MAX_NumMsgObjID,
+  MAX_BytesPerPE,
+  MAX_BytesPE,
+  MAX_ExternalBytePerPE,
+  MAX_ExternalBytePE,
+  MAX_CriticalPathLength,
+  MAX_CPPE,
+  MAX_NumMsgRecv,
+  MAX_NumMsgRecvPE,
+  MAX_BytesMsgRecv,
+  MAX_BytesMsgRecvPE,
+  MAX_EntryMethodDuration,
+  MAX_EntryID,
+  MAX_EntryMethodDuration_1,
+  MAX_EntryID_1,
+  MAX_EntryMethodDuration_2,
+  MAX_EntryID_2,
+  MAX_NumMsgSend,
+  MAX_NumMsgSendPE,
+  MAX_BytesSend,
+  MAX_BytesSendPE,
+  // MIN_* group: the next NUM_MIN indices
+  MIN_IdlePercentage,
+  MIN_OverheadPercentage,
+  MIN_UtilizationPercentage,
+  MIN_AppPercentage,
+  MIN_LoadPerObject,
+  MIN_LoadPerPE,
+  MIN_BytesPerMsg,
+  MIN_NumMsgRecv,
+  MIN_BytesMsgRecv,
+  // two extra entries outside the prefix groups
+  MinIdlePE,
+  MaxEntryPE
+};
+
+// Number of enumerators in Effect_t; also the row count of EffectName.
+#define PICS_NUM_EFFECTS 11 
+// Tuning effects a Solution can recommend. The order must match EffectName.
+// Solution stores the effect negated for direction DOWN, so
+// PICS_EFF_PERFGOOD (0) cannot carry a direction.
+enum Effect_t {
+  PICS_EFF_PERFGOOD=0,
+  PICS_EFF_GRAINSIZE,
+  PICS_EFF_AGGREGATION,
+  PICS_EFF_COMPRESSION,
+  PICS_EFF_REPLICA,
+  PICS_EFF_LDBFREQUENCY,
+  PICS_EFF_NODESIZE,
+  PICS_EFF_MESSAGESIZE,
+  PICS_EFF_GRAINSIZE_1,
+  PICS_EFF_GRAINSIZE_2,
+  PICS_EFF_UNKNOWN
+};
+
+typedef enum Effect_t Effect;
+
+// Direction of a recommended effect.
+enum Direction_t { UP=0, DOWN};
+// Comparison applied by Condition::test: ==, <, >, >= (NLT), <= (NGT), !=.
+enum CompareSymbol_t {IS=0, LT, GT, NLT, NGT, NOTIS} ;
+// Arithmetic applied between a field value and its base in Condition::test.
+enum Operator_t {ADD=0, SUB, MUL, DIV};
+// NOTE(review): this order (AVG, MIN, MAX) differs from the layout of
+// FieldIndex_t (AVG, MAX, MIN) - confirm before using it to compute offsets.
+enum PREFIX_t {AVG=0, MIN, MAX};
+
+typedef enum CompareSymbol_t    CompareSymbol ; 
+typedef enum Operator_t     Operator;
+typedef enum Direction_t    Direction;
+
+// Mode constants, grouped in pairs by value range (0/1, 10/11, 20/21).
+// NOTE(review): their users are not visible in this header, and the short
+// unprefixed names (FULL, SINGLE, ...) can collide with other macros.
+#define FULL 0
+#define PARTIAL 1
+#define SEQUENTIAL 10
+#define PARALLEL  11
+#define SINGLE 20
+#define MULTIPLE 21
+
+#define   PERIOD_PERF 1
+
+#endif
diff --git a/src/ck-pics/picsdefscpp.h b/src/ck-pics/picsdefscpp.h
new file mode 100644 (file)
index 0000000..c9b0823
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef __PICSDEFS__CPP_H__
+#define  __PICSDEFS__CPP_H__
+
+#include <map>
+#include <vector>
+
+// Ordered int -> double map. The decision tree uses it to map a signed
+// effect id to the potential improvement recorded for it.
+typedef std::map<int,double> IntDoubleMap;
+
+#endif
diff --git a/src/ck-pics/picstreenode.C b/src/ck-pics/picstreenode.C
new file mode 100644 (file)
index 0000000..a0ed970
--- /dev/null
@@ -0,0 +1,294 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "picstreenode.h"
+#include "charm++.h"
+#include "register.h"
+
+
+// Printable name of every FieldIndex_t value, in enum order.
+// DecisionTree::build uses these strings to translate field names found in
+// the tree file into indices. Each name must fit in 29 characters.
+char FieldName[NUM_NODES][30] = {
+ "AVG_TotalTime",
+ "AVG_IdlePercentage",
+ "AVG_OverheadPercentage",
+ "AVG_UtilizationPercentage",
+ "AVG_AppPercentage",
+ "AVG_EntryMethodDuration",
+ "AVG_EntryMethodDuration_1",
+ "AVG_EntryMethodDuration_2",
+ "AVG_NumInvocations",
+ "AVG_NumInvocations_1",
+ "AVG_NumInvocations_2",
+ "AVG_LoadPerObject",
+ "AVG_LoadPerPE",
+ "AVG_NumObjectsPerPE",
+ "AVG_BytesPerMsg",
+ "AVG_BytesPerObject",
+ "AVG_NumMsgsPerObject",
+ "AVG_NumMsgPerPE",
+ "AVG_CacheMissRate",
+ "AVG_BytesPerPE",
+ "AVG_ExternalBytePerPE",
+ "AVG_CompressTime",
+ "AVG_CompressSourceBytes",
+ "AVG_CompressDestBytes",
+ "AVG_NumMsgRecv",
+ "AVG_BytesMsgRecv",
+ "AVG_MsgTimeCost",
+ "AVG_TuningOverhead",
+ "MAX_IdlePercentage",
+ "MAX_IdlePE",
+ "MAX_OverheadPercentage",
+ "MAX_OverheadPE",
+ "MAX_UtilizationPercentage",
+ "MAX_UtilPE",
+ "MAX_AppPercentage",
+ "MAX_AppPE",
+ "MAX_NumInvocations",
+ "MAX_NumInvocPE",
+ "MAX_LoadPerObject",
+ "MAX_ObjID",
+ "MAX_LoadPerPE",
+ "MAX_LoadPE",
+ "MAX_BytesPerMsg",
+ "MAX_BytesEntryID",
+ "MAX_BytesPerObject",
+ "MAX_ByteObjID",
+ "MAX_NumMsgsPerObject",
+ "MAX_NumMsgObjID",
+ "MAX_BytesPerPE",
+ "MAX_BytesPE",
+ "MAX_ExternalBytePerPE",
+ "MAX_ExternalBytePE",
+ "MAX_CriticalPathLength",
+ "MAX_CPPE",
+ "MAX_NumMsgRecv",
+ "MAX_NumMsgRecvPE",
+ "MAX_BytesMsgRecv",
+ "MAX_BytesMsgRecvPE",
+ "MAX_EntryMethodDuration",
+ "MAX_EntryID",
+ "MAX_EntryMethodDuration_1",
+ "MAX_EntryID_1",
+ "MAX_EntryMethodDuration_2",
+ "MAX_EntryID_2",
+ "MAX_NumMsgSend",
+ "MAX_NumMsgSendPE",
+ "MAX_BytesSend",
+ "MAX_BytesSendPE",
+ "MIN_IdlePercentage",
+ "MIN_OverheadPercentage",
+ "MIN_UtilizationPercentage",
+ "MIN_AppPercentage",
+ "MIN_LoadPerObject",
+ "MIN_LoadPerPE",
+ "MIN_BytesPerMsg",
+ "MIN_NumMsgRecv",
+ "MIN_BytesMsgRecv",
+ "MinIdlePE",
+ "MaxEntryPE"
+};
+
+
+// Printable name of every Effect_t value, in enum order. Used to translate
+// effect names from the tree file and to print solutions.
+char EffectName[PICS_NUM_EFFECTS][30] = { 
+  "PICS_EFF_PERFGOOD",
+  "PICS_EFF_GRAINSIZE",
+  "PICS_EFF_AGGREGATION", 
+  "PICS_EFF_COMPRESSION",
+  "PICS_EFF_REPLICA", 
+  "PICS_EFF_LDBFREQUENCY",
+  "PICS_EFF_NODESIZE",
+  "PICS_EFF_MESSAGESIZE",
+  "PICS_EFF_GRAINSIZE_1",
+  "PICS_EFF_GRAINSIZE_2",
+  "PICS_EFF_UNKNOWN"
+};
+
+// Printable forms of Operator_t (ADD, SUB, MUL, DIV), indexed by enum value.
+char operatorName[4][2] = {"+", "-", "*", "/" };
+// Printable forms of CompareSymbol_t (IS, LT, GT, NLT, NGT, NOTIS), indexed by enum value.
+char compareName[6][3] = {"==", "<", ">", ">=", "<=", "!="};
+
+
+// Prints the condition's name to stdout.
+void Condition::printMe() {
+  // name is a std::string; printf's %s needs the C string
+  printf("condition %s \n", name.c_str());
+}
+
+// Writes one report line for this condition to `fp`: its name and indices,
+// the tested field with its current value and operator, the base, the
+// comparison with its threshold, and the potential improvement.
+//
+//   input - measured values, indexed by FieldIndex_t
+//   fp    - open report file
+//
+// Side effect: when thresholdIndex / baseIndex are set, threshold and base
+// are refreshed from `input`, exactly as test() does.
+void Condition::printDataToFile(double *input, FILE *fp) {
+
+  // name is a std::string; fprintf's %s needs the C string
+  fprintf(fp, "Condition  %s %d %d ", name.c_str(), varIndex, baseIndex);
+  if(thresholdIndex > -1)
+    threshold = input[thresholdIndex];
+  if(varIndex>-1)
+    fprintf(fp, "  %s %f %s ", FieldName[varIndex], input[varIndex], operatorName[op]);
+
+  if(baseIndex > -1) {
+    base = input[baseIndex];
+    fprintf(fp, " %s %f ", FieldName[baseIndex], base);
+  }
+  else
+    fprintf(fp, " %f ", base);
+
+  fprintf(fp, " %s %f ", compareName[symbol], threshold);
+  //potential improvement
+  fprintf(fp, " %f ", potentialImprove);
+
+  if(varIndex == MAX_EntryMethodDuration)
+  {
+    //the slot after the value holds the entry method index
+    //NOTE(review): entryIdx is used to index _entryTable without a range check
+    int entryIdx = (int)input[varIndex+1];
+    fprintf(fp, " %d  %s %s ", entryIdx, _entryTable[entryIdx]->name, _chareTable[_entryTable[entryIdx]->chareIdx]->name);
+  }else if(varIndex>=NUM_AVG && varIndex<NUM_AVG+NUM_MAX)
+    fprintf(fp, " %d ", (int)input[varIndex+1]);   //companion slot of a MAX_* value
+
+  fprintf(fp, "\n");
+}
+
+// Evaluates the condition against `input` (indexed by FieldIndex_t):
+//   (input[varIndex] <op> base) <symbol> threshold
+// base and threshold are first refreshed from `input` when baseIndex /
+// thresholdIndex are set. A condition with varIndex == -2 (the default
+// constructed one) is always true.
+//
+// Side effect: potentialImprove is recomputed on every call - from the
+// tested value for the conditions named "CPU_Util", "High_Overhead" and
+// "High_Idle", and set to the sentinel -100 for all others.
+// Exits the process on an unknown operator or comparison symbol.
+bool Condition::test(double *input) {
+  bool ret;
+  double result;
+  if(varIndex == -2) return true;     //always true
+
+  assert(varIndex>-1 && varIndex<NUM_NODES);
+  double realValue = input[varIndex];
+  if(baseIndex > -1)
+    base = input[baseIndex];
+  if(thresholdIndex > -1)
+    threshold = input[thresholdIndex];
+
+  switch(op) {
+  case ADD:
+    result = realValue + base;
+    break;
+
+  case SUB:
+    result = realValue - base;
+    break;
+
+  case MUL:
+    result = realValue * base;
+    break;
+
+  case DIV:
+    //NOTE(review): base is not checked for zero before dividing
+    result = realValue / base;
+    break;
+
+  default:
+    printf("Undefined OP\n");
+    exit(1);
+  }
+
+  switch(symbol) {
+  case IS:
+    ret = (result == threshold);
+    break;
+
+  case LT:
+    ret = (result < threshold);
+    break;
+
+  case GT:
+    ret = (result > threshold);
+    break;
+
+  case NLT:
+    ret = (result >= threshold);
+    break;
+
+  case NGT:
+    ret = (result <= threshold);
+    break;
+
+  case NOTIS:
+    ret = (result != threshold);
+    break;
+
+  default:
+    printf("Undefined symbol \n");
+    exit(1);
+  }
+  //name is a std::string, so compare with std::string::compare, not strcmp
+  if(name.compare("CPU_Util") == 0)
+    potentialImprove = 1 - realValue;
+  else if(name.compare("High_Overhead") == 0)
+    potentialImprove = realValue;
+  else if(name.compare("High_Idle") == 0)
+    potentialImprove = realValue;
+  else
+    potentialImprove = -100;
+
+  return ret;
+}
+
+// Writes "Solution <direction> <effect name>" to fp. `input` is not used.
+// A positive eff prints as "UP"; zero and negative values print as "Down".
+void Solution::printDataToFile(double *input, FILE *fp) {
+  const char *direction = "Down";
+  int effectIndex = -eff;
+  if(eff > 0) {
+    direction = "UP";
+    effectIndex = eff;
+  }
+  fprintf(fp, "Solution %s %s \n", direction, EffectName[effectIndex]);
+}
+
+// Builds an internal (condition) node below parent p.
+TreeNode::TreeNode( TreeNode *p, Condition *c )
+  : parent(p), _isSolution(false) {
+  data.condition = c;
+}
+
+// Builds a leaf (solution) node below parent p.
+TreeNode::TreeNode( TreeNode *p, Solution *s )
+  : parent(p), _isSolution(true) {
+  data.solution = s;
+}
+
+// Appends tn to this node's children. tn's parent pointer is not touched.
+void TreeNode::addChild(TreeNode *tn) {
+  children.push_back(tn);
+}
+
+// Replaces the parent pointer. The old parent's child list is not updated.
+void TreeNode::setParent(TreeNode *p) {
+  parent = p;
+}
+
+// Returns the parent node, or NULL for a node created without one.
+TreeNode* TreeNode::getParent() {
+  return parent;
+}
+
+// Returns the payload union by value; which member is valid depends on
+// isSolution().
+Data TreeNode::getValue() {
+  return data;
+}
+
+// Returns the signed effect id of a solution node (asserts that it is one).
+int TreeNode::getSolutionValue() {
+  assert(_isSolution);
+  return data.solution->getValue();
+}
+
+// The four functions below form a cursor over the children that is stored in
+// the node itself, so only one traversal per node can be active at a time.
+
+// Moves the cursor to the first child.
+void TreeNode::beginChild() {
+  it = children.begin(); 
+}
+
+// Returns non-zero once the cursor has moved past the last child.
+int TreeNode::isEndChild() {
+  return it == children.end();
+}
+
+// Advances the cursor by one child.
+void TreeNode::nextChild() {
+  it++;
+}
+
+// Returns the child under the cursor; must not be called when isEndChild().
+TreeNode* TreeNode::getCurrentChild() {
+  return *it;
+}
+
+// Prints the node's payload: the solution for a leaf, the condition otherwise.
+void TreeNode::printMe() {
+  if(!_isSolution) {
+    data.condition->printMe();
+    return;
+  }
+  data.solution->printMe();
+}
+
+// Writes the node's payload to fp by delegating to the solution (leaf) or
+// the condition (internal node).
+void TreeNode::printDataToFile(double *input, FILE *fp) {
+  if(!_isSolution) {
+    data.condition->printDataToFile(input, fp);
+    return;
+  }
+  data.solution->printDataToFile(input, fp);
+}
+
+
+// Evaluates this node's condition against `input`.
+// A node without children has nothing below it to recommend, so it reports
+// false instead of falling off the end of the function, which was undefined
+// behaviour. Solution leaves never have children, so they are never treated
+// as conditions here.
+bool TreeNode::test(double *input) {
+  if(children.empty())
+    return false;
+  return data.condition->test(input);
+}
diff --git a/src/ck-pics/picstreenode.h b/src/ck-pics/picstreenode.h
new file mode 100644 (file)
index 0000000..e473094
--- /dev/null
@@ -0,0 +1,129 @@
+#ifndef __TREENODE__H__
+#define __TREENODE__H__
+
+#include <vector>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "charm++.h"
+#include "picsdefs.h"
+#include "picsdefscpp.h"
+class TreeNode;
+
+typedef std::vector<TreeNode*> NodeCollection;
+typedef std::vector<TreeNode*>::iterator NodeIter;
+
+// One test of the decision tree:
+//   (input[varIndex] <op> base) <symbol> threshold
+// base and threshold are constants, or are re-read from the input array when
+// baseIndex / thresholdIndex are not -1.
+// Every constructor initialises every member (in declaration order), so no
+// indeterminate value can be read by test(), printDataToFile() or
+// getPotentialImprove().
+class Condition {
+private:
+  std::string name;
+  int     varIndex;         // field to test; -2 means "always true"
+  double  base;
+  int     baseIndex;        // -1: use the constant base
+  int     thresholdIndex;   // -1: use the constant threshold
+  double  threshold;
+  CompareSymbol symbol;
+  Operator op;
+  //potential performance improvement if problem solved
+  //(-100 is the "no own estimate" sentinel also used by test())
+  double potentialImprove;
+
+public:
+  // Always-true condition, used for the root of the tree.
+  Condition() : varIndex(-2), base(0), baseIndex(-1), thresholdIndex(-1),
+      threshold(0), symbol(IS), op(ADD), potentialImprove(-100) {}
+
+  // Condition against a constant base; the threshold is 0.
+  Condition(const char *n, int _varIndex, Operator _op,  double _base,
+      CompareSymbol c) : name(n), varIndex(_varIndex), base(_base),
+      baseIndex(-1), thresholdIndex(-1), threshold(0), symbol(c), op(_op),
+      potentialImprove(-100) {
+  }
+
+  // Condition whose base is another field (input[_baseIndex]), compared
+  // against a constant threshold.
+  Condition(const char *n, int _varIndex, Operator _op, int _baseIndex,
+      double _threshold, CompareSymbol c) : name(n), varIndex(_varIndex),
+      base(0), baseIndex(_baseIndex), thresholdIndex(-1),
+      threshold(_threshold), symbol(c), op(_op), potentialImprove(-100) {
+  }
+
+  double getPotentialImprove() { return potentialImprove;}
+  void setPotentialImprove(double v) { potentialImprove = v;}
+  void printMe();
+  void printDataToFile(double *input, FILE *fp);
+  bool test(double *input); //test whether this condition is satisfied with input data
+};
+
+// A recommended tuning action: an Effect together with a Direction, packed
+// into one signed integer.
+class Solution {
+
+private:
+  // +effect for UP, -effect for DOWN.
+  // NOTE(review): effect 0 (PICS_EFF_PERFGOOD) is its own negation, so its
+  // direction is lost and it always prints as "Down".
+  int eff;
+
+public:
+  Solution(Direction d, Effect n) {
+    if(d == UP)
+      eff = n;
+    else
+      eff = -n;
+  }
+  // Prints "solution <direction>  <effect name>" through CkPrintf.
+  void printMe(){
+    int abseff = eff>=0?eff:-eff;
+    CkPrintf("solution %s  %s \n", eff>0?"UP":"Down", EffectName[abseff]);
+  }
+  void printDataToFile(double *input, FILE *fp) ;
+  // Returns the signed effect value.
+  int getValue() { return eff;}
+};
+
+// Payload of a TreeNode. The union carries no tag of its own:
+// TreeNode::isSolution() tells which member is valid.
+union Data_t {
+  Condition *condition;
+  Solution *solution;
+};
+
+typedef union Data_t Data;
+
+// Node of the decision tree: either an internal node holding a Condition or
+// a leaf holding a Solution. Nodes keep raw pointers to parent, children and
+// payload; this class defines no destructor.
+class TreeNode {
+
+private:
+  TreeNode *parent;
+  //bool isLeaf;
+  NodeCollection children;
+  // Cursor used by beginChild()/nextChild()/getCurrentChild(). It is stored
+  // in the node, so only one traversal of a node's children can be active.
+  NodeIter it;
+  Data data;
+  // Selects the valid member of `data`.
+  bool _isSolution;
+
+public:
+
+  TreeNode(TreeNode *parent, Condition *c);
+
+  TreeNode(TreeNode *parent, Solution *s);
+
+  void addChild(TreeNode*);
+
+  TreeNode* getParent();
+  void setParent(TreeNode *p);
+
+  Data getValue();
+
+  // Signed effect id; only valid on solution nodes.
+  int getSolutionValue();
+
+  // Child cursor: beginChild(), then loop while !isEndChild(), nextChild().
+  void beginChild();
+  int isEndChild();
+
+  TreeNode* getCurrentChild();
+  void nextChild();
+
+  void printMe();
+  void printDataToFile(double *input, FILE *fp);
+
+  // Evaluates the node's condition against the input values.
+  bool test(double *input) ;
+
+  bool isLeaf() { return children.size()==0;}
+
+  bool isSolution() { return _isSolution; }
+
+  // The two accessors below are only valid on condition nodes.
+  double getPotentialImprove() {
+    CkAssert(!_isSolution);
+    return data.condition->getPotentialImprove();
+  }
+  void setPotentialImprove(double v) {
+    CkAssert(!_isSolution);
+    data.condition->setPotentialImprove(v);
+  }
+};
+
+#endif
diff --git a/src/ck-pics/trace-perf.C b/src/ck-pics/trace-perf.C
new file mode 100644 (file)
index 0000000..c502c5a
--- /dev/null
@@ -0,0 +1,389 @@
+#include "trace-perf.h"
+#include <stdlib.h>
+CkpvStaticDeclare(TraceAutoPerf*, _trace);
+
+// Returns the TraceAutoPerf pointer stored in the Ckpv variable _trace.
+TraceAutoPerf *localAutoPerfTracingInstance()
+{
+  return CkpvAccess(_trace);
+}
+
+// Sets up the trace module with one zeroed PerfData record as the initial
+// summary target and resets all counters. `argv` is not used here.
+TraceAutoPerf::TraceAutoPerf(char **argv)
+{
+    // resetAll() reads whenStoppedTracing, so it must be set before the call.
+    nesting_level = 0;
+    whenStoppedTracing = 0;
+    // ::malloc because the class declares a member function named malloc.
+    currentSummary = currentTraceData = (PerfData*)::malloc(sizeof(PerfData) );
+    if(currentSummary == NULL)
+      CmiAbort("TraceAutoPerf: failed to allocate PerfData!\n");
+    memset(currentSummary, 0, sizeof(PerfData));
+    resetAll();
+#if CMK_HAS_COUNTER_PAPI
+    initPAPI();
+#endif
+    // NOTE(review): as the last statement this early return has no effect.
+    if (CkpvAccess(traceOnPe) == 0) return;
+}
+
+// When tracing is on and a new analysis begins, advances the performance
+// database by one step and points both data pointers at its current record.
+void TraceAutoPerf::startStep(bool newAnalysis) {
+  if(!isTraceOn || !newAnalysis)
+    return;
+  CkpvAccess(perfDatabase)->advanceStep();
+  currentTraceData = CkpvAccess(perfDatabase)->getCurrentPerfData();
+  currentSummary = currentTraceData;
+}
+
+// Selects the PerfData slot of (step, phaseId) as the summary target and
+// resets all counters. Does nothing while tracing is off.
+void TraceAutoPerf::startPhase(int step, int phaseId) {
+  if(!isTraceOn)
+    return;
+  // slots are laid out step-major: numOfPhases consecutive records per step
+  currentSummary = currentTraceData + step*CkpvAccess(numOfPhases) +  phaseId;
+  resetAll();
+}
+
+// Closes the current phase by calling getSummary() (result ignored).
+// Does nothing while tracing is off.
+void TraceAutoPerf::endPhase() {
+  if(!isTraceOn)
+    return;
+  getSummary();
+}
+
+// When tracing is on and this step belongs to a new analysis, tells the
+// performance database that the current record is complete.
+void TraceAutoPerf::endStep( bool newAnalysis) {
+  if(!isTraceOn || !newAnalysis)
+    return;
+  CkpvAccess(perfDatabase)->endCurrent();
+}
+
+// Intentionally empty; resetAll() is the function that clears the counters.
+void TraceAutoPerf::resetTimings(){
+}
+
+// Clears all accumulated timers and counters and restarts the reference
+// timers at the current wall time. The per-object load records are released.
+void TraceAutoPerf::resetAll(){
+  ObjectLoadMap_t::iterator  iter;
+  double curTimer = CkWallTimer();
+  totalIdleTime = 0.0;
+  totalEntryMethodTime = 0.0;
+  totalEntryMethodTime_1 = 0.0;
+  totalEntryMethodTime_2 = 0.0;
+  totalAppTime = 0.0;
+  tuneOverheadTotalTime = 0.0;
+  maxEntryTime = 0;
+  maxEntryTime_1 = 0;
+  maxEntryTime_2 = 0;
+  totalEntryMethodInvocations = 0;
+  totalEntryMethodInvocations_1 = 0;
+  totalEntryMethodInvocations_2 = 0;
+  startTimer = lastBeginIdle = lastBeginExecuteTime = lastResetTime = curTimer;
+  totalUntracedTime = 0;
+  numNewObjects = 0;
+  // The ObjInfo records are allocated with new in endExecute(); clearing the
+  // map alone would leak them.
+  for(iter = objectLoads.begin(); iter != objectLoads.end(); ++iter)
+    delete iter->second;
+  objectLoads.clear();
+  // if tracing is currently stopped, the untraced interval restarts now
+  if(whenStoppedTracing !=0){
+    whenStoppedTracing = curTimer;
+  }
+#if CMK_HAS_COUNTER_PAPI
+  memcpy(previous_papiValues, papiValues, sizeof(LONG_LONG_PAPI)*NUMPAPIEVENTS);
+#endif
+}
+
+// Resumes tracing: the time elapsed since the matching traceEnd() is added
+// to totalUntracedTime and the stop marker is cleared.
+void TraceAutoPerf::traceBegin(void){
+  if(!isTraceOn)
+    return;
+  if(whenStoppedTracing != 0) {
+    const double untraced = CkWallTimer() - whenStoppedTracing;
+    totalUntracedTime += untraced;
+  }
+  whenStoppedTracing = 0;
+}
+
+// Suspends tracing and remembers when it stopped.
+void TraceAutoPerf::traceEnd(void){
+  if(!isTraceOn)
+    return;
+  // can't support nested traceEnds on one processor yet...
+  CkAssert(whenStoppedTracing == 0);
+  whenStoppedTracing = CkWallTimer();
+}
+
+// Point user events are ignored by this trace module.
+void TraceAutoPerf::userEvent(int eventID) { }
+
+// Adds the duration (et - bt) of compression and decompression events to
+// AVG_CompressTime of the current summary; all other events are ignored.
+void TraceAutoPerf::userBracketEvent(int eventID, double bt, double et) {
+  if(!isTraceOn)
+    return;
+  const bool isCompressionEvent =
+      (eventID == DECOMPRESS_EVENT_NO) || (eventID == COMPRESS_EVENT_NO);
+  if(isCompressionEvent)
+    currentSummary->data[AVG_CompressTime] += (et-bt);
+}
+
+// Records the wall time at which a tuning-overhead interval starts.
+void TraceAutoPerf::beginTuneOverhead()
+{
+  if(!isTraceOn)
+    return;
+  tuneOverheadStartTimer = CkWallTimer();
+}
+
+// Adds the time since beginTuneOverhead() to tuneOverheadTotalTime.
+void TraceAutoPerf::endTuneOverhead()
+{
+  if(!isTraceOn)
+    return;
+  const double elapsed = CkWallTimer() - tuneOverheadStartTimer;
+  tuneOverheadTotalTime += elapsed;
+}
+
+// Records the wall time at which an application-work interval starts.
+void TraceAutoPerf::beginAppWork()
+{
+  if(!isTraceOn)
+    return;
+  appWorkStartTimer = CkWallTimer();
+}
+
+// Adds the time since beginAppWork() to totalAppTime.
+void TraceAutoPerf::endAppWork()
+{
+  if(!isTraceOn)
+    return;
+  const double elapsed = CkWallTimer() - appWorkStartTimer;
+  totalAppTime += elapsed;
+}
+
+// Counts one newly created object; the count is added to the number of
+// objects reported by summarizeObjectInfo().
+void TraceAutoPerf::countNewChare()
+{
+  if(!isTraceOn)
+    return;
+  ++numNewObjects;
+}
+
+// The three message-creation hooks below are intentionally empty: this trace
+// module does not record message creation.
+void TraceAutoPerf::creation(envelope *env, int epIdx, int num) { 
+} 
+
+void TraceAutoPerf::creationMulticast(envelope *, int epIdx, int num, int *pelist) { }
+
+void TraceAutoPerf::creationDone(int num) { }
+
+// Counts one received message and adds its size in bytes to the current
+// summary. `env` is not inspected.
+void TraceAutoPerf::messageRecv(void *env, int size) {
+  if(!isTraceOn)
+    return;
+  currentSummary->data[AVG_NumMsgRecv] += 1;
+  currentSummary->data[AVG_BytesMsgRecv] += size;
+}
+
+// Message sends are not recorded yet: the traced branch is empty.
+void TraceAutoPerf::messageSend(void *env, int pe, int size) {
+  if(isTraceOn){
+  }
+}
+
+// Marks the start of an execution identified only by an object id.
+// The message size is recorded as 0 and the entry point index as 0.
+void TraceAutoPerf::beginExecute(CmiObjId *tid)
+{
+  if(isTraceOn){
+    lastBeginExecuteTime = CkWallTimer();
+    lastEvent =  BEGIN_PROCESSING;
+    lastbeginMessageSize = 0;
+    currentObject = tid;
+    currentEP = 0;
+  }
+}
+
+// Marks the start of an entry method execution on `obj`. Message size and
+// entry point index are read from the envelope.
+// NOTE(review): env is dereferenced without a NULL check - confirm callers
+// always pass an envelope.
+void TraceAutoPerf::beginExecute(envelope *env, void *obj)
+{
+  if(isTraceOn){
+    lastBeginExecuteTime = CkWallTimer();
+    lastEvent =  BEGIN_PROCESSING;
+    lastbeginMessageSize = env->getTotalsize();
+    currentObject = obj;
+    currentEP = env->getEpIdx();
+#if USE_MIRROR
+    if(_entryTable[currentEP]->mirror){
+      currentAID = env->getArrayMgr().idx;
+    }
+#endif
+  }
+}
+
+// Marks the start of an entry method execution described by an envelope and
+// an explicit entry point index `ep`. The message size is taken from the
+// envelope; event, msgType, srcPe, mlen and idx are not used.
+// NOTE(review): unlike the other overloads this one does not set
+// currentObject, so endExecute() accounts the time to whatever value it
+// still holds (NULL after a previous endExecute()) - confirm this is intended.
+void TraceAutoPerf::beginExecute(envelope *env, int event,int msgType,int ep,
+    int srcPe, int mlen, CmiObjId *idx)
+{
+  if(isTraceOn){
+    lastbeginMessageSize = env->getTotalsize();
+    lastBeginExecuteTime = CkWallTimer();
+    lastEvent =  BEGIN_PROCESSING;
+    currentEP = ep; 
+#if USE_MIRROR
+    if(_entryTable[currentEP]->mirror){
+      currentAID = env->getArrayMgr().idx;
+      currentIDX = env->getsetArrayIndex().getCombinedCount();
+    }
+#endif
+  }
+}
+
+// Marks the start of an entry method execution on `obj` when no envelope is
+// available: the message size is `mlen` and the entry point index is `ep`.
+// event, msgType, srcPe and idx are not used.
+void TraceAutoPerf::beginExecute(int event,int msgType,int ep,int srcPe,
+    int mlen, CmiObjId *idx, void *obj)
+{
+  if(isTraceOn){
+    lastBeginExecuteTime = CkWallTimer();
+    lastbeginMessageSize = mlen;
+    lastEvent =  BEGIN_PROCESSING;
+    currentObject = obj;
+    currentEP = ep; 
+  }
+}
+
+// Marks the end of the execution started by the last beginExecute():
+// adds the elapsed time to the entry method totals, tracks the longest
+// execution and its entry point, and accumulates time, message count and
+// message size for currentObject in objectLoads.
+void TraceAutoPerf::endExecute(void)
+{
+  if(isTraceOn){
+    double endTime = CkWallTimer() ;
+    double executionTime = endTime - lastBeginExecuteTime;
+    lastEvent =  -1;
+    totalEntryMethodTime += executionTime;
+    totalEntryMethodInvocations ++;
+    if(executionTime > maxEntryTime) {
+      maxEntryTime = executionTime;
+      maxEntryIdx = currentEP;
+    }
+    
+    {
+      ObjectLoadMap_t::iterator  iter;
+      iter = objectLoads.find(currentObject);
+      if(iter == objectLoads.end())
+      {
+        // first execution seen for this object: the record is heap-allocated
+        // and stored in the map
+        ObjInfo  *myobjInfo = new ObjInfo(executionTime, 1, lastbeginMessageSize);
+        objectLoads[currentObject] = myobjInfo;
+      }else
+      {
+        iter->second->executeTime += executionTime;
+        iter->second->msgCount += 1;
+        iter->second->msgSize += lastbeginMessageSize;
+      }
+    } 
+    currentObject = NULL;    
+  }
+}
+
+// Records the wall time at which the processor became idle.
+void TraceAutoPerf::beginIdle(double curWallTime) {
+  if(!isTraceOn)
+    return;
+  lastBeginIdle =  curWallTime;
+  lastEvent =  BEGIN_IDLE;
+}
+
+// Adds the idle interval that ends at curWallTime to totalIdleTime.
+void TraceAutoPerf::endIdle(double curWallTime) {
+  if(!isTraceOn)
+    return;
+  const double idleSpan = curWallTime - lastBeginIdle;
+  totalIdleTime += idleSpan;
+  lastEvent =  -1;
+}
+
+// Starts the PAPI counters once, when the build has PAPI support; otherwise
+// this is a no-op. Aborts when the counters cannot be started.
+void TraceAutoPerf::beginComputation(void) {
+  if(isTraceOn){
+#if CMK_HAS_COUNTER_PAPI
+  if(CkpvAccess(papiStarted) == 0)
+  {
+    if (PAPI_start(CkpvAccess(papiEventSet)) != PAPI_OK) {
+      CmiAbort("PAPI failed to start designated counters!\n");
+    }
+    CkpvAccess(papiStarted) = 1;
+  }
+#endif
+  }
+}
+
+// Stops the PAPI counters once, when the build has PAPI support; otherwise
+// this is a no-op. A failure to stop only prints a warning.
+void TraceAutoPerf::endComputation(void) { 
+  if(isTraceOn){
+#if CMK_HAS_COUNTER_PAPI
+  // we stop the counters here. A silent failure is alright since we
+  // are already at the end of the program.
+  if(CkpvAccess(papiStopped) == 0) {
+    if (PAPI_stop(CkpvAccess(papiEventSet), papiValues) != PAPI_OK) {
+      CkPrintf("Warning: PAPI failed to stop correctly!\n");
+    }
+    CkpvAccess(papiStopped) = 1;
+  }
+#endif
+  }
+}
+
+// Memory events are ignored by this trace module. Because these members are
+// named malloc/free, code inside the class has to call ::malloc explicitly.
+void TraceAutoPerf::malloc(void *where, int size, void **stack, int stackSize)
+{
+}
+
+void TraceAutoPerf::free(void *where, int size) { }
+
+// Ends the computation on the trace array in _traces and removes this
+// module from that array.
+void TraceAutoPerf::traceClose(void)
+{
+  CkpvAccess(_traces)->endComputation();
+  CkpvAccess(_traces)->removeTrace(this);
+}
+
+
+// Intentionally empty: this function prints nothing.
+void TraceAutoPerf::printSummary() { }
+
+void TraceAutoPerf::summarizeObjectInfo(double &maxtime, double &totaltime,
+    double &maxMsgCount, double &totalMsgCount, double &maxMsgSize,
+    double &totalMsgSize, double &numObjs) {
+  void *maximum = NULL;
+  for(ObjectLoadMap_t::iterator it= objectLoads.begin(); it!= objectLoads.end(); it++)
+  {
+    if( it->second->executeTime > maxtime)
+      maxtime = it->second->executeTime;
+    totaltime += it->second->executeTime;
+
+    if( it->second->msgCount > maxMsgCount) 
+    {
+      maxMsgCount = it->second->msgCount;
+      maximum = it->first;
+    }
+    totalMsgCount += it->second->msgCount;
+
+    if( it->second->msgSize > maxMsgSize) 
+      maxMsgSize = it->second->msgSize;
+    totalMsgSize += it->second->msgSize;
+    numObjs++;
+  }
+  numObjs += numNewObjects;
+}
+
+PerfData* TraceAutoPerf::getSummary() {
+  if(isTraceOn){
+  currentSummary->data[AVG_TotalTime] = CkWallTimer()-startTimer;
+  currentSummary->data[AVG_IdlePercentage] = currentSummary->data[MIN_IdlePercentage]= currentSummary->data[MAX_IdlePercentage]= (idleTime())/currentSummary->data[AVG_TotalTime]; 
+  currentSummary->data[MAX_LoadPerPE] = currentSummary->data[AVG_TotalTime] - idleTime();
+  currentSummary->data[MIN_UtilizationPercentage] = currentSummary->data[MAX_UtilizationPercentage] = (utilTime())/currentSummary->data[AVG_TotalTime]; 
+  currentSummary->data[AVG_UtilizationPercentage] = utilTime()/currentSummary->data[AVG_TotalTime];
+  currentSummary->data[MIN_AppPercentage] = currentSummary->data[MAX_AppPercentage] = appTime();
+  currentSummary->data[AVG_AppPercentage] = appTime();
+  currentSummary->data[AVG_TuningOverhead] = tuneOverheadTotalTime; 
+  currentSummary->data[MIN_OverheadPercentage] = currentSummary->data[MAX_OverheadPercentage] = overheadTime(); 
+  currentSummary->data[AVG_OverheadPercentage] = overheadTime()/currentSummary->data[AVG_TotalTime];
+  currentSummary->data[AVG_EntryMethodDuration]= (double)totalEntryMethodTime;
+  currentSummary->data[AVG_EntryMethodDuration_1]= (double)totalEntryMethodTime_1;
+  currentSummary->data[AVG_EntryMethodDuration_2]= (double)totalEntryMethodTime_2;
+  currentSummary->data[AVG_NumInvocations] = (double)totalEntryMethodInvocations;
+  currentSummary->data[AVG_NumInvocations_1] = (double)totalEntryMethodInvocations_1;
+  currentSummary->data[AVG_NumInvocations_2] = (double)totalEntryMethodInvocations_2;
+  currentSummary->data[MAX_EntryMethodDuration]= maxEntryTime;
+  currentSummary->data[MAX_EntryMethodDuration_1]= maxEntryTime_1;
+  currentSummary->data[MAX_EntryMethodDuration_2]= maxEntryTime_2;
+  currentSummary->data[MAX_EntryID]= maxEntryIdx;
+  currentSummary->data[MAX_EntryID_1]= maxEntryIdx_1;
+  currentSummary->data[MAX_EntryID_2]= maxEntryIdx_2;
+  summarizeObjectInfo(currentSummary->data[MAX_LoadPerObject], currentSummary->data[AVG_LoadPerObject], currentSummary->data[MAX_NumMsgsPerObject],  currentSummary->data[AVG_NumMsgsPerObject], currentSummary->data[MAX_BytesPerObject], currentSummary->data[AVG_BytesPerObject], currentSummary->data[AVG_NumObjectsPerPE]);
+  currentSummary->data[MAX_NumInvocations] = currentSummary->data[AVG_NumInvocations] = (double)totalEntryMethodInvocations;
+#if CMK_HAS_COUNTER_PAPI
+  readPAPI();
+  if((papiValues)[1]-previous_papiValues[1] > 0)
+    currentSummary->data[AVG_CacheMissRate] = ((papiValues)[0]-previous_papiValues[0]) / ((papiValues)[1]-previous_papiValues[1]);
+#endif
+  currentSummary->data[MAX_NumMsgRecv] = currentSummary->data[MIN_NumMsgRecv] = currentSummary->data[AVG_NumMsgRecv];
+  currentSummary->data[MAX_BytesMsgRecv] = currentSummary->data[MIN_BytesMsgRecv] = currentSummary->data[AVG_BytesMsgRecv];
+  currentSummary->data[MinIdlePE] = CkMyPe();
+  currentSummary->data[MAX_IdlePE] = CkMyPe();
+  currentSummary->data[MAX_OverheadPE] = CkMyPe();
+  currentSummary->data[MAX_UtilPE] = CkMyPe();
+  currentSummary->data[MAX_AppPE] = CkMyPe();
+  currentSummary->data[MAX_NumInvocPE] = CkMyPe();
+  currentSummary->data[MAX_LoadPE] = CkMyPe();
+  currentSummary->data[MAX_ExternalBytePE] = CkMyPe();
+  currentSummary->data[MAX_CPPE] = CkMyPe();
+  currentSummary->data[MAX_NumMsgRecvPE] = CkMyPe();
+  currentSummary->data[MAX_BytesMsgRecvPE] = CkMyPe();
+  currentSummary->data[MAX_NumMsgSendPE] = CkMyPe();
+  currentSummary->data[MAX_BytesSendPE] = CkMyPe();
+  currentSummary->data[MaxEntryPE] = CkMyPe();
+  }
+  return currentSummary;
+}
+
+void _createTraceperfReport(char **argv)  // Creates the TraceAutoPerf instance and registers it with _traces.
+{
+  CkpvInitialize(TraceAutoPerf*, _trace);             // set up the _trace Ckpv variable before it is assigned
+  CkpvAccess(_trace) = new TraceAutoPerf(argv);       // stored in _trace; not deleted in this function
+  CkpvAccess(_traces)->addTrace(CkpvAccess(_trace));  // hand the new trace to the _traces object
+}
diff --git a/src/ck-pics/trace-perf.h b/src/ck-pics/trace-perf.h
new file mode 100644 (file)
index 0000000..912ce4c
--- /dev/null
@@ -0,0 +1,271 @@
+#ifndef __TRACE_PERF__H__
+#define __TRACE_PERF__H__
+#include "charm++.h"
+#include "TopoManager.h"
+#include "envelope.h"
+#include "trace-common.h"
+#include "picsdefs.h"
+#include "picsdefscpp.h"
+#include "picsautoperf.h"
+#include <map>
+
+#define COMPRESS_EVENT_NO         392
+#define DECOMPRESS_EVENT_NO       393
+
+class TraceAutoPerf : public Trace {
+
+  bool isTraceOn;
+
+  TopoManager tmgr;
+
+  ObjectLoadMap_t objectLoads;
+#if CMK_HAS_COUNTER_PAPI
+  LONG_LONG_PAPI previous_papiValues[NUMPAPIEVENTS];
+  LONG_LONG_PAPI papiValues[NUMPAPIEVENTS];
+#endif
+  double  lastBeginExecuteTime;
+  int     lastbeginMessageSize;
+  int     lastEvent;
+  /** The start of the idle region */
+  double  lastBeginIdle;
+  int     numNewObjects;
+
+  /** Amount of time spent so far in untraced regions */
+  double totalUntracedTime;
+
+  /** When tracing was suspended (0 if not currently suspended) */
+  double whenStoppedTracing;
+
+  /** The amount of time spent executing entry methods since we last reset the counters */
+  double totalEntryMethodTime;
+  double totalEntryMethodTime_1;
+  double totalEntryMethodTime_2;
+
+  double appWorkStartTimer;
+  /** the amount of application useful work, need app knowledge */
+  double totalAppTime;
+  double tuneOverheadTotalTime;
+
+  double startTimer;
+
+  double tuneOverheadStartTimer;
+
+  /** The amount of time spent idle since we last reset the counters */
+  double totalIdleTime;
+
+  /* * maximum excution time of a single entry method */
+  double maxEntryTime;
+  double maxEntryTime_1;
+  double maxEntryTime_2;
+  int    maxEntryIdx;
+  int    maxEntryIdx_1;
+  int    maxEntryIdx_2;
+
+  /*  maximum execution time of a single object  */
+  /*  obj load map */
+  void *currentObject;
+
+  int currentEP;
+
+  int currentAID;
+  int currentIDX;
+
+  /** The highest seen memory usage  since we last reset the counters */
+  double memUsage;
+
+  /** The number of entry method invocations since we last reset the counters */
+  long totalEntryMethodInvocations;
+  long totalEntryMethodInvocations_1;
+  long totalEntryMethodInvocations_2;
+
+  /** The time we last rest the counters */
+  double lastResetTime;
+
+  double phaseEndTime;
+
+  /* * summary data */
+  PerfData *currentSummary;
+  PerfData *currentTraceData;
+
+  int currentGroupID;
+  CkArrayIndex currentIndex;
+
+  // In some programs like Changa, entry methods may be nested, and hence we only want to consider the outermost one
+  int nesting_level;
+
+public:
+  TraceAutoPerf(char **argv);
+
+  //begin/end tracing
+  void traceBegin(void);
+  void traceEnd(void);
+
+
+  // a user event has just occured
+  void userEvent(int eventID);
+  // a pair of begin/end user event has just occured
+  void userBracketEvent(int eventID, double bt, double et);
+  void beginAppWork();
+  void endAppWork();
+  void countNewChare();
+
+  void beginTuneOverhead();
+  void endTuneOverhead();
+  // "creation" of message(s) - message Sends
+  void creation(envelope *, int epIdx, int num=1);
+  void creationMulticast(envelope *, int epIdx, int num=1, int *pelist=NULL);
+  void creationDone(int num=1);
+
+  void messageRecv(void *env, int pe);
+  void messageSend(void *env, int pe, int size);
+
+  void beginExecute(envelope *, void*);
+  void beginExecute(CmiObjId *tid);
+
+  void beginExecute(
+    envelope* env,
+    int event,   // event type defined in trace-common.h
+    int msgType, // message type
+    int ep,      // Charm++ entry point id
+    int srcPe,   // Which PE originated the call
+    int ml,      // message size
+    CmiObjId* idx);    // index
+
+  void beginExecute(
+    int event,   // event type defined in trace-common.h
+    int msgType, // message type
+    int ep,      // Charm++ entry point id
+    int srcPe,   // Which PE originated the call
+    int ml,      // message size
+    CmiObjId* idx,
+    void* obj);    // index
+  void endExecute(void);
+
+  // begin/end idle time for this pe
+  void beginIdle(double curWallTime);
+  void endIdle(double curWallTime);
+
+  // begin/end of execution
+  void beginComputation(void);
+  void endComputation(void);
+
+  /* Memory tracing */
+  void malloc(void *where, int size, void **stack, int stackSize);
+  void free(void *where, int size);
+
+  // do any clean-up necessary for tracing
+  void traceClose();
+
+  // ==================================================================
+  /** reset the idle time and entry method execution time accumulators */
+  void resetTimings();
+  /** Reset the idle, overhead, and memory measurements */
+  void resetAll();
+  void endPhase();
+  void startPhase(int step, int id);
+  void startStep(bool analysis);
+  void endStep(bool analysis);
+
+  /** Fraction of the time spent idle since resetting the counters */
+
+  inline double idleRatio(){
+    if(lastEvent == BEGIN_IDLE)
+      totalIdleTime += (CkWallTimer() - lastBeginIdle);
+    return (totalIdleTime) / totalTraceTime();
+  }
+
+  inline double idleTime()
+  {
+    if(lastEvent == BEGIN_IDLE)
+      totalIdleTime += (CkWallTimer() - lastBeginIdle);
+    return totalIdleTime;
+  }
+
+  inline double untracedTime(){
+    if(whenStoppedTracing <= 0){
+      return totalUntracedTime;
+    } else {
+      return totalUntracedTime + (phaseEndTime -whenStoppedTracing);
+    }
+  }
+
+  inline double totalTraceTime()
+  {
+    return CkWallTimer() - startTimer;
+  }
+  /** Fraction of time spent as overhead since resetting the counters */
+  inline double overheadRatio(){
+    double t = totalTraceTime();
+    return (t - totalIdleTime - totalEntryMethodTime)/t;
+  }
+
+  inline double overheadTime(){
+    double t = totalTraceTime();
+    return (t - totalIdleTime - totalEntryMethodTime);
+  }
+
+  inline double utilRatio() {
+    double inprogress_time = 0.0;
+    if(lastEvent == BEGIN_PROCESSING)
+      inprogress_time = (CkWallTimer() - lastBeginExecuteTime);
+    return (totalEntryMethodTime + inprogress_time)/ totalTraceTime();
+  }
+
+  inline double utilTime() {
+    double inprogress_time = 0.0;
+    if(lastEvent == BEGIN_PROCESSING)
+      inprogress_time = (CkWallTimer() - lastBeginExecuteTime);
+    return (totalEntryMethodTime + inprogress_time);
+  }
+
+  inline double appRatio() {
+    return totalAppTime/ totalTraceTime();
+  }
+
+  inline double appTime() {
+    return totalAppTime;
+  }
+  /** Highest memory usage (in MB) value we've seen since resetting the counters */
+  inline double memoryUsageMB(){
+    return ((double)memUsage) / 1024.0 / 1024.0;
+  }
+
+  /** Determine the average grain size since last reset of counters */
+  inline double grainSize(){
+    return (double)totalEntryMethodTime / totalEntryMethodInvocations;
+  }
+
+  inline double maxGrainSize() {
+    return maxEntryTime;
+  }
+
+  void summarizeObjectInfo(double &maxtime, double &totaltime, double &maxMsgCount, double &totalMsgCount, double &maxMsgSize, double &totalMsgSize, double &numObjs) ;
+
+
+  inline long numInvocations() {
+    return totalEntryMethodInvocations;
+  }
+
+#if CMK_HAS_COUNTER_PAPI
+  inline void readPAPI()
+  {
+    if (PAPI_read(CkpvAccess(papiEventSet), CkpvAccess(papiValues)) != PAPI_OK) {
+      CmiAbort("PAPI failed to read at begin execute!\n");
+    }
+  }
+#endif
+
+  PerfData* getSummary();
+  void printSummary();
+
+  void setTraceOn(bool b) {
+    isTraceOn = b;
+  }
+
+  bool getTraceOn() { return isTraceOn;}
+};
+
+
+TraceAutoPerf* localAutoPerfTracingInstance();
+
+#endif
index 6274097c5bf59e8a8e214aa79480bfab5fb95f8d..ecbc1ab8490fc5bfeb3514f1534a5d150837abaa 100644 (file)
@@ -19,6 +19,8 @@ void traceAwaken(CthThread t);
 void traceUserEvent(int);
 void beginAppWork();
 void endAppWork();
+void beginTuneOverhead();
+void endTuneOverhead();
 void traceUserBracketEvent(int, double, double);
 void traceUserSuppliedData(int);
 void traceUserSuppliedBracketedNote(const char *note, int eventID, double bt, double et);
index 6c1f0c6fc6787f6bbd9a960193402580a788c25c..6cae8e19d55e254312e05715bd245cef4b4a50e6 100644 (file)
@@ -50,6 +50,7 @@ NullLB.decl.h NullLB.def.h: NullLB.ci.stamp
 OrbLB.decl.h OrbLB.def.h: OrbLB.ci.stamp
 PathHistory.decl.h PathHistory.def.h: pathHistory.ci.stamp
 PhasebyArrayLB.decl.h PhasebyArrayLB.def.h: PhasebyArrayLB.ci.stamp
+TraceAutoPerf.decl.h TraceAutoPerf.def.h: picsautoperf.ci.stamp
 RandCentLB.decl.h RandCentLB.def.h: RandCentLB.ci.stamp
 RecBipartLB.decl.h RecBipartLB.def.h: RecBipartLB.ci.stamp
 RecBisectBfLB.decl.h RecBisectBfLB.def.h: RecBisectBfLB.ci.stamp
index d53c408e05b22222dc587b759560df84f58f2f3d..98d062043c2d122e60c90f75af0333ae9c5dac5a 100644 (file)
@@ -250,7 +250,7 @@ CKHEADERS=ck.h ckstream.h objid.h envelope.h init.h qd.h charm.h charm++.h \
           BaseLB.decl.h \
          NborBaseLB.decl.h DistBaseLB.decl.h \
           HybridBaseLB.decl.h EveryLB.decl.h CommonLBs.decl.h \
-          TraceSummary.decl.h TraceProjections.decl.h \
+          TraceSummary.decl.h TraceAutoPerf.decl.h TraceProjections.decl.h \
           TraceSimple.decl.h TraceControlPoints.decl.h TraceTau.decl.h \
          TraceUtilization.decl.h BlueGene.decl.h \
          ckdirect.h \
@@ -259,6 +259,9 @@ CKHEADERS=ck.h ckstream.h objid.h envelope.h init.h qd.h charm.h charm++.h \
           controlPoints.h controlPointsf.h arrayRedistributor.h cp_effects.h register.h stats.h   \
          cksequence_internal.h cksequence_factory.h random_sequence.h strided_sequence.h \
          mpi-interoperate.h mpi_main.decl.h  \
+                       trace-perf.h picsdefs.h  picsautoperf.h picstreenode.h \
+                       picsdecisiontree.h \
+           picsautoperfAPI.h picsautoperfAPIC.h \
          $(CVHEADERS)
 
 BLUE_HEADERS = blue_types.h bigsim_timing.h blue_defs.h bigsim_logs.h blue.h blue_impl.h bigsim_network.h bigsim_record.h bigsim_ooc.h bigsim_debug.h shared-alloc.h
@@ -318,6 +321,7 @@ dirs+sources:
        ./gatherflat $(SRCBASE)/conv-perf       .
        ./gatherflat $(SRCBASE)/ck-core          .
        ./gatherflat $(SRCBASE)/ck-perf          .
+       ./gatherflat $(SRCBASE)/ck-pics          .
        ./gatherflat $(SRCBASE)/ck-tune          .
        ./gatherflat $(SRCBASE)/ck-ldb           .
        ./gatherflat $(SRCBASE)/ck-cp            .
@@ -342,6 +346,8 @@ dirs+sources:
        chmod +x charmc
        -./system_ln  ../tmp/charmc ../bin/
        -./system_ln  $(SRCBASE)/scripts/testrun ../bin/
+       -./system_ln  ../tmp/tree.txt ../bin/
+       -./system_ln  ../tmp/fuzzytree.txt ../bin/
 #      -ln -s ../../java/bin charmjavabin
 #      -ln -s ../../java/bin ../bin/charmjavabin
 #      -ln -s charmjavabin/* ../bin/
@@ -367,11 +373,11 @@ TRACELIBS = $(L)/libtrace-converse.a $(L)/libtracef_f.a
 BUILD_TRACING=$(shell CHARMINC=.; if test -f ./conv-config.sh; then . ./conv-config.sh; echo $$CMK_TRACE_ENABLED; fi )
 
 ifneq "$(BUILD_TRACING)" "0"
-TRACELIBS += $(L)/libtrace-projections.a  $(L)/libtrace-summary.a  \
+TRACELIBS += $(L)/libtrace-projections.a $(L)/libtrace-controlPoints.a  $(L)/libtrace-summary.a \
              $(L)/libtrace-utilization.a  $(L)/libtrace-simple.a \
              $(L)/libtrace-counter.a $(L)/libtrace-bluegene.a \
             $(L)/libtrace-projector.a $(L)/libtrace-all.a  \
-             $(L)/libtrace-memory.a 
+             $(L)/libtrace-memory.a $(L)/libtrace-perfReport.a
 endif
 
 MEMLIBS=$(L)/libmemory-default.a $(L)/libmemory-os.a $(L)/libmemory-gnu.a \
@@ -641,6 +647,14 @@ LIBTRACE_PROJ=trace-projections.o
 $(L)/libtrace-projections.a: $(LIBTRACE_PROJ)
        $(CHARMC) -o $@ $(LIBTRACE_PROJ)
 
+LIBTRACE_CP=trace-controlPoints.o
+$(L)/libtrace-controlPoints.a: $(LIBTRACE_CP)
+       $(CHARMC) -o $@ $(LIBTRACE_CP)
+
+LIBTRACE_AP=trace-perf.o picsautoperf.o picsautoperfAPI.o picstreenode.o picsdecisiontree.o
+$(L)/libtrace-perfReport.a: $(LIBTRACE_AP)
+       $(CHARMC) -o $@ $(LIBTRACE_AP)
+
 LIBTRACE_SUMM=trace-summary.o
 $(L)/libtrace-summary.a: $(LIBTRACE_SUMM)
        $(CHARMC) -o $@ $(LIBTRACE_SUMM)
@@ -690,7 +704,7 @@ LIBTRACE_MEMORY=trace-memory.o
 $(L)/libtrace-memory.a: $(LIBTRACE_MEMORY)
        $(CHARMC) -o $@ $(LIBTRACE_MEMORY)
 
-LIBTRACE_ALL=trace-all.o trace-projections.o trace-summary.o trace-simple.o \
+LIBTRACE_ALL=trace-all.o trace-projections.o trace-controlPoints.o picstreenode.o picsdecisiontree.o picsautoperfAPI.o picsautoperf.o trace-perf.o trace-summary.o trace-simple.o  \
 $(TAU_TRACE_OBJ) trace-projector.o traceCore.o traceCoreCommon.o charmProjections.o converseProjections.o machineProjections.o trace-memory.o trace-utilization.o
 
 $(L)/libtrace-all.a: $(LIBTRACE_ALL)
@@ -712,7 +726,7 @@ tracef_f.o: tracef_f.f90
        -$(CHARMC) -o $@ -c tracef_f.f90 && $(CHARMC) -cpmod ../include tracemod.M  || touch $@
 
 # used for make depends
-TRACE_OBJS =  trace-projections.o  trace-summary.o  trace-simple.o \
+TRACE_OBJS =  trace-projections.o trace-controlPoints.o picstreenode.o picsdecisiontree.o trace-perf.o picsautoperfAPI.o picsautoperf.o trace-summary.o  trace-simple.o \
              trace-counter.o trace-utilization.o       \
              trace-bluegene.o trace-projector.o trace-converse.o trace-all.o \
           trace-memory.o 
index 96d9bd34f1a28eccb089ae7880b63061db0a0fe8..f9273b66daeddb88a8e302952147779c2abb0e23 100755 (executable)
@@ -1798,9 +1798,16 @@ for trace in $TRACEMODE; do
     elif test $trace = "controlPoints"
     then
       echo "  extern void _registerTraceControlPoints();" >> $modInitSrc
+    elif test $trace = "perfReport"
+    then
+      Do $CP $CHARMBIN/fuzzytree.txt fuzzytree.txt
+      Do $CP $CHARMBIN/tree.txt tree.txt
+      echo "  extern void _registerTraceAutoPerf();" >> $modInitSrc
+
     elif test $trace = "all"
     then
       echo "  extern void _registerTraceProjections();" >> $modInitSrc
+      echo "  extern void _registerTraceControlPoints();" >> $modInitSrc
       echo "  extern void _registerTraceSummary();" >> $modInitSrc
     fi
 done
@@ -1833,9 +1840,13 @@ for trace in $TRACEMODE; do
     elif test $trace = "controlPoints"
     then
       echo "  _registerTraceControlPoints();" >> $modInitSrc
+    elif test $trace = "perfReport"
+    then
+      echo "  _registerTraceAutoPerf();" >> $modInitSrc
     elif test $trace = "all"
     then
       echo "  _registerTraceProjections();" >> $modInitSrc
+      echo "  _registerTraceControlPoints();" >> $modInitSrc
       echo "  _registerTraceSummary();" >> $modInitSrc
     fi
 done
diff --git a/src/util/fuzzytree.txt b/src/util/fuzzytree.txt
new file mode 100644 (file)
index 0000000..6e61978
--- /dev/null
@@ -0,0 +1,25 @@
+#inter-leaf key                 avg-min-max     key_in_int              OP      flag        value/index     symbol  parent
+-1      Root
+0       CPU_Util                AVG            UtilizationPercentage    SUB         -1          0.9      GT     1   Root 
+0       High_Overhead           AVG            OverheadPercentage       SUB         -1          0.1      GT     1   Root
+0       High_Idle               AVG             IdlePercentage          SUB         -1          0.1      GT     1   Root
+0       Small_Message           AVG             BytesPerMsg             SUB         -1          300      LT     1   High_Overhead 
+0       High_CacheMissRate      AVG             CacheMissRate           SUB         -1          0.1      GT     1   CPU_Util 
+0       LOW_CacheMissRate       AVG             CacheMissRate           SUB         -1          0.1      LT     1   CPU_Util 
+0       Few_Obj_Per_PE          AVG             NumObjectsPerPE         SUB         -1          1        LT     1   High_Idle 
+0       Large_Bytes_Per_Obj     MAX             BytesPerObject          DIV         0           AVG       NumMsgRecv  GT    1.2      1      High_Idle 
+0       Large_Bytes_PerMsg      AVG             BytesPerMsg             SUB         -1          100000      GT  1   High_Idle 
+0       Load_Imbalance          MAX             LoadPerPE               DIV         0           AVG       LoadPerPE         GT   1.1      1      High_Idle     
+0       Comm_Imbalance          MAX             NumMsgRecv              DIV         0           AVG       NumMsgRecv        GT   1.5      1      High_Idle
+0       Much_External_Comm      AVG             ExternalBytePerPE       DIV         0           AVG       BytesMsgRecv      GT   0.8      2      High_Idle  High_Overhead    
+1       Leaf_Up_Grainsize       UP              PICS_EFF_GRAINSIZE      2           Small_Message  High_Overhead
+1       Leaf_Up_Grainsize_1     UP              PICS_EFF_GRAINSIZE_1    1           High_Overhead
+1       Leaf_Up_Grainsize_2     UP              PICS_EFF_GRAINSIZE_2    1           High_Overhead
+1       Leaf_Down_Grainsize     DOWN            PICS_EFF_GRAINSIZE      3           Few_Obj_Per_PE High_CacheMissRate Load_Imbalance
+1       Leaf_Down_LDB_Frequency DOWN            PICS_EFF_LDBFREQUENCY   1           High_Overhead
+1       Leaf_Up_Nodesize        UP              PICS_EFF_NODESIZE       1           Much_External_Comm
+1       Leaf_Down_Aggregation   DOWN            PICS_EFF_AGGREGATION    1           Large_Bytes_PerMsg   
+1       Leaf_Down_Msgsize       DOWN            PICS_EFF_MESSAGESIZE    1           Large_Bytes_PerMsg
+1       Leaf_Up_Compression     UP              PICS_EFF_COMPRESSION    1           Large_Bytes_PerMsg   
+1       Leaf_Perf_Good          UP              PICS_EFF_PERFGOOD       1           LOW_CacheMissRate
+1       Leaf_Up_Replica         UP              PICS_EFF_REPLICA        1           Large_Bytes_Per_Obj 
diff --git a/src/util/tree.txt b/src/util/tree.txt
new file mode 100644 (file)
index 0000000..2e70993
--- /dev/null
@@ -0,0 +1,28 @@
+#inter-leaf key                 avg-min-max     key_in_int              OP      flag        value/index     symbol  parent
+-1      Root
+0       CPU_Util                AVG            UtilizationPercentage    SUB         -1          0.90      GT     1   Root 
+0       High_Overhead           AVG            OverheadPercentage       SUB         -1          0.1      GT     1   Root
+0       High_Idle               AVG             IdlePercentage          SUB         -1          0.1      GT     1   Root
+0       Small_Message           AVG             BytesPerMsg             SUB         -1          300      LT     1   High_Overhead 
+0       High_CacheMissRate      AVG             CacheMissRate           SUB         -1          0.1      GT     1   CPU_Util 
+0       LOW_CacheMissRate       AVG             CacheMissRate           SUB         -1          0.1      LT     1   CPU_Util 
+0       Small_Entry             AVG             EntryMethodDuration     SUB         -1          0.00006  LT     1   High_Overhead 
+0       Small_Entry_1           AVG             EntryMethodDuration_1   SUB         -1          0.00006  LT     1   High_Overhead 
+0       Small_Entry_2           AVG             EntryMethodDuration_2   SUB         -1          0.00006  LT     1   High_Overhead 
+0       Long_Entry              MAX             EntryMethodDuration     DIV         0           AVG       LoadPerPE        GT    1.2      1      High_Idle 
+0       Long_Entry_1            MAX             EntryMethodDuration_1   DIV         0           AVG       LoadPerPE        GT    1.2      1      High_Idle 
+0       Long_Entry_2            MAX             EntryMethodDuration_2   DIV         0           AVG       LoadPerPE        GT    1.2      1      High_Idle 
+0       Long_Object             MAX             LoadPerObject           DIV         0           AVG       LoadPerPE        GT    1.2      1      High_Idle 
+0       Load_Imbalance          MAX             LoadPerPE               DIV         0           AVG       LoadPerPE         GT   1.1      1      High_Idle     
+0       Long_Critical_Path      MAX             CriticalPathLength      DIV         0           AVG       LoadPerPE         GT   1.2      1      High_Idle    
+0       FEW_INVOC               AVG             NumInvocations          SUB         -1          1         LT      1       High_Idle
+0       FEW_ENTRIES             AVG             EntryMethodDuration     SUB         0           AVG       LoadPerPE        GT    0        1      High_Idle
+1       Leaf_Up_Grainsize       UP              PICS_EFF_GRAINSIZE      1           Small_Entry 
+1       Leaf_Down_Grainsize     DOWN            PICS_EFF_GRAINSIZE      5           Long_Entry  Long_Object    Long_Critical_Path   FEW_ENTRIES FEW_INVOC
+1       Leaf_Up_Aggregation     UP              PICS_EFF_AGGREGATION    1           Small_Message
+1       Leaf_Up_LDB_Frequency   UP              PICS_EFF_LDBFREQUENCY   1           Load_Imbalance
+1       Leaf_Up_Msgsize         UP              PICS_EFF_MESSAGESIZE    1           Small_Message
+1       Leaf_Down_Grainsize_1   DOWN            PICS_EFF_GRAINSIZE_1    1           Long_Entry_1  
+1       Leaf_Down_Grainsize_2   DOWN            PICS_EFF_GRAINSIZE_2    1           Long_Entry_2  
+1       Leaf_Up_Grainsize_1     UP              PICS_EFF_GRAINSIZE_1    1           Small_Entry_1
+1       Leaf_Up_Grainsize_2     UP              PICS_EFF_GRAINSIZE_2    1           Small_Entry_2