Add automatic performance analysis trace mode; currently it performs post-mortem analysis
authorYanhuaSun <sun51@illinois.edu>
Wed, 11 Sep 2013 04:27:55 +0000 (23:27 -0500)
committerYanhuaSun <sun51@illinois.edu>
Wed, 11 Sep 2013 04:27:55 +0000 (23:27 -0500)
src/ck-perf/trace-all.C
src/ck-tune/autoPerfAPI.C [new file with mode: 0644]
src/ck-tune/autoPerfAPI.h [new file with mode: 0644]
src/ck-tune/trace-autoPerf.C [new file with mode: 0644]
src/ck-tune/trace-autoPerf.ci [new file with mode: 0644]
src/ck-tune/trace-autoPerf.h [new file with mode: 0644]
src/ck-tune/trace-perf.C [new file with mode: 0644]
src/scripts/Make.cidepends
src/scripts/Make.depends
src/scripts/Makefile
src/scripts/charmc

index 4bcd93a9b7806f95cd9979f3f2bf44671800c60e..74b271b681d25bd4872cd636ea8d4d53ff04a3bc 100644 (file)
@@ -1,12 +1,13 @@
-
 extern void _createTraceprojections(char **argv);
 extern void _createTracesummary(char **argv);
 extern void _createTraceprojector(char **argv);
+extern void _createTraceautoPerf(char **argv);
 
 void _createTraceall(char **argv)  // calls each trace module's creation routine in turn, forwarding argv
 {
   _createTraceprojections(argv);
   _createTracesummary(argv);
+  _createTraceautoPerf(argv);  // added by this commit: also create the autoPerf trace module
   _createTraceprojector(argv);
 }
 
diff --git a/src/ck-tune/autoPerfAPI.C b/src/ck-tune/autoPerfAPI.C
new file mode 100644 (file)
index 0000000..c3ccc52
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * =====================================================================================
+ *
+ *       Filename:  autoPerfAPI.C
+ *
+ *    Description: API for users to trigger automatic performance analysis (autoPerf)
+ *
+ *        Version:  1.0
+ *        Created:  03/03/2013 05:25:52 PM
+ *       Revision:  none
+ *       Compiler:  gcc
+ *
+ *         Author:  Yanhua Sun(), 
+ *   Organization:  uiuc
+ *
+ * =====================================================================================
+ */
+
+#include "trace-autoPerf.h"
+#include "autoPerfAPI.h"
+#define PERF_FREQUENCY 1
+#define   CP_PERIOD  100
+
+CkpvDeclare(int, availAnalyzeCP);
+CksvDeclare(int, availAnalyzeNodeCP);
+CkpvDeclare(int, hasPendingAnalysis);
+CkpvDeclare(int, currentStep);
+CkpvDeclare(CkCallback, callBackAutoPerfDone);
+
+/*
+ * Advance this PE's step counter. On every PERF_FREQUENCY-th step invoke
+ * timeStep(CkMyPe()) on autoPerfProxy; on any other step fire the registered
+ * "autoPerf done" callback immediately.
+ */
+void autoPerfGlobalNextStep( )
+{
+    const int step = ++CkpvAccess(currentStep);
+    if(step % PERF_FREQUENCY != 0) {
+        CkpvAccess(callBackAutoPerfDone).send(); 
+        return;
+    }
+    autoPerfProxy.timeStep(CkMyPe());
+}
+
+/*
+ * Advance this PE's step counter. On every PERF_FREQUENCY-th step call
+ * timeStep(CkMyPe()) directly on the local branch of autoPerfProxy; on any
+ * other step fire the registered "autoPerf done" callback immediately.
+ */
+void autoPerfLocalNextStep( )
+{
+    const int step = ++CkpvAccess(currentStep);
+    if(step % PERF_FREQUENCY != 0) {
+        CkpvAccess(callBackAutoPerfDone).send(); 
+        return;
+    }
+    autoPerfProxy.ckLocalBranch()->timeStep(CkMyPe());
+}
+
+void startAnalysisonIdle()
+{
+    if(traceAutoPerfGID.idx !=0 && ((CkGroupID)autoPerfProxy).idx != 0 && CksvAccess(availAnalyzeNodeCP) == 1 && CkpvAccess(hasPendingAnalysis) == 0 )
+    {
+        CksvAccess(availAnalyzeNodeCP) = 0;
+        CcdCallFnAfterOnPE((CcdVoidFn)autoPerfReset, NULL, CP_PERIOD, CkMyPe());
+        autoPerfProxy.ckLocalBranch()->localPerfQuery();
+    }
+}
+
+/* Set the node-level availability flag back to 1 so startAnalysisonIdle() may run again. */
+void autoPerfReset()
+{
+    CksvAccess(availAnalyzeNodeCP) = 1;
+}
+
+/* Clear this PE's "analysis pending" flag. */
+void setNoPendingAnalysis()
+{
+    CkpvAccess(hasPendingAnalysis) = 0;
+}
+
+/*
+ * Register the callback to fire when an analysis round completes.
+ * Asserts that it is called on PE 0, then forwards both arguments to
+ * setAutoPerfDoneCallback() through autoPerfProxy.
+ */
+void registerAutoPerfDone(CkCallback cb, bool frameworkShouldAdvancePhase)
+{
+    CkAssert(CkMyPe() == 0);
+    autoPerfProxy.setAutoPerfDoneCallback(cb, frameworkShouldAdvancePhase);
+}
+
diff --git a/src/ck-tune/autoPerfAPI.h b/src/ck-tune/autoPerfAPI.h
new file mode 100644 (file)
index 0000000..1dec7b3
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef __AUTOPERFAPI__H__
+#define __AUTOPERFAPI__H__
+
+//three types of applications to start analysis
+//global barrier for each time step
+void autoPerfGlobalNextStep( );
+
+//local time step
+void autoPerfLocalNextStep( );
+
+//no timestep, analysis starts when idle
+void startAnalysisonIdle();
+void autoPerfReset();
+
+void registerAutoPerfDone(CkCallback cb, bool frameworkShouldAdvancePhase);
+
+#endif
diff --git a/src/ck-tune/trace-autoPerf.C b/src/ck-tune/trace-autoPerf.C
new file mode 100644 (file)
index 0000000..90da9fc
--- /dev/null
@@ -0,0 +1,253 @@
+#include "charm++.h"
+#include "TraceAutoPerf.decl.h"
+#include "trace-autoPerf.h"
+#include <algorithm>
+#include <math.h>
+#define TRIGGER_PERF_IDLE_PERCENTAGE 0.1 
+
+#define SMP_ANALYSIS  0 
+#define DEBUG_LEVEL 0
+#define   CP_PERIOD  100
+
+#define TIMESTEP_RATIO_THRESHOLD 0
+
+#define UTIL_PERCENTAGE   0.95
+
+#if 0 
+#define DEBUG_PRINT(x) x  
+#else
+#define DEBUG_PRINT(x) 
+#endif
+
+// trace functions here
+#include "trace-perf.C"
+CkpvDeclare(savedPerfDatabase*, perfDatabase);
+CkpvExtern(int, availAnalyzeCP);
+CksvExtern(int, availAnalyzeNodeCP);
+CkpvExtern(int, hasPendingAnalysis);
+CkpvExtern(int, currentStep);
+CkpvExtern(CkCallback, callBackAutoPerfDone);
+CkGroupID traceAutoPerfGID;
+CProxy_TraceAutoPerfBOC autoPerfProxy;
+CProxy_TraceNodeAutoPerfBOC autoPerfNodeProxy;
+extern void setNoPendingAnalysis();
+extern void startAnalysisonIdle();
+extern void autoPerfReset();
+//-----------------------utility functions ----------------------
+//Reduce summary data
+CkReductionMsg *perfDataReduction(int nMsg,CkReductionMsg **msgs){
+    perfData *ret;
+    if(nMsg > 0){
+        ret=(perfData*)msgs[0]->getData();
+    }
+    for (int i=1;i<nMsg;i++) {
+        perfData *m=(perfData*)(msgs[i]->getData());
+        // idle time (min/s$um/max)
+        ret->idleMin = min(ret->idleMin, m->idleMin);
+        ret->idleTotalTime += m->idleTotalTime; 
+        ret->idleMax = max(ret->idleMax, m->idleMax);
+        // overhead time (min/sum/max)
+        ret->overheadMin = min(ret->overheadMin, m->overheadMin);
+        ret->overheadTotalTime += m->overheadTotalTime; 
+        ret->overheadMax = max(ret->overheadMax, m->overheadMax);
+        // util time (min/sum/max)
+        ret->utilMin = min(ret->utilMin, m->utilMin);
+        ret->utilTotalTime += m->utilTotalTime; 
+        ret->utilMax = max(ret->utilMax, m->utilMax);
+        // mem usage (max)
+        ret->mem =max(ret->mem,m->mem);
+        // bytes per invocation for two types of entry methods
+        ret->numMsgs += m->numMsgs; 
+        ret->numBytes += m->numBytes; 
+        ret->commTime += m->commTime; 
+        // Grain size (avg, max)
+        ret->grainsizeAvg += m->grainsizeAvg;
+        ret->grainsizeMax = max(ret->grainsizeMax, m->grainsizeMax);
+        //Total invocations
+        ret->numInvocations += m->numInvocations;
+        ret->objLoadMax = max(ret->objLoadMax, m->objLoadMax);
+    }  
+    CkReductionMsg *msg= CkReductionMsg::buildNew(sizeof(perfData),ret); 
+    return msg;
+}
+
+/*
+ * Mainchare constructor: creates the autoPerf groups and, when the
+ * "+idleAnalysis" flag is present on the command line, hooks the
+ * idle-triggered analysis into the scheduler.
+ *
+ * The TraceAutoPerfBOC group is created once and both traceAutoPerfGID and
+ * autoPerfProxy refer to it. Previously ckNew() was called twice, which
+ * created a second group that nothing ever addressed and left the two
+ * readonlies naming different groups.
+ */
+TraceAutoPerfInit::TraceAutoPerfInit(CkArgMsg* args)
+{
+    traceAutoPerfGID = CProxy_TraceAutoPerfBOC::ckNew();
+    autoPerfProxy = traceAutoPerfGID;
+    autoPerfNodeProxy = CProxy_TraceNodeAutoPerfBOC::ckNew();
+    bool isIdleAnalysis = CmiGetArgFlagDesc(args->argv,"+idleAnalysis","start performance analysis when idle");
+    if(isIdleAnalysis){
+        // run startAnalysisonIdle whenever the PE stays idle, and schedule
+        // autoPerfReset() once, 10 time units from now (same units as CP_PERIOD)
+        CcdCallOnConditionKeep(CcdPROCESSOR_STILL_IDLE,(CcdVoidFn)startAnalysisonIdle, NULL);
+        CcdCallFnAfterOnPE((CcdVoidFn)autoPerfReset, NULL, 10, CmiMyPe());
+    }
+}
+
+/*
+ * Store cb in this PE's callBackAutoPerfDone slot; it is sent once an
+ * analysis round has finished. frameworkShouldAdvancePhase is accepted but
+ * not used by this implementation.
+ */
+void TraceAutoPerfBOC::setAutoPerfDoneCallback(CkCallback cb, bool frameworkShouldAdvancePhase)
+{
+    CkpvAccess(callBackAutoPerfDone) = cb;
+}
+
+/* Node-group time-step hook: forwards to getPerfData() with an ignore callback. */
+void TraceNodeAutoPerfBOC::timeStep(int reductionPE)
+{
+    getPerfData(reductionPE, CkCallback::ignore);
+}
+
+CkReduction::reducerType perfDataReductionType;
+void TraceNodeAutoPerfBOC::getPerfData(int reductionPE, CkCallback cb)  // node-group variant: currently an empty stub, both arguments are ignored
+{
+}
+
+/* Time-step hook: collect and contribute local data, with reductionPE as the analysis target. */
+void TraceAutoPerfBOC::timeStep(int reductionPE)
+{
+    getPerfData(reductionPE, CkCallback::ignore);
+}
+
+// Collect local perf data and send results to reductionPE
+void TraceAutoPerfBOC::getPerfData(int reductionPE, CkCallback cb)
+{
+    TraceAutoPerf *t = localAutoPerfTracingInstance();
+    t->markStep();
+    perfData * data = t->getSummary();
+    DEBUG_PRINT (
+        t->printSummary();
+        )
+    CkCallback *cb1 = new CkCallback(CkIndex_TraceAutoPerfBOC::globalPerfAnalyze(NULL), thisProxy[reductionPE]);
+    contribute(sizeof(perfData),data,perfDataReductionType, *cb1);
+    t->resetAll();
+    CkpvAccess(hasPendingAnalysis) = 1;
+    CcdCallFnAfterOnPE((CcdVoidFn)setNoPendingAnalysis, NULL, CP_PERIOD, CkMyPe());
+}
+
+//check local idle percentage to decide whether trigger global analysis
+void TraceAutoPerfBOC::localPerfQuery()
+{
+
+    TraceAutoPerf *t = localAutoPerfTracingInstance();
+    double idlePercent = t->checkIdleRatioDuringIdle();
+    CkpvAccess(currentStep)++;
+    if( idlePercent > TRIGGER_PERF_IDLE_PERCENTAGE ) //TUNABLE  
+    {
+        //CkPrintf("\nTIMER:%f PE:%d idle percentage is HIGH start analysis  %.3f\n", TraceTimer(), CkMyPe(),   idlePercent);
+#if SMP_ANALYSIS
+        {
+            for(int i=0; i<CkNumNodes(); i++)
+                autoPerfNodeProxy[i].getPerfData(CkMyNode(), CkCallback::ignore);
+        }
+#else
+        autoPerfProxy.getPerfData(0, CkCallback::ignore);
+        //autoPerfProxy.getPerfData(CkMyPe(), CkCallback::ignore);
+#endif
+    }else if(idlePercent < 0)
+    {
+        TraceAutoPerf *t = localAutoPerfTracingInstance();
+        t->markStep();
+        //CkPrintf("%f PE:%d idle percentage is negative %f\n", TraceTimer(), CkMyPe(), idlePercent);
+    } else
+    {
+        //CkPrintf("%f PE:%d idle percentage is okay  %f\n", TraceTimer(), CkMyPe(),idlePercent);
+    }
+}
+
+//perf data from all processors are collected on one PE, perform analysis based on global data
+void TraceAutoPerfBOC::globalPerfAnalyze(CkReductionMsg *msg )
+{
+    static int counters = 0;
+    int level = 0;
+    //CkPrintf("\n-------------------------global %d  Timer:%f analyzing------- %d \n\n", CkMyPe(), CkWallTimer(), counters++);
+    int size=msg->getSize() / sizeof(double);
+    perfData *data=(perfData*) msg->getData();
+    double totalTime = data->utilTotalTime  + data->idleTotalTime + data->overheadTotalTime ;
+    double idlePercentage = data->idleTotalTime/totalTime;
+    double overheadPercentage = data->overheadTotalTime/totalTime;
+    double utilPercentage = data->utilTotalTime/totalTime;
+    //DEBUG_PRINT ( 
+    CkPrintf("Utilization(%):  \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f)\n", data->utilMin*100, data->utilMax*100, utilPercentage*100 );
+    CkPrintf("Idle(%):         \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) \n", data->idleMin*100,  data->idleMax*100, idlePercentage*100);
+    CkPrintf("Overhead(%):     \t(min:max:avg):(%.1f:\t  %.1f:\t  %.1f) \n", data->overheadMin*100, data->overheadMax*100, overheadPercentage*100);
+    CkPrintf("Grainsize(ms):\t(avg:max)\t: (%.3f:    %.3f) \n", data->utilTotalTime/data->numInvocations*1000, data->grainsizeMax*1000);
+    CkPrintf("Invocations:  \t%lld\n", data->numInvocations);
+    //)
+   
+    // --- time step measurement 
+    double timeElapse = CkWallTimer() - startStepTimer;
+    double avgTimeStep = timeElapse/(CkpvAccess(currentStep) - lastAnalyzeStep);
+    CkpvAccess(perfDatabase)->insert(avgTimeStep, utilPercentage,  idlePercentage, overheadPercentage); 
+    DEBUG_PRINT ( 
+        CkPrintf("-------------- timestep --%d:%d--- \n", CkpvAccess(currentStep),  lastAnalyzeStep);
+        )
+    startStepTimer = CkWallTimer();
+    lastAnalyzeStep = CkpvAccess(currentStep);
+    //check the performance, and decide whether to tune
+    //
+    CkpvAccess(callBackAutoPerfDone).send(); 
+}
+
+/*
+ *  based on the history data, (tunnable parameter values, performance metrics)
+ *  generate a performance model using curve fitting.
+ */
+
+enum  functionType { LINEAR, SECOND_ORDER, THIRD_ORDER };
+
+void TraceAutoPerfBOC::generatePerfModel()
+{
+    // a set of performance results is the function value
+    // a set of tunable parameter values  is the function variable
+    // linear,  second degree polynomial , third degree polynomial 
+    // exponential polynomial fit
+    // GNU scientific library has tools to do this
+
+    int modelType;
+    modelType = LINEAR;
+    switch( modelType)
+    {
+        case LINEAR:
+
+            break;
+
+        case SECOND_ORDER:
+            break;
+
+        case THIRD_ORDER:
+            break;
+
+        default:
+            break;
+    }
+}
+
+/*
+ * Exit hook registered by _initTraceAutoPerfBOC(): prints a notice and
+ * triggers one last timeStep with the calling PE as reduction target.
+ * It does not call CkExit() itself.
+ */
+extern "C" void traceAutoPerfExitFunction()
+{
+    CkPrintf("calling before exiting............................\n");
+    autoPerfProxy.timeStep(CkMyPe());
+}
+void _initTraceAutoPerfBOC()
+{
+    perfDataReductionType=CkReduction::addReducer(perfDataReduction);
+
+    CkpvInitialize(int, currentStep);
+    CkpvAccess(currentStep) = 0;
+    CkpvInitialize(int, hasPendingAnalysis);
+    CkpvAccess(hasPendingAnalysis) = 0;
+    CkpvInitialize(CkCallback, callBackAutoPerfDone);
+    CkpvAccess(callBackAutoPerfDone) = CkCallback::ckExit; 
+    CkpvInitialize(savedPerfDatabase*, perfDatabase);
+    CkpvAccess(perfDatabase) = new savedPerfDatabase();
+#ifdef __BIGSIM__
+    if (BgNodeRank()==0) {
+#else               
+    if (CkMyRank() == 0) {
+#endif
+            registerExitFn(traceAutoPerfExitFunction);
+        }
+}
+
+/* initnode routine (see the .ci file): creates the node-level availability flag and sets it to 1. */
+void _initTraceNodeAutoPerfBOC()
+{
+    CksvInitialize(int, availAnalyzeNodeCP);
+    CksvAccess(availAnalyzeNodeCP) = 1;
+}
+#include "TraceAutoPerf.def.h"
diff --git a/src/ck-tune/trace-autoPerf.ci b/src/ck-tune/trace-autoPerf.ci
new file mode 100644 (file)
index 0000000..3332183
--- /dev/null
@@ -0,0 +1,28 @@
+module TraceAutoPerf {  // interface for the autoPerf tracing/analysis framework
+    mainchare TraceAutoPerfInit {  // its constructor creates the groups declared below
+        entry TraceAutoPerfInit(CkArgMsg *m);
+    };
+
+    initproc void _initTraceAutoPerfBOC();  // sets up the reducer and the Ckpv variables
+    initnode void _initTraceNodeAutoPerfBOC();  // sets up the Cksv availability flag
+    readonly CkGroupID traceAutoPerfGID;  // assigned in the TraceAutoPerfInit constructor
+    readonly CProxy_TraceAutoPerfBOC autoPerfProxy;  // assigned in the TraceAutoPerfInit constructor
+    readonly CProxy_TraceNodeAutoPerfBOC autoPerfNodeProxy;  // assigned in the TraceAutoPerfInit constructor
+
+    group [migratable] TraceAutoPerfBOC {  // collects local data and runs the global analysis
+        entry TraceAutoPerfBOC(void);
+        entry void setAutoPerfDoneCallback(CkCallback cb, bool frameworkShouldAdvancePhase);  // stores cb; the bool is not used by the implementation
+        entry void timeStep(int reductionPE);  // forwards to getPerfData
+        entry void getPerfData(int reductionPE, CkCallback cb);  // contributes the local summary to a reduction
+        entry void globalPerfAnalyze(CkReductionMsg *msg);  // target of that reduction
+        entry void localPerfQuery();  // checks the local idle ratio
+    };
+
+    nodegroup TraceNodeAutoPerfBOC {  // node-level counterpart; its getPerfData is an empty stub
+        entry TraceNodeAutoPerfBOC(void);
+        entry void timeStep(int reductionPE);
+        entry void getPerfData(int reductionPE, CkCallback cb);
+    };
+
+};
+
diff --git a/src/ck-tune/trace-autoPerf.h b/src/ck-tune/trace-autoPerf.h
new file mode 100644 (file)
index 0000000..d5b4f57
--- /dev/null
@@ -0,0 +1,467 @@
+#ifndef  TRACE__AUTOPERF__H__
+#define  TRACE__AUTOPERF__H__
+#define _VERBOSE_H
+
+#include <stdio.h>
+#include <errno.h>
+#include "charm++.h"
+#include "trace.h"
+#include "envelope.h"
+#include "register.h"
+#include "trace-common.h"
+#include "TraceAutoPerf.decl.h"
+#include "trace-projections.h"
+#include <vector>
+#include <map>
+#include <list>
+
+
+using namespace std;
+
+extern CkGroupID traceAutoPerfGID;
+extern CProxy_TraceAutoPerfBOC autoPerfProxy;
+extern CProxy_TraceNodeAutoPerfBOC autoPerfNodeProxy;
+// class to store performance data on each PE
+
+
+// One saved analysis result: the average time step together with the
+// utilization, overhead and idle fractions measured for it.
+class perfMetric
+{
+public:
+    double timeStep;
+    double utilPercentage;
+    double overheadPercentage;
+    double idlePercentage;
+
+    // Note the argument order: step, util, idle, overhead.
+    perfMetric(double step, double util, double idle, double overhead)
+        : timeStep(step),
+          utilPercentage(util),
+          overheadPercentage(overhead),
+          idlePercentage(idle)
+    {
+    }
+};
+
+
+class savedPerfDatabase
+{
+private:
+    std::list<perfMetric*> perfList;
+    perfMetric *previous;
+    perfMetric *current;
+
+public:
+    savedPerfDatabase() {}
+
+    void insert(double timestep, double idleP, double utilP, double overheadP) {
+        if(perfList.size() ==0)
+        {
+            previous = current= new perfMetric(timestep, utilP, idleP, overheadP);
+        }
+        else if(perfList.size() < 10)
+        {
+            //only save 10 iterations to save memory
+            previous = (perfMetric*)perfList.back();
+            current = new perfMetric(timestep, utilP, idleP, overheadP);
+        }
+        else
+        {
+            previous = (perfMetric*)perfList.back();
+            current = (perfMetric*) perfList.front();
+            perfList.pop_front();
+            current->timeStep = timestep;
+            current->utilPercentage = utilP;
+            current->idlePercentage = idleP;
+            current->overheadPercentage = overheadP;
+        }
+        perfList.push_back(current);
+    }
+
+    void getData(int i)
+    {
+
+    }
+
+    bool timeStepLonger()
+    {
+        return current->timeStep > previous->timeStep;
+    }
+
+    double getCurrentTimestep()
+    {
+        return current->timeStep; 
+    }
+
+    double getPreviousTimestep()
+    {
+        return previous->timeStep;
+    }
+
+    double getTimestepRatio()
+    {
+        CkPrintf("Time step changes from %f to %f \n", previous->timeStep, current->timeStep);
+        return current->timeStep/previous->timeStep;
+    }
+    
+    double getUtilRatio()
+    {
+       return current->utilPercentage/ previous->utilPercentage; 
+    }
+
+    double getCurrentIdlePercentage()
+    {
+        return current->idlePercentage;
+    }
+    
+    double getPreviousIdlePercentage()
+    {
+        return previous->idlePercentage;
+    }
+
+    double getIdleRatio()
+    {
+        return  current->idlePercentage/previous->idlePercentage;
+    }
+    double getCurrentOverheadPercentage()
+    {
+        return current->overheadPercentage;
+    }
+    
+    double getPreviousOverheadPercentage()
+    {
+        return previous->overheadPercentage;
+    }
+    
+    double getOverheadRatio()
+    {
+        return current->overheadPercentage/previous->overheadPercentage;
+    }
+
+    void getAllTimeSteps(double *y)
+    {
+       int i=0; 
+
+       for(std::list<perfMetric*>::iterator it=perfList.begin(); it != perfList.end(); it++,i++)
+       {
+           y[i] = (*it)->timeStep;
+       }
+    }
+};
+
+
+class perfData 
+{
+public:
+    double idleMin;
+    double idleTotalTime;
+    double idleMax;
+    
+    double utilMin;
+    double utilTotalTime;
+    double utilMax;
+   
+    double overheadMin;
+    double overheadTotalTime;
+    double overheadMax;
+
+    double mem;
+    
+    double grainsizeAvg;
+    double grainsizeMax;
+    
+    long   numInvocations;
+    
+    // communication related data 
+    long    numMsgs;
+    long    numBytes;
+    double  commTime;
+    double  objLoadMax;
+
+    // functions
+    perfData(){}
+};
+
+
+typedef struct {  // pair of packing/unpacking values; not referenced elsewhere in the visible sources
+    double packing;
+    double unpacking;
+
+} sideSummary_t;
+
+typedef struct{  // begin/end timestamps of one phase; filled in by TraceAutoPerf::markStep()
+    double beginTimer;  // markStep() stores lastResetTime here
+    double endTimer;  // markStep() stores the current TraceTimer() value here
+}timerPair;
+
+//map<int, double> ObjectLoadTime;
+
+class TraceAutoPerfInit : public Chare {  // mainchare of the TraceAutoPerf module (declared in the .ci file)
+
+public:
+    TraceAutoPerfInit(CkArgMsg*);  // defined in trace-autoPerf.C
+
+    TraceAutoPerfInit(CkMigrateMessage *m):Chare(m) {}  // migration constructor: only forwards m to Chare
+};
+
+
+class TraceAutoPerfBOC : public CBase_TraceAutoPerfBOC {
+private:
+    int         lastAnalyzeStep;   
+    double      startStepTimer;
+public:
+    TraceAutoPerfBOC() {
+        startStepTimer = CkWallTimer();
+        lastAnalyzeStep = 0;
+    }
+
+    TraceAutoPerfBOC(CkMigrateMessage *m) : CBase_TraceAutoPerfBOC(m) {};
+
+    void pup(PUP::er &p)
+    {
+        CBase_TraceAutoPerfBOC::pup(p);
+    }
+
+    void setAutoPerfDoneCallback(CkCallback cb, bool frameworkShouldAdvancePhase); 
+    void timeStep(int);
+    void getPerfData(int reductionPE, CkCallback cb);
+    void globalPerfAnalyze(CkReductionMsg *msg);
+    void localPerfQuery();
+    void generatePerfModel();
+
+};
+
+//SMP mode: node-group counterpart of TraceAutoPerfBOC, addressed only when SMP_ANALYSIS is enabled in trace-autoPerf.C
+class TraceNodeAutoPerfBOC : public CBase_TraceNodeAutoPerfBOC {
+
+public:
+    TraceNodeAutoPerfBOC(void) {}
+    TraceNodeAutoPerfBOC(CkMigrateMessage *m) : CBase_TraceNodeAutoPerfBOC(m) {};
+
+    void timeStep(int);  // forwards to getPerfData with an ignore callback
+    void getPerfData(int reductionPE, CkCallback cb);  // defined with an empty body in trace-autoPerf.C
+
+};
+
+
+class TraceAutoPerf : public Trace {
+
+    friend class TraceAutoPerfBOC;
+
+public:
+
+    double  lastBeginExecuteTime;
+    int     lastbeginMessageSize;
+    int     lastEvent;
+    /** The start of the idle region */
+    double  lastBeginIdle;
+
+    /** Amount of time spent so far in untraced regions */
+    double totalUntracedTime;
+
+    /** When tracing was suspended (0 if not currently suspended) */
+    double whenStoppedTracing;
+
+    /** The amount of time spent executing entry methods since we last reset the counters */
+    double totalEntryMethodTime;
+
+    /** The amount of time spent idle since we last reset the counters */
+    double totalIdleTime;
+
+    /* * maximum excution time of a single entry method */
+    double maxEntryMethodTime;
+
+    /** The highest seen memory usage  since we last reset the counters */
+    double memUsage;
+
+    /** The number of entry method invocations since we last reset the counters */
+    long totalEntryMethodInvocations;
+
+    /** The time we last rest the counters */
+    double lastResetTime;
+
+    double phaseEndTime;
+
+    /* * summary data */
+    perfData *currentSummary; 
+
+    vector<timerPair> phasesTimers;
+
+    int currentGroupID;
+    CkArrayIndex currentIndex;
+    map<int, map<CkArrayIndex, double> > ObjectLoadTime;
+
+    // In some programs like Changa, entry methods may be nested, and hence we only want to consider the outermost one
+    int nesting_level;
+
+    TraceAutoPerf(char **argv);
+  
+    //begin/end tracing
+    void traceBegin(void);
+    void traceEnd(void);
+
+
+  // a user event has just occured
+  void userEvent(int eventID);
+  // a pair of begin/end user event has just occured
+  void userBracketEvent(int eventID, double bt, double et);
+  
+  // "creation" of message(s) - message Sends
+  void creation(envelope *, int epIdx, int num=1);
+  void creationMulticast(envelope *, int epIdx, int num=1, int *pelist=NULL);
+  void creationDone(int num=1);
+  
+  void messageRecv(char *env, int pe);
+  
+  void beginExecute(envelope *);
+  void beginExecute(CmiObjId *tid);
+  
+  void beginExecute(
+            envelope* env,
+            int event,   // event type defined in trace-common.h
+                   int msgType, // message type
+                   int ep,      // Charm++ entry point id
+                   int srcPe,   // Which PE originated the call
+                   int ml,      // message size
+                   CmiObjId* idx);    // index
+
+  
+  void beginExecute(
+                   int event,   // event type defined in trace-common.h
+                   int msgType, // message type
+                   int ep,      // Charm++ entry point id
+                   int srcPe,   // Which PE originated the call
+                   int ml,      // message size
+                   CmiObjId* idx);    // index
+  void endExecute(void);
+  
+  // begin/end idle time for this pe
+  void beginIdle(double curWallTime);
+  void endIdle(double curWallTime);
+  
+  // begin/end of execution
+  void beginComputation(void);
+  void endComputation(void);
+  
+  /* Memory tracing */
+  void malloc(void *where, int size, void **stack, int stackSize);
+  void free(void *where, int size);
+  
+  // do any clean-up necessary for tracing
+  void traceClose();
+
+  // ==================================================================
+  /** reset the idle time and entry method execution time accumulators */
+  void resetTimings();
+  /** Reset the idle, overhead, and memory measurements */
+  void resetAll();
+
+  /*  mark one phase (to record begin and end timer ) */
+  void markStep();
+
+  /** Fraction of the time spent idle since resetting the counters */
+  inline double checkIdleRatioDuringIdle() 
+  {
+      if(lastEvent == BEGIN_IDLE)
+          return (totalIdleTime + TraceTimer() - lastBeginIdle ) / ( TraceTimer()-lastResetTime);  
+      else
+          return (totalIdleTime) / ( TraceTimer()-lastResetTime);  
+
+  }
+
+  inline double idleRatio(){
+      if(lastEvent == BEGIN_IDLE)
+          totalIdleTime += (TraceTimer() - lastBeginIdle);
+      return (totalIdleTime) / totalTraceTime();
+  }
+
+  inline double idleTime()
+  {
+      if(lastEvent == BEGIN_IDLE)
+          totalIdleTime += (TraceTimer() - lastBeginIdle);
+      return totalIdleTime;
+  }
+
+  inline double untracedTime(){
+    if(whenStoppedTracing <= 0){
+      return totalUntracedTime;     
+    } else {
+      return totalUntracedTime + (phaseEndTime -whenStoppedTracing);
+    }
+
+  }
+
+  inline double totalTraceTime()
+  {
+      return phaseEndTime - lastResetTime ;
+      //return phaseEndTime - lastResetTime - untracedTime();
+  }
+  /** Fraction of time spent as overhead since resetting the counters */
+  inline double overheadRatio(){
+    double t = totalTraceTime(); 
+    return (t - totalIdleTime - totalEntryMethodTime)/t; 
+  } 
+
+  inline double overheadTime(){
+    double t = totalTraceTime(); 
+    return (t - totalIdleTime - totalEntryMethodTime); 
+  } 
+
+  inline double utilRatio() {
+      if(lastEvent == BEGIN_PROCESSING)
+          totalEntryMethodTime += (TraceTimer() - lastBeginExecuteTime);
+      return totalEntryMethodTime/ totalTraceTime(); 
+  }
+
+  inline double utilTime() {
+      if(lastEvent == BEGIN_PROCESSING)
+          totalEntryMethodTime += (TraceTimer() - lastBeginExecuteTime);
+      return totalEntryMethodTime; 
+  }
+  /** Highest memory usage (in MB) value we've seen since resetting the counters */
+  inline double memoryUsageMB(){
+    return ((double)memUsage) / 1024.0 / 1024.0;
+  }
+
+  /** Determine the average grain size since last reset of counters */
+  inline double grainSize(){
+    return (double)totalEntryMethodTime / totalEntryMethodInvocations;
+  }
+
+  inline double maxGrainSize() {
+    return maxEntryMethodTime;
+  }
+
+  inline long bytesPerEntry() {
+    return currentSummary->numBytes / currentSummary->numMsgs;
+  }
+   
+  inline long numInvocations() {
+      return totalEntryMethodInvocations;
+  }
+
+  perfData* getSummary()
+  {
+      currentSummary->idleMin = currentSummary->idleMax= idleRatio(); 
+      currentSummary->idleTotalTime = idleTime();
+      currentSummary->utilMin = currentSummary->utilMax = utilRatio(); 
+      currentSummary->utilTotalTime= utilTime();
+      currentSummary->overheadMin = currentSummary->overheadMax = overheadRatio();
+      currentSummary->overheadTotalTime = overheadTime();
+      currentSummary->grainsizeAvg = grainSize();
+      currentSummary->grainsizeMax = maxGrainSize();
+      currentSummary->numInvocations = totalEntryMethodInvocations;
+      return currentSummary;
+  }
+
+  void printSummary()
+  {
+      CkPrintf("################\n");
+      CkPrintf("\t-------%d local data idle:util:overhead %f:%f:%f\n", CkMyPe(), currentSummary->idleMin, currentSummary->utilMin, currentSummary->overheadMin);
+      CkPrintf("################\n");
+  }
+};
+
+
+// Accessor for this PE's TraceAutoPerf instance (defined in trace-perf.C).
+// The previous declaration named localControlPointTracingInstance, which is
+// not the function this module defines and calls.
+TraceAutoPerf *localAutoPerfTracingInstance();
+
+#endif
+
diff --git a/src/ck-tune/trace-perf.C b/src/ck-tune/trace-perf.C
new file mode 100644 (file)
index 0000000..8bbe06e
--- /dev/null
@@ -0,0 +1,187 @@
+CkpvStaticDeclare(TraceAutoPerf*, _trace);
+//-------- group information ---------------------------
+
+/* Return the TraceAutoPerf object stored in this PE's _trace slot. */
+TraceAutoPerf *localAutoPerfTracingInstance()
+{
+    TraceAutoPerf *instance = CkpvAccess(_trace);
+    return instance;
+}
+
+// instrumentation and analysis 
+/*
+ * Create the summary buffer and put every counter into a defined state.
+ * argv is accepted for symmetry with the other trace modules; it is not
+ * read here.
+ */
+TraceAutoPerf::TraceAutoPerf(char **argv) 
+{
+    DEBUG_PRINT( CkPrintf("trace control point resetting %f\n", TraceTimer()); ) 
+    currentSummary = new perfData();  
+    // resetTimings() reads whenStoppedTracing, so it has to be set first
+    whenStoppedTracing = 0; 
+    nesting_level = 0;
+    // fields that the hooks and the ratio helpers read before anything else
+    // would have assigned them
+    lastEvent = -1;
+    lastbeginMessageSize = -1;
+    memUsage = 0;
+    resetTimings();
+    // no phase has been marked yet: an empty phase ending at the reset time
+    phaseEndTime = lastResetTime;
+}
+
+void TraceAutoPerf::resetTimings(){
+    totalIdleTime = 0.0;
+    totalEntryMethodTime = 0.0;
+    totalEntryMethodInvocations = 0;
+    lastBeginIdle = lastBeginExecuteTime = lastResetTime = TraceTimer();
+    totalUntracedTime = 0;
+    maxEntryMethodTime = 0;
+    if(whenStoppedTracing !=0){
+        whenStoppedTracing = TraceTimer();
+    }
+
+    currentSummary->numMsgs = 0;
+    currentSummary->numBytes = 0;
+    currentSummary->commTime = 0;
+    currentSummary->objLoadMax = 0;
+}
+
+void TraceAutoPerf::resetAll(){
+    totalIdleTime = 0.0;
+    totalEntryMethodTime = 0.0;
+    memUsage = 0;
+    totalEntryMethodInvocations = 0;
+    lastBeginIdle = lastBeginExecuteTime = lastResetTime = TraceTimer();
+    totalUntracedTime = 0;
+    if(whenStoppedTracing !=0){
+        whenStoppedTracing = TraceTimer();
+    }
+    currentSummary->numMsgs = 0;
+    currentSummary->numBytes = 0;
+    currentSummary->commTime = 0;
+    currentSummary->objLoadMax = 0;
+}
+
+/* Resume tracing: add the length of the suspension (if any) to the untraced total. */
+void TraceAutoPerf::traceBegin(void)
+{
+    const bool wasSuspended = (whenStoppedTracing != 0);
+    if(wasSuspended) {
+        totalUntracedTime += (TraceTimer() - whenStoppedTracing);
+    }
+    whenStoppedTracing = 0;
+}
+
+/* Suspend tracing and remember when. Nested suspensions are not supported, hence the assertion. */
+void TraceAutoPerf::traceEnd(void)
+{
+    CkAssert(whenStoppedTracing == 0);
+    whenStoppedTracing = TraceTimer();
+}
+
+void TraceAutoPerf::userEvent(int eventID) { }  // the six hooks on these lines have empty bodies: this module records nothing for them
+void TraceAutoPerf::userBracketEvent(int eventID, double bt, double et) { }
+void TraceAutoPerf::creation(envelope *, int epIdx, int num) { } 
+void TraceAutoPerf::creationMulticast(envelope *, int epIdx, int num, int *pelist) { }
+void TraceAutoPerf::creationDone(int num) { }
+void TraceAutoPerf::messageRecv(char *env, int pe) { }
+
+/*
+ * Start of an execution identified only by an object id: record the start
+ * time and mark processing as in progress. No message is involved, so the
+ * message size is set to -1 and the message statistics are not touched.
+ */
+void TraceAutoPerf::beginExecute(CmiObjId *tid)
+{
+    lastbeginMessageSize = -1;
+    lastEvent =  BEGIN_PROCESSING;
+    lastBeginExecuteTime = TraceTimer();
+    DEBUG_PRINT( CkPrintf("begin Executing tid   %d  msg(%d:%d) time:%d\n", nesting_level, currentSummary->numMsgs, currentSummary->numBytes, (int)(lastBeginExecuteTime*1000000)); )
+}
+
+/*
+ * Start of an entry method for the message in env: record the start time,
+ * mark processing as in progress, and count the message and its total size.
+ */
+void TraceAutoPerf::beginExecute(envelope *env)
+{
+    lastBeginExecuteTime = TraceTimer();
+    lastEvent =  BEGIN_PROCESSING;
+
+    lastbeginMessageSize = env->getTotalsize();
+    currentSummary->numBytes += lastbeginMessageSize;
+    currentSummary->numMsgs++;
+    DEBUG_PRINT( CkPrintf("begin Executing env   %d  msg(%d:%d) time:%d\n", nesting_level, currentSummary->numMsgs, currentSummary->numBytes, (int)(lastBeginExecuteTime*1000000)); )
+}
+
+void TraceAutoPerf::beginExecute(envelope *env, int event,int msgType,int ep,int srcPe, int mlen, CmiObjId *idx)
+{
+    //nesting_level++;
+    //if(nesting_level == 1){
+    lastbeginMessageSize = mlen;
+    currentSummary->numMsgs++;
+    currentSummary->numBytes += lastbeginMessageSize;
+    //`currentSummary->commTime += (env->getRecvTime() - env->getSentTime());
+    lastBeginExecuteTime = TraceTimer();
+    lastEvent =  BEGIN_PROCESSING;
+    DEBUG_PRINT( CkPrintf("begin Executing env  6  %d  msg(%d:%d) time:%d\n", nesting_level, currentSummary->numMsgs, currentSummary->numBytes, (int)(lastBeginExecuteTime*1000000)); )
+}
+
+/*
+ * Start of an entry method described by explicit fields without an
+ * envelope: remember mlen as the message size, record the start time and
+ * mark processing as in progress. The message statistics are not updated.
+ */
+void TraceAutoPerf::beginExecute(int event,int msgType,int ep,int srcPe, int mlen, CmiObjId *idx)
+{
+    lastbeginMessageSize = mlen;
+
+    lastBeginExecuteTime = TraceTimer();
+    lastEvent =  BEGIN_PROCESSING;
+    DEBUG_PRINT( CkPrintf("begin Executing 6 no env %d  msg(%d:%d) time:%d\n", nesting_level, currentSummary->numMsgs, currentSummary->numBytes, (int)(lastBeginExecuteTime*1000000)); )
+}
+
+void TraceAutoPerf::endExecute(void)
+{
+    //MAYBE a bug
+    //nesting_level--;
+    nesting_level = 0;
+    if(nesting_level == 0){
+        double endTime = TraceTimer() ;
+        double executionTime = endTime - lastBeginExecuteTime;
+        lastEvent =  -1;
+        DEBUG_PRINT( CkPrintf("end executing %d, duration %d\n", (int)(1000000*endTime), (int)(executionTime*1000000)); )
+        totalEntryMethodTime += executionTime;
+        totalEntryMethodInvocations ++;
+        if(executionTime > maxEntryMethodTime)
+            maxEntryMethodTime = executionTime;
+        double m = (double)CmiMemoryUsage();
+        if(memUsage < m){
+            memUsage = m;
+        }    
+    }
+}
+
+/* Start of an idle period: remember when it began and refresh the memory high-water mark. */
+void TraceAutoPerf::beginIdle(double curWallTime)
+{
+    lastEvent =  BEGIN_IDLE;
+    lastBeginIdle =  curWallTime; 
+
+    const double usedNow = (double)CmiMemoryUsage();
+    if(usedNow > memUsage) {
+        memUsage = usedNow;
+    }
+}
+
+/* End of an idle period: add its length to the idle total and clear the in-progress marker. */
+void TraceAutoPerf::endIdle(double curWallTime)
+{
+    const double idleSpan = curWallTime - lastBeginIdle;
+    totalIdleTime += idleSpan;
+    lastEvent =  -1;
+}
+
+void TraceAutoPerf::beginComputation(void) { }  // empty hook: nothing is recorded when the computation begins
+void TraceAutoPerf::endComputation(void) { }  // empty hook: nothing is recorded when the computation ends
+
+/* Allocation hook: the arguments are ignored; only the memory high-water mark is refreshed. */
+void TraceAutoPerf::malloc(void *where, int size, void **stack, int stackSize)
+{
+    const double usedNow = (double)CmiMemoryUsage();
+    if(usedNow > memUsage) {
+        memUsage = usedNow;
+    }
+}
+
+void TraceAutoPerf::free(void *where, int size) { }  // empty hook: deallocations are not recorded
+
+/*
+ * Shut this module down: signal end of computation through the per-PE
+ * trace list (_traces) and then remove this module from that list.
+ */
+void TraceAutoPerf::traceClose(void)
+{
+    CkpvAccess(_traces)->endComputation();
+    CkpvAccess(_traces)->removeTrace(this);
+}
+
+/*
+ * Close the current phase: append its (last reset, now) timestamp pair to
+ * phasesTimers and remember "now" as the phase end time.
+ */
+void TraceAutoPerf::markStep()
+{
+    const double now = TraceTimer();
+    phaseEndTime = now;
+
+    timerPair newpairs;
+    newpairs.endTimer = now; 
+    newpairs.beginTimer = lastResetTime;
+    phasesTimers.push_back(newpairs);
+    DEBUG_PRINT ( CkPrintf(" PE %d marking phase  %d at timer:%f traceTimer:%f (%f:%f) \n", CmiMyPe(), phasesTimers.size(), now, TraceTimer(), newpairs.beginTimer,  newpairs.endTimer); )
+}
+
+/*
+ * Factory hook for this trace module: create the per-PE TraceAutoPerf
+ * instance, store it in _trace, and add it to the per-PE trace list
+ * (_traces).
+ */
+void _createTraceautoPerf(char **argv)
+{
+    CkpvInitialize(TraceAutoPerf*, _trace);
+    TraceAutoPerf *instance = new TraceAutoPerf(argv);
+    CkpvAccess(_trace) = instance;
+    CkpvAccess(_traces)->addTrace(CkpvAccess(_trace));
+}
index 7a7caadcedf43b81fcf518dc8ebb5d0811940fca..ecf9de79fcce0e94971be1362d68b086008433c3 100644 (file)
@@ -67,6 +67,7 @@ TempAwareRefineLB.decl.h TempAwareRefineLB.def.h: TempAwareRefineLB.ci.stamp
 tempo.decl.h tempo.def.h: tempo.ci.stamp
 TopoCentLB.decl.h TopoCentLB.def.h: TopoCentLB.ci.stamp
 TopoLB.decl.h TopoLB.def.h: TopoLB.ci.stamp
+TraceAutoPerf.decl.h TraceAutoPerf.def.h: trace-autoPerf.ci.stamp
 TraceControlPoints.decl.h TraceControlPoints.def.h: trace-controlPoints.ci.stamp
 TraceProjections.decl.h TraceProjections.def.h: trace-projections.ci.stamp
 TraceSimple.decl.h TraceSimple.def.h: trace-simple.ci.stamp
index 3448874acb3fd9db37bb7234a36e9329617774da..358d382b7b3f118583677a231a7db3e4d002143d 100644 (file)
@@ -1927,6 +1927,28 @@ arrayRedistributor.o: arrayRedistributor.C charm++.h charm.h converse.h \
  cp_effects.h ck.h qd.h register.h stats.h TopoManager.h ckarray.h
        $(CHARMC) -c -I. $<
 
+autoPerfAPI.o: autoPerfAPI.C trace-autoPerf.h charm++.h charm.h \
+ converse.h conv-config.h conv-autoconfig.h conv-common.h conv-mach.h \
+ conv-mach-opt.h cmiqueue.h pup_c.h queueing.h conv-cpm.h conv-cpath.h \
+ conv-qd.h conv-random.h conv-lists.h conv-trace.h persistent.h \
+ debug-conv.h pup.h middle.h middle-conv.h cklists.h ckbitvector.h \
+ ckstream.h init.h ckhashtable.h debug-charm.h debug-conv++.h simd.h \
+ ckmessage.h pup.h CkMarshall.decl.h envelope.h middle.h ckarrayindex.h \
+ pup.h ckhashtable.h charm.h objid.h converse.h cklists.h objid.h sdag.h \
+ pup_stl.h envelope.h debug-charm.h ckarrayindex.h cksection.h \
+ ckcallback.h conv-ccs.h sockRoutines.h ccs-server.h ckobjQ.h \
+ ckreduction.h CkReduction.decl.h CkArrayReductionMgr.decl.h \
+ ckmemcheckpoint.h CkMemCheckpoint.decl.h readonly.h ckarray.h \
+ cklocation.h LBDatabase.h lbdb.h LBDBManager.h LBObj.h LBOM.h LBComm.h \
+ LBMachineUtil.h lbdb++.h LBDatabase.decl.h NullLB.decl.h BaseLB.decl.h \
+ MetaBalancer.h MetaBalancer.decl.h CkLocation.decl.h cklocrec.h \
+ ckmigratable.h CkArray.decl.h ckfutures.h CkFutures.decl.h charisma.h \
+ charisma.decl.h tempo.h tempo.decl.h waitqd.h waitqd.decl.h \
+ ckcheckpoint.h ckcallback.h ckevacuation.h ckarrayreductionmgr.h trace.h \
+ trace-bluegene.h register.h trace-common.h TraceAutoPerf.decl.h \
+ trace-projections.h autoPerfAPI.h
+       $(CHARMC) -c -I. $<
+
 bigsim_api.o: bigsim_api.C blue.h converse.h conv-config.h \
  conv-autoconfig.h conv-common.h conv-mach.h conv-mach-opt.h cmiqueue.h \
  pup_c.h queueing.h conv-cpm.h conv-cpath.h conv-qd.h conv-random.h \
@@ -2261,7 +2283,7 @@ ckcheckpoint.o: ckcheckpoint.C charm++.h charm.h converse.h conv-config.h \
 ckdll.o: ckdll.C converse.h conv-config.h conv-autoconfig.h conv-common.h \
  conv-mach.h conv-mach-opt.h cmiqueue.h pup_c.h queueing.h conv-cpm.h \
  conv-cpath.h conv-qd.h conv-random.h conv-lists.h conv-trace.h \
- persistent.h debug-conv.h ckdll.h ckdll_dlopen.C ckdll_system.C
+ persistent.h debug-conv.h ckdll.h ckdll_dlopen.C
        $(CHARMC) -c -I. $<
 
 ckevacuation.o: ckevacuation.C charm++.h charm.h converse.h conv-config.h \
@@ -2572,7 +2594,7 @@ conv-ccs.o: conv-ccs.c converse.h conv-config.h conv-autoconfig.h \
  conv-common.h conv-mach.h conv-mach-opt.h cmiqueue.h pup_c.h queueing.h \
  conv-cpm.h conv-cpath.h conv-qd.h conv-random.h conv-lists.h \
  conv-trace.h persistent.h debug-conv.h conv-ccs.h sockRoutines.h \
- ccs-server.h ckhashtable.h pup.h
+ ccs-server.h ckhashtable.h pup.h ccs-server.c ccs-auth.h ccs-auth.c
        $(CHARMC) -c -I. $<
 
 conv-conds.o: conv-conds.c converse.h conv-config.h conv-autoconfig.h \
@@ -3433,6 +3455,28 @@ topology.o: topology.C cklists.h pup.h converse.h conv-config.h \
 trace-all.o: trace-all.C
        $(CHARMC) -c -I. $<
 
+trace-autoPerf.o: trace-autoPerf.C charm++.h charm.h converse.h \
+ conv-config.h conv-autoconfig.h conv-common.h conv-mach.h \
+ conv-mach-opt.h cmiqueue.h pup_c.h queueing.h conv-cpm.h conv-cpath.h \
+ conv-qd.h conv-random.h conv-lists.h conv-trace.h persistent.h \
+ debug-conv.h pup.h middle.h middle-conv.h cklists.h ckbitvector.h \
+ ckstream.h init.h ckhashtable.h debug-charm.h debug-conv++.h simd.h \
+ ckmessage.h pup.h CkMarshall.decl.h envelope.h middle.h ckarrayindex.h \
+ pup.h ckhashtable.h charm.h objid.h converse.h cklists.h objid.h sdag.h \
+ pup_stl.h envelope.h debug-charm.h ckarrayindex.h cksection.h \
+ ckcallback.h conv-ccs.h sockRoutines.h ccs-server.h ckobjQ.h \
+ ckreduction.h CkReduction.decl.h CkArrayReductionMgr.decl.h \
+ ckmemcheckpoint.h CkMemCheckpoint.decl.h readonly.h ckarray.h \
+ cklocation.h LBDatabase.h lbdb.h LBDBManager.h LBObj.h LBOM.h LBComm.h \
+ LBMachineUtil.h lbdb++.h LBDatabase.decl.h NullLB.decl.h BaseLB.decl.h \
+ MetaBalancer.h MetaBalancer.decl.h CkLocation.decl.h cklocrec.h \
+ ckmigratable.h CkArray.decl.h ckfutures.h CkFutures.decl.h charisma.h \
+ charisma.decl.h tempo.h tempo.decl.h waitqd.h waitqd.decl.h \
+ ckcheckpoint.h ckcallback.h ckevacuation.h ckarrayreductionmgr.h trace.h \
+ trace-bluegene.h TraceAutoPerf.decl.h trace-autoPerf.h register.h \
+ trace-common.h trace-projections.h trace-perf.C TraceAutoPerf.def.h
+       $(CHARMC) -c -I. $<
+
 trace-bluegene.o: trace-bluegene.C charm++.h charm.h converse.h \
  conv-config.h conv-autoconfig.h conv-common.h conv-mach.h \
  conv-mach-opt.h cmiqueue.h pup_c.h queueing.h conv-cpm.h conv-cpath.h \
index 3d3dc48d2abfb8d33b5db3ae9ec2275d2e52f9ee..fdeb4fcf2051447b837232455a0cf27d7106eec1 100644 (file)
@@ -195,6 +195,7 @@ include Make.extlib
 CVHEADERS=cpthreads.h converse.h conv-trace.h conv-random.h conv-qd.h \
       msgq.h queueing.h conv-cpath.h conv-cpm.h persistent.h\
       trace.h trace-common.h trace-bluegene.h trace-projections.h  \
+         trace-autoPerf.h  autoPerfAPI.h \
       trace-simple.h trace-controlPoints.h charm-api.h \
       conv-ccs.h ccs-client.c ccs-client.h \
       ccs-server.h ccs-auth.c ccs-auth.h \
@@ -243,7 +244,7 @@ CKHEADERS=ck.h ckstream.h objid.h envelope.h init.h qd.h charm.h charm++.h \
           BaseLB.decl.h \
          NborBaseLB.decl.h \
           HybridBaseLB.decl.h EveryLB.decl.h CommonLBs.decl.h \
-          charisma.decl.h TraceSummary.decl.h TraceProjections.decl.h \
+          charisma.decl.h TraceSummary.decl.h TraceProjections.decl.h TraceAutoPerf.decl.h \
           TraceSimple.decl.h TraceControlPoints.decl.h TraceTau.decl.h \
          TraceUtilization.decl.h BlueGene.decl.h \
          comlib.decl.h ComlibManager.h DummyStrategy.h \
@@ -316,6 +317,7 @@ dirs+sources:
        ./gatherflat ../../src/ck-core          .
        ./gatherflat ../../src/util             .
        ./gatherflat ../../src/ck-perf          .
+       ./gatherflat ../../src/ck-tune          .
        ./gatherflat ../../src/ck-ldb           .
        ./gatherflat ../../src/ck-com           .
        ./gatherflat ../../src/ck-cp            .
@@ -368,7 +370,7 @@ TRACELIBS += $(L)/libtrace-projections.a  $(L)/libtrace-summary.a  \
              $(L)/libtrace-utilization.a  $(L)/libtrace-simple.a \
              $(L)/libtrace-counter.a $(L)/libtrace-bluegene.a \
             $(L)/libtrace-projector.a $(L)/libtrace-all.a  \
-             $(L)/libtrace-memory.a
+             $(L)/libtrace-memory.a $(L)/libtrace-autoPerf.a
 endif
 
 MEMLIBS=$(L)/libmemory-default.o $(L)/libmemory-os.o $(L)/libmemory-gnu.o \
@@ -711,6 +713,10 @@ LIBTRACE_SIMPLE=trace-simple.o
 $(L)/libtrace-simple.a: $(LIBTRACE_SIMPLE)
        $(CHARMC) -o $@ $(LIBTRACE_SIMPLE)
 
+LIBTRACE_AP=trace-autoPerf.o autoPerfAPI.o
+$(L)/libtrace-autoPerf.a: $(LIBTRACE_AP)
+           $(CHARMC) -o $@ $(LIBTRACE_AP)
+
 libtrace-Tau.o: trace-Tau.C charm++.h charm.h converse.h conv-config.h \
   conv-autoconfig.h conv-common.h conv-mach.h conv-mach-opt.h \
   conv-mach-ifort.h pup_c.h conv-cpm.h conv-cpath.h conv-qd.h \
@@ -749,7 +755,7 @@ LIBTRACE_MEMORY=trace-memory.o
 $(L)/libtrace-memory.a: $(LIBTRACE_MEMORY)
        $(CHARMC) -o $@ $(LIBTRACE_MEMORY)
 
-LIBTRACE_ALL=trace-all.o trace-projections.o trace-summary.o trace-simple.o \
+LIBTRACE_ALL=trace-all.o trace-projections.o trace-summary.o trace-simple.o autoPerfAPI.o trace-autoPerf.o \
 $(TAU_TRACE_OBJ) trace-projector.o traceCore.o traceCoreCommon.o charmProjections.o converseProjections.o machineProjections.o trace-memory.o trace-utilization.o
 
 $(L)/libtrace-all.a: $(LIBTRACE_ALL)
@@ -774,7 +780,7 @@ tracef_f.o: tracef_f.f90
 TRACE_OBJS =  trace-projections.o  trace-summary.o  trace-simple.o \
              trace-counter.o trace-utilization.o       \
              trace-bluegene.o trace-projector.o trace-converse.o trace-all.o \
-          trace-memory.o
+          trace-memory.o autoPerfAPI.o trace-autoPerf.o
 
 ###############################################################################
 #
@@ -1004,7 +1010,7 @@ cidepends: charmxi
        echo '#generated by make cidepends' > $(CIDEPENDFILE); \
        for cifile in $(wildcard *.ci); do \
               echo "checking generated modules for $$cifile" ; \
-              $(CHARMXI) -M $$cifile | sed '/\.stamp:/a \\t $$(CHARMC) -intrinsic $$< && touch $$@' >> $(CIDEPENDFILE) ; \
+              $(CHARMXI) -M  $$cifile | sed '/\.stamp:/a \\t $$(CHARMC) -intrinsic $$< && touch $$@' >> $(CIDEPENDFILE) ; \
         done
 
 depends: cidepends commitid.c
index cb450548c89d5626289c831d50b5a6c43e7bfad3..11832cc21a00e6825e45f55e11885c4a66fd1262 100755 (executable)
@@ -1721,6 +1721,9 @@ for trace in $TRACEMODE; do
     elif test $trace = "controlPoints"
     then
       echo "  extern void _registerTraceControlPoints();" >> $modInitSrc
+    elif test $trace = "autoPerf"
+    then
+      echo "  extern void _registerTraceAutoPerf();" >> $modInitSrc
     elif test $trace = "all"
     then
       echo "  extern void _registerTraceProjections();" >> $modInitSrc
@@ -1756,6 +1759,9 @@ for trace in $TRACEMODE; do
     elif test $trace = "controlPoints"
     then
       echo "  _registerTraceControlPoints();" >> $modInitSrc
+    elif test $trace = "autoPerf"
+    then
+      echo "  _registerTraceAutoPerf();" >> $modInitSrc
     elif test $trace = "all"
     then
       echo "  _registerTraceProjections();" >> $modInitSrc