llfasttimer cleanup

removed unnecessary cache miss from fast timers
renamed llfasttimer_class back to llfasttimer
master
Richard Linden 2012-08-06 16:08:04 -07:00
parent 4d395a0de0
commit a98c7e150b
9 changed files with 406 additions and 552 deletions

View File

@ -53,7 +53,7 @@ set(llcommon_SOURCE_FILES
lleventfilter.cpp
llevents.cpp
lleventtimer.cpp
llfasttimer_class.cpp
llfasttimer.cpp
llfile.cpp
llfindlocale.cpp
llfixedbuffer.cpp
@ -167,7 +167,6 @@ set(llcommon_HEADER_FILES
lleventemitter.h
llextendedstatus.h
llfasttimer.h
llfasttimer_class.h
llfile.h
llfindlocale.h
llfixedbuffer.h

View File

@ -1,5 +1,5 @@
/**
* @file llfasttimer_class.cpp
* @file llfasttimer.cpp
* @brief Implementation of the fast timer.
*
* $LicenseInfo:firstyear=2004&license=viewerlgpl$
@ -64,16 +64,12 @@ BOOL LLFastTimer::sMetricLog = FALSE;
LLMutex* LLFastTimer::sLogLock = NULL;
std::queue<LLSD> LLFastTimer::sLogQueue;
#define USE_RDTSC 0
#if LL_LINUX || LL_SOLARIS
U64 LLFastTimer::sClockResolution = 1000000000; // Nanosecond resolution
#else
U64 LLFastTimer::sClockResolution = 1000000; // Microsecond resolution
#endif
std::vector<LLFastTimer::FrameState>* LLFastTimer::sTimerInfos = NULL;
// FIXME: move these declarations to the relevant modules
// helper functions
@ -106,52 +102,41 @@ static timer_tree_dfs_iterator_t end_timer_tree()
return timer_tree_dfs_iterator_t();
}
// factory class that creates NamedTimers via static DeclareTimer objects
class NamedTimerFactory : public LLSingleton<NamedTimerFactory>
{
public:
NamedTimerFactory()
: mActiveTimerRoot(NULL),
mTimerRoot(NULL),
mAppTimer(NULL),
mRootFrameState(NULL)
: mTimerRoot(NULL)
{}
/*virtual */ void initSingleton()
{
mTimerRoot = new LLFastTimer::NamedTimer("root");
mActiveTimerRoot = new LLFastTimer::NamedTimer("Frame");
mActiveTimerRoot->setCollapsed(false);
mRootFrameState = new LLFastTimer::FrameState(mActiveTimerRoot);
mRootFrameState->mParent = &mTimerRoot->getFrameState();
mActiveTimerRoot->setParent(mTimerRoot);
mAppTimer = new LLFastTimer(mRootFrameState);
mRootFrameState.setNamedTimer(mTimerRoot);
mTimerRoot->setFrameState(&mRootFrameState);
mTimerRoot->mParent = mTimerRoot;
mRootFrameState.mParent = &mRootFrameState;
}
~NamedTimerFactory()
{
std::for_each(mTimers.begin(), mTimers.end(), DeletePairedPointer());
delete mAppTimer;
delete mActiveTimerRoot;
delete mTimerRoot;
delete mRootFrameState;
}
LLFastTimer::NamedTimer& createNamedTimer(const std::string& name)
LLFastTimer::NamedTimer& createNamedTimer(const std::string& name, LLFastTimer::FrameState* state)
{
timer_map_t::iterator found_it = mTimers.find(name);
if (found_it != mTimers.end())
{
llerrs << "Duplicate timer declaration for: " << name << llendl;
return *found_it->second;
}
LLFastTimer::NamedTimer* timer = new LLFastTimer::NamedTimer(name);
timer->setFrameState(state);
timer->setParent(mTimerRoot);
mTimers.insert(std::make_pair(name, timer));
@ -168,10 +153,7 @@ public:
return NULL;
}
LLFastTimer::NamedTimer* getActiveRootTimer() { return mActiveTimerRoot; }
LLFastTimer::NamedTimer* getRootTimer() { return mTimerRoot; }
const LLFastTimer* getAppTimer() { return mAppTimer; }
LLFastTimer::FrameState& getRootFrameState() { return *mRootFrameState; }
typedef std::map<std::string, LLFastTimer::NamedTimer*> timer_map_t;
timer_map_t::iterator beginTimers() { return mTimers.begin(); }
@ -181,55 +163,19 @@ public:
private:
timer_map_t mTimers;
LLFastTimer::NamedTimer* mActiveTimerRoot;
LLFastTimer::NamedTimer* mTimerRoot;
LLFastTimer* mAppTimer;
LLFastTimer::FrameState* mRootFrameState;
LLFastTimer::FrameState mRootFrameState;
};
void update_cached_pointers_if_changed()
{
// detect when elements have moved and update cached pointers
static LLFastTimer::FrameState* sFirstTimerAddress = NULL;
if (&*(LLFastTimer::getFrameStateList().begin()) != sFirstTimerAddress)
{
LLFastTimer::DeclareTimer::updateCachedPointers();
}
sFirstTimerAddress = &*(LLFastTimer::getFrameStateList().begin());
}
LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name, bool open )
: mTimer(NamedTimerFactory::instance().createNamedTimer(name))
: mTimer(NamedTimerFactory::instance().createNamedTimer(name, &mFrameState))
{
mTimer.setCollapsed(!open);
mFrameState = &mTimer.getFrameState();
update_cached_pointers_if_changed();
}
LLFastTimer::DeclareTimer::DeclareTimer(const std::string& name)
: mTimer(NamedTimerFactory::instance().createNamedTimer(name))
: mTimer(NamedTimerFactory::instance().createNamedTimer(name, &mFrameState))
{
mFrameState = &mTimer.getFrameState();
update_cached_pointers_if_changed();
}
// static
void LLFastTimer::DeclareTimer::updateCachedPointers()
{
// propagate frame state pointers to timer declarations
for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
// update cached pointer
it->mFrameState = &it->mTimer.getFrameState();
}
// also update frame states of timers on stack
LLFastTimer* cur_timerp = LLFastTimer::sCurTimerData.mCurTimer;
while(cur_timerp->mLastTimerData.mCurTimer != cur_timerp)
{
cur_timerp->mFrameState = &cur_timerp->mFrameState->mTimer->getFrameState();
cur_timerp = cur_timerp->mLastTimerData.mCurTimer;
}
}
//static
@ -241,7 +187,7 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
#else // windows or x86-mac or x86-linux or x86-solaris
U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
{
#if USE_RDTSC || !LL_WINDOWS
#if LL_FASTTIMER_USE_RDTSC || !LL_WINDOWS
//getCPUFrequency returns MHz and sCPUClockFrequency wants to be in Hz
static U64 sCPUClockFrequency = U64(LLProcessorInfo().getCPUFrequency()*1000000.0);
@ -262,14 +208,13 @@ U64 LLFastTimer::countsPerSecond() // counts per second for the *32-bit* timer
}
#endif
LLFastTimer::FrameState::FrameState(LLFastTimer::NamedTimer* timerp)
LLFastTimer::FrameState::FrameState()
: mActiveCount(0),
mCalls(0),
mSelfTimeCounter(0),
mParent(NULL),
mLastCaller(NULL),
mMoveUpTree(false),
mTimer(timerp)
mMoveUpTree(false)
{}
@ -280,12 +225,9 @@ LLFastTimer::NamedTimer::NamedTimer(const std::string& name)
mTotalTimeCounter(0),
mCountAverage(0),
mCallAverage(0),
mNeedsSorting(false)
mNeedsSorting(false),
mFrameState(NULL)
{
info_list_t& frame_state_list = getFrameStateList();
mFrameStateIndex = frame_state_list.size();
getFrameStateList().push_back(FrameState(this));
mCountHistory = new U32[HISTORY_NUM];
memset(mCountHistory, 0, sizeof(U32) * HISTORY_NUM);
mCallHistory = new U32[HISTORY_NUM];
@ -366,15 +308,6 @@ void LLFastTimer::NamedTimer::processTimes()
accumulateTimings();
}
// sort timer info structs by depth first traversal order
struct SortTimersDFS
{
bool operator()(const LLFastTimer::FrameState& i1, const LLFastTimer::FrameState& i2)
{
return i1.mTimer->getFrameStateIndex() < i2.mTimer->getFrameStateIndex();
}
};
// sort child timers by name
struct SortTimerByName
{
@ -455,7 +388,7 @@ void LLFastTimer::NamedTimer::accumulateTimings()
LLFastTimer* cur_timer = sCurTimerData.mCurTimer;
// root defined by parent pointing to self
CurTimerData* cur_data = &sCurTimerData;
while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
{
U32 cumulative_time_delta = cur_time - cur_timer->mStartTime;
U32 self_time_delta = cumulative_time_delta - cur_data->mChildTime;
@ -470,7 +403,7 @@ void LLFastTimer::NamedTimer::accumulateTimings()
}
// traverse tree in DFS post order, or bottom up
for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getActiveRootTimer());
for(timer_tree_bottom_up_iterator_t it = begin_timer_tree_bottom_up(*NamedTimerFactory::instance().getRootTimer());
it != end_timer_tree_bottom_up();
++it)
{
@ -541,27 +474,6 @@ void LLFastTimer::NamedTimer::resetFrame()
}
}
// tag timers by position in depth first traversal of tree
S32 index = 0;
for(timer_tree_dfs_iterator_t it = begin_timer_tree(*NamedTimerFactory::instance().getRootTimer());
it != end_timer_tree();
++it)
{
NamedTimer* timerp = (*it);
timerp->mFrameStateIndex = index;
index++;
llassert_always(timerp->mFrameStateIndex < (S32)getFrameStateList().size());
}
// sort timers by DFS traversal order to improve cache coherency
std::sort(getFrameStateList().begin(), getFrameStateList().end(), SortTimersDFS());
// update pointers into framestatelist now that we've sorted it
DeclareTimer::updateCachedPointers();
// reset for next frame
for (instance_iter it = beginInstances(); it != endInstances(); ++it)
{
@ -592,7 +504,7 @@ void LLFastTimer::NamedTimer::reset()
// root defined by parent pointing to self
CurTimerData* cur_data = &sCurTimerData;
LLFastTimer* cur_timer = cur_data->mCurTimer;
while(cur_timer->mLastTimerData.mCurTimer != cur_timer)
while(cur_timer && cur_timer->mLastTimerData.mCurTimer != cur_timer)
{
cur_timer->mStartTime = cur_time;
cur_data->mChildTime = 0;
@ -622,17 +534,6 @@ void LLFastTimer::NamedTimer::reset()
sCurFrameIndex = 0;
}
//static
LLFastTimer::info_list_t& LLFastTimer::getFrameStateList()
{
if (!sTimerInfos)
{
sTimerInfos = new info_list_t();
}
return *sTimerInfos;
}
U32 LLFastTimer::NamedTimer::getHistoricalCount(S32 history_index) const
{
S32 history_idx = (getLastFrameIndex() + history_index) % LLFastTimer::NamedTimer::HISTORY_NUM;
@ -647,18 +548,7 @@ U32 LLFastTimer::NamedTimer::getHistoricalCalls(S32 history_index ) const
LLFastTimer::FrameState& LLFastTimer::NamedTimer::getFrameState() const
{
llassert_always(mFrameStateIndex >= 0);
if (this == NamedTimerFactory::instance().getActiveRootTimer())
{
return NamedTimerFactory::instance().getRootFrameState();
}
return getFrameStateList()[mFrameStateIndex];
}
// static
LLFastTimer::NamedTimer& LLFastTimer::NamedTimer::getRootNamedTimer()
{
return *NamedTimerFactory::instance().getActiveRootTimer();
return *mFrameState;
}
std::vector<LLFastTimer::NamedTimer*>::const_iterator LLFastTimer::NamedTimer::beginChildren()
@ -769,145 +659,3 @@ LLFastTimer::LLFastTimer(LLFastTimer::FrameState* state)
}
//////////////////////////////////////////////////////////////////////////////
//
// Important note: These implementations must be FAST!
//
#if LL_WINDOWS
//
// Windows implementation of CPU clock
//
//
// NOTE: put back in when we aren't using platform sdk anymore
//
// because MS has different signatures for these functions in winnt.h
// need to rename them to avoid conflicts
//#define _interlockedbittestandset _renamed_interlockedbittestandset
//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
//#include <intrin.h>
//#undef _interlockedbittestandset
//#undef _interlockedbittestandreset
//inline U32 LLFastTimer::getCPUClockCount32()
//{
// U64 time_stamp = __rdtsc();
// return (U32)(time_stamp >> 8);
//}
//
//// return full timer value, *not* shifted by 8 bits
//inline U64 LLFastTimer::getCPUClockCount64()
//{
// return __rdtsc();
//}
// shift off lower 8 bits for lower resolution but longer term timing
// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
#if USE_RDTSC
U32 LLFastTimer::getCPUClockCount32()
{
U32 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
shr eax,8
shl edx,24
or eax, edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
// return full timer value, *not* shifted by 8 bits
U64 LLFastTimer::getCPUClockCount64()
{
U64 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
mov eax,eax
mov edx,edx
mov dword ptr [ret_val+4], edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
std::string LLFastTimer::sClockType = "rdtsc";
#else
//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
U64 LLFastTimer::getCPUClockCount64()
{
return get_clock_count();
}
std::string LLFastTimer::sClockType = "QueryPerformanceCounter";
#endif
#endif
#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
//
// Linux and Solaris implementation of CPU clock - non-x86.
// This is accurate but SLOW! Only use out of desperation.
//
// Try to use the MONOTONIC clock if available, this is a constant time counter
// with nanosecond resolution (but not necessarily accuracy) and attempts are
// made to synchronize this value between cores at kernel start. It should not
// be affected by CPU frequency. If not available use the REALTIME clock, but
// this may be affected by NTP adjustments or other user activity affecting
// the system time.
U64 LLFastTimer::getCPUClockCount64()
{
struct timespec tp;
#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
clock_gettime(CLOCK_REALTIME,&tp);
return (tp.tv_sec*LLFastTimer::sClockResolution)+tp.tv_nsec;
}
U32 LLFastTimer::getCPUClockCount32()
{
return (U32)(LLFastTimer::getCPUClockCount64() >> 8);
}
std::string LLFastTimer::sClockType = "clock_gettime";
#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
//
// Mac+Linux+Solaris FAST x86 implementation of CPU clock
U32 LLFastTimer::getCPUClockCount32()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return (U32)(x >> 8);
}
U64 LLFastTimer::getCPUClockCount64()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return x;
}
std::string LLFastTimer::sClockType = "rdtsc";
#endif

View File

@ -1,6 +1,6 @@
/**
* @file llfasttimer.h
* @brief Inline implementations of fast timers.
* @brief Declaration of a fast timer.
*
* $LicenseInfo:firstyear=2004&license=viewerlgpl$
* Second Life Viewer Source Code
@ -27,9 +27,363 @@
#ifndef LL_FASTTIMER_H
#define LL_FASTTIMER_H
// Implementation of getCPUClockCount32() and getCPUClockCount64 are now in llfastertimer_class.cpp.
#include "llinstancetracker.h"
// pull in the actual class definition
#include "llfasttimer_class.h"
#define FAST_TIMER_ON 1
#define DEBUG_FAST_TIMER_THREADS 1
class LLMutex;
#include <queue>
#include "llsd.h"
#define LL_FASTTIMER_USE_RDTSC 1
LL_COMMON_API void assert_main_thread();
class LL_COMMON_API LLFastTimer
{
public:
class NamedTimer;
struct LL_COMMON_API FrameState
{
FrameState();
void setNamedTimer(NamedTimer* timerp) { mTimer = timerp; }
U32 mSelfTimeCounter;
U32 mCalls;
FrameState* mParent; // info for caller timer
FrameState* mLastCaller; // used to bootstrap tree construction
NamedTimer* mTimer;
U16 mActiveCount; // number of timers with this ID active on stack
bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame
};
// stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
class LL_COMMON_API NamedTimer
: public LLInstanceTracker<NamedTimer>
{
friend class DeclareTimer;
public:
~NamedTimer();
enum { HISTORY_NUM = 300 };
const std::string& getName() const { return mName; }
NamedTimer* getParent() const { return mParent; }
void setParent(NamedTimer* parent);
S32 getDepth();
std::string getToolTip(S32 history_index = -1);
typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
child_const_iter beginChildren();
child_const_iter endChildren();
std::vector<NamedTimer*>& getChildren();
void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
bool getCollapsed() const { return mCollapsed; }
U32 getCountAverage() const { return mCountAverage; }
U32 getCallAverage() const { return mCallAverage; }
U32 getHistoricalCount(S32 history_index = 0) const;
U32 getHistoricalCalls(S32 history_index = 0) const;
void setFrameState(FrameState* state) { mFrameState = state; state->setNamedTimer(this); }
FrameState& getFrameState() const;
private:
friend class LLFastTimer;
friend class NamedTimerFactory;
//
// methods
//
NamedTimer(const std::string& name);
// recursive call to gather total time from children
static void accumulateTimings();
// updates cumulative times and hierarchy,
// can be called multiple times in a frame, at any point
static void processTimes();
static void buildHierarchy();
static void resetFrame();
static void reset();
//
// members
//
FrameState* mFrameState;
std::string mName;
U32 mTotalTimeCounter;
U32 mCountAverage;
U32 mCallAverage;
U32* mCountHistory;
U32* mCallHistory;
// tree structure
NamedTimer* mParent; // NamedTimer of caller(parent)
std::vector<NamedTimer*> mChildren;
bool mCollapsed; // don't show children
bool mNeedsSorting; // sort children whenever child added
};
// used to statically declare a new named timer
class LL_COMMON_API DeclareTimer
: public LLInstanceTracker<DeclareTimer>
{
friend class LLFastTimer;
public:
DeclareTimer(const std::string& name, bool open);
DeclareTimer(const std::string& name);
NamedTimer& getNamedTimer() { return mTimer; }
private:
FrameState mFrameState;
NamedTimer& mTimer;
};
public:
LLFastTimer(LLFastTimer::FrameState* state);
LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
: mFrameState(&timer.mFrameState)
{
#if FAST_TIMER_ON
LLFastTimer::FrameState* frame_state = mFrameState;
mStartTime = getCPUClockCount32();
frame_state->mActiveCount++;
frame_state->mCalls++;
// keep current parent as long as it is active when we are
frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0);
LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData;
mLastTimerData = *cur_timer_data;
cur_timer_data->mCurTimer = this;
cur_timer_data->mFrameState = frame_state;
cur_timer_data->mChildTime = 0;
#endif
#if DEBUG_FAST_TIMER_THREADS
#if !LL_RELEASE
assert_main_thread();
#endif
#endif
}
LL_FORCE_INLINE ~LLFastTimer()
{
#if FAST_TIMER_ON
LLFastTimer::FrameState* frame_state = mFrameState;
U32 total_time = getCPUClockCount32() - mStartTime;
frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
frame_state->mActiveCount--;
// store last caller to bootstrap tree creation
// do this in the destructor in case of recursion to get topmost caller
frame_state->mLastCaller = mLastTimerData.mFrameState;
// we are only tracking self time, so subtract our total time delta from parents
mLastTimerData.mChildTime += total_time;
LLFastTimer::sCurTimerData = mLastTimerData;
#endif
}
public:
static LLMutex* sLogLock;
static std::queue<LLSD> sLogQueue;
static BOOL sLog;
static BOOL sMetricLog;
static std::string sLogName;
static bool sPauseHistory;
static bool sResetHistory;
// call this once a frame to reset timers
static void nextFrame();
// dumps current cumulative frame stats to log
// call nextFrame() to reset timers
static void dumpCurTimes();
// call this to reset timer hierarchy, averages, etc.
static void reset();
static U64 countsPerSecond();
static S32 getLastFrameIndex() { return sLastFrameIndex; }
static S32 getCurFrameIndex() { return sCurFrameIndex; }
static void writeLog(std::ostream& os);
static const NamedTimer* getTimerByName(const std::string& name);
struct CurTimerData
{
LLFastTimer* mCurTimer;
FrameState* mFrameState;
U32 mChildTime;
};
static CurTimerData sCurTimerData;
private:
//////////////////////////////////////////////////////////////////////////////
//
// Important note: These implementations must be FAST!
//
#if LL_WINDOWS
//
// Windows implementation of CPU clock
//
//
// NOTE: put back in when we aren't using platform sdk anymore
//
// because MS has different signatures for these functions in winnt.h
// need to rename them to avoid conflicts
//#define _interlockedbittestandset _renamed_interlockedbittestandset
//#define _interlockedbittestandreset _renamed_interlockedbittestandreset
//#include <intrin.h>
//#undef _interlockedbittestandset
//#undef _interlockedbittestandreset
//inline U32 LLFastTimer::getCPUClockCount32()
//{
// U64 time_stamp = __rdtsc();
// return (U32)(time_stamp >> 8);
//}
//
//// return full timer value, *not* shifted by 8 bits
//inline U64 LLFastTimer::getCPUClockCount64()
//{
// return __rdtsc();
//}
// shift off lower 8 bits for lower resolution but longer term timing
// on 1Ghz machine, a 32-bit word will hold ~1000 seconds of timing
#if LL_FASTTIMER_USE_RDTSC
static U32 getCPUClockCount32()
{
U32 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
shr eax,8
shl edx,24
or eax, edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
// return full timer value, *not* shifted by 8 bits
static U64 getCPUClockCount64()
{
U64 ret_val;
__asm
{
_emit 0x0f
_emit 0x31
mov eax,eax
mov edx,edx
mov dword ptr [ret_val+4], edx
mov dword ptr [ret_val], eax
}
return ret_val;
}
#else
//LL_COMMON_API U64 get_clock_count(); // in lltimer.cpp
// These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
static U32 getCPUClockCount32()
{
return (U32)(get_clock_count()>>8);
}
static U64 getCPUClockCount64()
{
return get_clock_count();
}
#endif
#endif
#if (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
//
// Linux and Solaris implementation of CPU clock - non-x86.
// This is accurate but SLOW! Only use out of desperation.
//
// Try to use the MONOTONIC clock if available, this is a constant time counter
// with nanosecond resolution (but not necessarily accuracy) and attempts are
// made to synchronize this value between cores at kernel start. It should not
// be affected by CPU frequency. If not available use the REALTIME clock, but
// this may be affected by NTP adjustments or other user activity affecting
// the system time.
static U64 getCPUClockCount64()
{
struct timespec tp;
#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
clock_gettime(CLOCK_REALTIME,&tp);
return (tp.tv_sec*sClockResolution)+tp.tv_nsec;
}
static U32 getCPUClockCount32()
{
return (U32)(getCPUClockCount64() >> 8);
}
#endif // (LL_LINUX || LL_SOLARIS) && !(defined(__i386__) || defined(__amd64__))
#if (LL_LINUX || LL_SOLARIS || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
//
// Mac+Linux+Solaris FAST x86 implementation of CPU clock
static U32 getCPUClockCount32()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return (U32)(x >> 8);
}
static U64 getCPUClockCount64()
{
U64 x;
__asm__ volatile (".byte 0x0f, 0x31": "=A"(x));
return x;
}
#endif
static U64 sClockResolution;
static S32 sCurFrameIndex;
static S32 sLastFrameIndex;
static U64 sLastFrameTime;
U32 mStartTime;
LLFastTimer::FrameState* mFrameState;
LLFastTimer::CurTimerData mLastTimerData;
};
typedef class LLFastTimer LLFastTimer;
#endif // LL_LLFASTTIMER_H

View File

@ -1,258 +0,0 @@
/**
* @file llfasttimer_class.h
* @brief Declaration of a fast timer.
*
* $LicenseInfo:firstyear=2004&license=viewerlgpl$
* Second Life Viewer Source Code
* Copyright (C) 2010, Linden Research, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License only.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
* $/LicenseInfo$
*/
#ifndef LL_FASTTIMER_CLASS_H
#define LL_FASTTIMER_CLASS_H
#include "llinstancetracker.h"
#define FAST_TIMER_ON 1
#define DEBUG_FAST_TIMER_THREADS 1
class LLMutex;
#include <queue>
#include "llsd.h"
LL_COMMON_API void assert_main_thread();
class LL_COMMON_API LLFastTimer
{
public:
class NamedTimer;
struct LL_COMMON_API FrameState
{
FrameState(NamedTimer* timerp);
U32 mSelfTimeCounter;
U32 mCalls;
FrameState* mParent; // info for caller timer
FrameState* mLastCaller; // used to bootstrap tree construction
NamedTimer* mTimer;
U16 mActiveCount; // number of timers with this ID active on stack
bool mMoveUpTree; // needs to be moved up the tree of timers at the end of frame
};
// stores a "named" timer instance to be reused via multiple LLFastTimer stack instances
class LL_COMMON_API NamedTimer
: public LLInstanceTracker<NamedTimer>
{
friend class DeclareTimer;
public:
~NamedTimer();
enum { HISTORY_NUM = 300 };
const std::string& getName() const { return mName; }
NamedTimer* getParent() const { return mParent; }
void setParent(NamedTimer* parent);
S32 getDepth();
std::string getToolTip(S32 history_index = -1);
typedef std::vector<NamedTimer*>::const_iterator child_const_iter;
child_const_iter beginChildren();
child_const_iter endChildren();
std::vector<NamedTimer*>& getChildren();
void setCollapsed(bool collapsed) { mCollapsed = collapsed; }
bool getCollapsed() const { return mCollapsed; }
U32 getCountAverage() const { return mCountAverage; }
U32 getCallAverage() const { return mCallAverage; }
U32 getHistoricalCount(S32 history_index = 0) const;
U32 getHistoricalCalls(S32 history_index = 0) const;
static NamedTimer& getRootNamedTimer();
S32 getFrameStateIndex() const { return mFrameStateIndex; }
FrameState& getFrameState() const;
private:
friend class LLFastTimer;
friend class NamedTimerFactory;
//
// methods
//
NamedTimer(const std::string& name);
// recursive call to gather total time from children
static void accumulateTimings();
// updates cumulative times and hierarchy,
// can be called multiple times in a frame, at any point
static void processTimes();
static void buildHierarchy();
static void resetFrame();
static void reset();
//
// members
//
S32 mFrameStateIndex;
std::string mName;
U32 mTotalTimeCounter;
U32 mCountAverage;
U32 mCallAverage;
U32* mCountHistory;
U32* mCallHistory;
// tree structure
NamedTimer* mParent; // NamedTimer of caller(parent)
std::vector<NamedTimer*> mChildren;
bool mCollapsed; // don't show children
bool mNeedsSorting; // sort children whenever child added
};
// used to statically declare a new named timer
class LL_COMMON_API DeclareTimer
: public LLInstanceTracker<DeclareTimer>
{
friend class LLFastTimer;
public:
DeclareTimer(const std::string& name, bool open);
DeclareTimer(const std::string& name);
static void updateCachedPointers();
private:
NamedTimer& mTimer;
FrameState* mFrameState;
};
public:
LLFastTimer(LLFastTimer::FrameState* state);
LL_FORCE_INLINE LLFastTimer(LLFastTimer::DeclareTimer& timer)
: mFrameState(timer.mFrameState)
{
#if FAST_TIMER_ON
LLFastTimer::FrameState* frame_state = mFrameState;
mStartTime = getCPUClockCount32();
frame_state->mActiveCount++;
frame_state->mCalls++;
// keep current parent as long as it is active when we are
frame_state->mMoveUpTree |= (frame_state->mParent->mActiveCount == 0);
LLFastTimer::CurTimerData* cur_timer_data = &LLFastTimer::sCurTimerData;
mLastTimerData = *cur_timer_data;
cur_timer_data->mCurTimer = this;
cur_timer_data->mFrameState = frame_state;
cur_timer_data->mChildTime = 0;
#endif
#if DEBUG_FAST_TIMER_THREADS
#if !LL_RELEASE
assert_main_thread();
#endif
#endif
}
LL_FORCE_INLINE ~LLFastTimer()
{
#if FAST_TIMER_ON
LLFastTimer::FrameState* frame_state = mFrameState;
U32 total_time = getCPUClockCount32() - mStartTime;
frame_state->mSelfTimeCounter += total_time - LLFastTimer::sCurTimerData.mChildTime;
frame_state->mActiveCount--;
// store last caller to bootstrap tree creation
// do this in the destructor in case of recursion to get topmost caller
frame_state->mLastCaller = mLastTimerData.mFrameState;
// we are only tracking self time, so subtract our total time delta from parents
mLastTimerData.mChildTime += total_time;
LLFastTimer::sCurTimerData = mLastTimerData;
#endif
}
public:
static LLMutex* sLogLock;
static std::queue<LLSD> sLogQueue;
static BOOL sLog;
static BOOL sMetricLog;
static std::string sLogName;
static bool sPauseHistory;
static bool sResetHistory;
typedef std::vector<FrameState> info_list_t;
static info_list_t& getFrameStateList();
// call this once a frame to reset timers
static void nextFrame();
// dumps current cumulative frame stats to log
// call nextFrame() to reset timers
static void dumpCurTimes();
// call this to reset timer hierarchy, averages, etc.
static void reset();
static U64 countsPerSecond();
static S32 getLastFrameIndex() { return sLastFrameIndex; }
static S32 getCurFrameIndex() { return sCurFrameIndex; }
static void writeLog(std::ostream& os);
static const NamedTimer* getTimerByName(const std::string& name);
struct CurTimerData
{
LLFastTimer* mCurTimer;
FrameState* mFrameState;
U32 mChildTime;
};
static CurTimerData sCurTimerData;
static std::string sClockType;
private:
static U32 getCPUClockCount32();
static U64 getCPUClockCount64();
static U64 sClockResolution;
static S32 sCurFrameIndex;
static S32 sLastFrameIndex;
static U64 sLastFrameTime;
static info_list_t* sTimerInfos;
U32 mStartTime;
LLFastTimer::FrameState* mFrameState;
LLFastTimer::CurTimerData mLastTimerData;
};
typedef class LLFastTimer LLFastTimer;
#endif // LL_LLFASTTIMER_CLASS_H

View File

@ -1168,6 +1168,8 @@ static LLFastTimer::DeclareTimer FTM_SERVICE_CALLBACK("Callback");
static LLFastTimer::DeclareTimer FTM_AGENT_AUTOPILOT("Autopilot");
static LLFastTimer::DeclareTimer FTM_AGENT_UPDATE("Update");
LLFastTimer::DeclareTimer FTM_FRAME("Frame");
bool LLAppViewer::mainLoop()
{
LLMemType mt1(LLMemType::MTYPE_MAIN);
@ -1206,7 +1208,8 @@ bool LLAppViewer::mainLoop()
// Handle messages
while (!LLApp::isExiting())
{
LLFastTimer::nextFrame(); // Should be outside of any timer instances
LLFastTimer _(FTM_FRAME);
LLFastTimer::nextFrame();
//clear call stack records
llclearcallstacks;
@ -3106,8 +3109,6 @@ void LLAppViewer::writeSystemInfo()
LL_INFOS("SystemInfo") << "OS: " << getOSInfo().getOSStringSimple() << LL_ENDL;
LL_INFOS("SystemInfo") << "OS info: " << getOSInfo() << LL_ENDL;
LL_INFOS("SystemInfo") << "Timers: " << LLFastTimer::sClockType << LL_ENDL;
writeDebugInfo(); // Save out debug_info.log early, in case of crash.
}

View File

@ -41,6 +41,9 @@ class LLTextureFetch;
class LLWatchdogTimeout;
class LLUpdaterService;
extern LLFastTimer::DeclareTimer FTM_FRAME;
class LLAppViewer : public LLApp
{
public:

View File

@ -1192,7 +1192,7 @@ static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_MIN_MAX("Min/Max");
static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_RGB2LUM("RGB to Luminance");
static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_RESCALE("Rescale");
static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_GEN_NORMAL("Generate Normal");
static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_CREATE("Create");
static LLFastTimer::DeclareTimer FTM_BUMP_SOURCE_CREATE("Bump Source Create");
// static
void LLBumpImageList::onSourceLoaded( BOOL success, LLViewerTexture *src_vi, LLImageRaw* src, LLUUID& source_asset_id, EBumpEffect bump_code )

View File

@ -160,7 +160,7 @@ LLFastTimer::NamedTimer* LLFastTimerView::getLegendID(S32 y)
BOOL LLFastTimerView::handleDoubleClick(S32 x, S32 y, MASK mask)
{
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it)
{
@ -257,7 +257,7 @@ BOOL LLFastTimerView::handleHover(S32 x, S32 y, MASK mask)
}
S32 i = 0;
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it, ++i)
{
@ -419,11 +419,11 @@ void LLFastTimerView::draw()
y -= (texth + 2);
sTimerColors[&LLFastTimer::NamedTimer::getRootNamedTimer()] = LLColor4::grey;
sTimerColors[&getFrameTimer()] = LLColor4::grey;
F32 hue = 0.f;
for (timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for (timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != timer_tree_iterator_t();
++it)
{
@ -448,7 +448,7 @@ void LLFastTimerView::draw()
S32 cur_line = 0;
ft_display_idx.clear();
std::map<LLFastTimer::NamedTimer*, S32> display_line;
for (timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for (timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != timer_tree_iterator_t();
++it)
{
@ -558,7 +558,7 @@ void LLFastTimerView::draw()
U64 totalticks;
if (!LLFastTimer::sPauseHistory)
{
U64 ticks = LLFastTimer::NamedTimer::getRootNamedTimer().getHistoricalCount(mScrollIndex);
U64 ticks = getFrameTimer().getHistoricalCount(mScrollIndex);
if (LLFastTimer::getCurFrameIndex() >= 10)
{
@ -598,7 +598,7 @@ void LLFastTimerView::draw()
totalticks = 0;
for (S32 j=0; j<histmax; j++)
{
U64 ticks = LLFastTimer::NamedTimer::getRootNamedTimer().getHistoricalCount(j);
U64 ticks = getFrameTimer().getHistoricalCount(j);
if (ticks > totalticks)
totalticks = ticks;
@ -704,7 +704,7 @@ void LLFastTimerView::draw()
LLFastTimer::NamedTimer* prev_id = NULL;
S32 i = 0;
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it, ++i)
{
@ -867,7 +867,7 @@ void LLFastTimerView::draw()
}
U64 cur_max = 0;
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it)
{
@ -968,7 +968,7 @@ void LLFastTimerView::draw()
{
std::string legend_stat;
bool first = true;
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it)
{
@ -990,7 +990,7 @@ void LLFastTimerView::draw()
std::string timer_stat;
first = true;
for(timer_tree_iterator_t it = begin_timer_tree(LLFastTimer::NamedTimer::getRootNamedTimer());
for(timer_tree_iterator_t it = begin_timer_tree(getFrameTimer());
it != end_timer_tree();
++it)
{
@ -1551,3 +1551,9 @@ void LLFastTimerView::onClickCloseBtn()
setVisible(false);
}
LLFastTimer::NamedTimer& LLFastTimerView::getFrameTimer()
{
return FTM_FRAME.getNamedTimer();
}

View File

@ -46,6 +46,7 @@ private:
static LLSD analyzePerformanceLogDefault(std::istream& is) ;
static void exportCharts(const std::string& base, const std::string& target);
void onPause();
LLFastTimer::NamedTimer& getFrameTimer();
public: