/**
 * @file llfasttimer.h
 * @brief Declaration of a fast timer.
 *
 * $LicenseInfo:firstyear=2004&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
 * $/LicenseInfo$
 */

#ifndef LL_FASTTIMER_H
#define LL_FASTTIMER_H

#include "llinstancetracker.h"
#include "lltrace.h"
#include "lltreeiterators.h"

#if LL_WINDOWS
#include <intrin.h>
#endif

#define LL_FAST_TIMER_ON 1
#define LL_FASTTIMER_USE_RDTSC 1

// NOTE: Also see llprofiler.h
#if !defined(LL_PROFILER_CONFIGURATION)
#define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
#endif // LL_PROFILER_CONFIGURATION
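
// Illustrative usage sketch (not part of the original header; the timer name
// FTM_EXAMPLE and the function below are hypothetical): a named handle is declared
// once, then LL_RECORD_BLOCK_TIME times everything up to the end of the enclosing scope.
//
//     static LLTrace::BlockTimerStatHandle FTM_EXAMPLE("Example Work");
//
//     void doExampleWork()
//     {
//         LL_RECORD_BLOCK_TIME(FTM_EXAMPLE); // BlockTimer lives until the scope ends
//         // ... code to be measured ...
//     }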

namespace LLTrace
{
// use to create blocktimer rvalue to be captured in a reference so that the BlockTimer lives to the end of the block.
class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer);

class BlockTimer
{
public:
    typedef BlockTimer self_t;
    typedef class BlockTimerStatHandle DeclareTimer;

    ~BlockTimer();

    F64Seconds getElapsedTime();

    //////////////////////////////////////////////////////////////////////////////
    //
    // Important note: These implementations must be FAST!
    //

#if LL_WINDOWS
    //
    // Windows implementation of CPU clock
    //

    //
    // NOTE: put back in when we aren't using platform sdk anymore
    //
    // because MS has different signatures for these functions in winnt.h
    // need to rename them to avoid conflicts
    //#define _interlockedbittestandset _renamed_interlockedbittestandset
    //#define _interlockedbittestandreset _renamed_interlockedbittestandreset
    //#include <intrin.h>
    //#undef _interlockedbittestandset
    //#undef _interlockedbittestandreset

    //inline U32 getCPUClockCount32()
    //{
    //    U64 time_stamp = __rdtsc();
    //    return (U32)(time_stamp >> 8);
    //}
    //
    //// return full timer value, *not* shifted by 8 bits
    //inline U64 getCPUClockCount64()
    //{
    //    return __rdtsc();
    //}

    // shift off lower 8 bits for lower resolution but longer term timing
    // on a 1GHz machine, a 32-bit word will hold ~1000 seconds of timing
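    // (Worked out: 2^32 counts * 2^8 cycles per count = 2^40 cycles; at 1e9 cycles
    // per second that is roughly 1100 seconds before the 32-bit value wraps.)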

#if LL_FASTTIMER_USE_RDTSC
    static U32 getCPUClockCount32()
    {
        unsigned __int64 val = __rdtsc();
        val = val >> 8;
        return static_cast<U32>(val);
    }

    // return full timer value, *not* shifted by 8 bits
    static U64 getCPUClockCount64()
    {
        return static_cast<U64>( __rdtsc() );
    }

#else
    //U64 get_clock_count(); // in lltimer.cpp
    // These use QueryPerformanceCounter, which is arguably fine and also works on AMD architectures.
    static U32 getCPUClockCount32()
    {
        return (U32)(get_clock_count()>>8);
    }

    static U64 getCPUClockCount64()
    {
        return get_clock_count();
    }

#endif // LL_FASTTIMER_USE_RDTSC

#endif // LL_WINDOWS

#if (LL_LINUX) && !(defined(__i386__) || defined(__amd64__))
    //
    // Linux implementation of CPU clock - non-x86.
    // This is accurate but SLOW! Only use out of desperation.
    //
    // Try to use the MONOTONIC clock if available; this is a constant time counter
    // with nanosecond resolution (but not necessarily accuracy), and attempts are
    // made to synchronize this value between cores at kernel start. It should not
    // be affected by CPU frequency. If not available, use the REALTIME clock, but
    // this may be affected by NTP adjustments or other user activity affecting
    // the system time.
    static U64 getCPUClockCount64()
    {
        struct timespec tp;

#ifdef CLOCK_MONOTONIC // MONOTONIC supported at build-time?
        if (-1 == clock_gettime(CLOCK_MONOTONIC,&tp)) // if MONOTONIC isn't supported at runtime then ouch, try REALTIME
#endif
            clock_gettime(CLOCK_REALTIME,&tp);

        return (tp.tv_sec*sClockResolution)+tp.tv_nsec;
    }

    static U32 getCPUClockCount32()
    {
        return (U32)(getCPUClockCount64() >> 8);
    }

#endif // (LL_LINUX) && !(defined(__i386__) || defined(__amd64__))

#if (LL_LINUX || LL_DARWIN) && (defined(__i386__) || defined(__amd64__))
    //
    // Mac+Linux FAST x86 implementation of CPU clock
    // (".byte 0x0f, 0x31" emits the rdtsc instruction directly)
    static U32 getCPUClockCount32()
    {
        U32 low(0),high(0);
        __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) );
        return (low>>8) | (high<<24);
    }

    static U64 getCPUClockCount64()
    {
        U32 low(0),high(0);
        __asm__ volatile (".byte 0x0f, 0x31": "=a"(low), "=d"(high) );
        return (U64)low | ( ((U64)high) << 32);
    }

#endif

    static BlockTimerStatHandle& getRootTimeBlock();
    static void pushLog(LLSD sd);
    static void setLogLock(class LLMutex* mutex);
    static void writeLog(std::ostream& os);
    static void updateTimes();

    static U64 countsPerSecond();

    // updates cumulative times and hierarchy,
    // can be called multiple times in a frame, at any point
    static void processTimes();

    static void bootstrapTimerTree();
    static void incrementalUpdateTimerTree();

    // call this once a frame to periodically log timers
    static void logStats();

    // dumps current cumulative frame stats to log
    // call nextFrame() to reset timers
    static void dumpCurTimes();

private:
    friend class BlockTimerStatHandle;
    // FIXME: this friendship exists so that each thread can instantiate a root timer,
    // which could be a derived class with a public constructor instead, possibly
    friend class ThreadRecorder;
    friend BlockTimer timeThisBlock(BlockTimerStatHandle&);

    BlockTimer(BlockTimerStatHandle& timer);

    // no-copy
    BlockTimer(const BlockTimer& other);
    BlockTimer& operator=(const BlockTimer& other);

private:
    U64 mStartTime;
    BlockTimerStackRecord mParentTimerData{};

public:
    // statics
    static std::string sLogName;
    static bool sMetricLog,
                sLog;
    static U64 sClockResolution;

};
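
// Illustrative sketch of the per-frame bookkeeping these statics imply (the call site
// onFrameEnd is hypothetical; in practice the viewer's frame loop drives this and the
// exact sequence may differ):
//
//     void onFrameEnd()
//     {
//         LLTrace::BlockTimer::processTimes(); // fold accumulated samples into the timer tree
//         LLTrace::BlockTimer::logStats();     // once per frame: periodic logging, if enabled
//         // LLTrace::BlockTimer::dumpCurTimes() can be called to write the current
//         // cumulative frame stats to the log when debugging.
//     }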

// this dummy function assists in allocating a block timer with stack-based lifetime.
// this is done by capturing the return value in a stack-allocated const reference variable.
// (This is most easily done using the macro LL_RECORD_BLOCK_TIME)
// Otherwise, it would be possible to store a BlockTimer on the heap, resulting in non-nested lifetimes,
// which would break the invariants of the timing hierarchy logic
LL_FORCE_INLINE class BlockTimer timeThisBlock(class BlockTimerStatHandle& timer)
{
    return BlockTimer(timer);
}
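
// For reference, this is essentially the pattern LL_RECORD_BLOCK_TIME expands to (the
// macro makes the variable name unique via __LINE__); binding the temporary BlockTimer
// to a const reference extends its lifetime to the end of the enclosing block.
// FTM_EXAMPLE below is a hypothetical handle, not declared in this header:
//
//     const LLTrace::BlockTimer& block_time_recorder(LLTrace::timeThisBlock(FTM_EXAMPLE));
//     (void)block_time_recorder;
//     // ... timed work; the sample is recorded when block_time_recorder goes out of scope ...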

// stores a "named" timer instance to be reused via multiple BlockTimer stack instances
class BlockTimerStatHandle
:   public StatType<TimeBlockAccumulator>
{
public:
    BlockTimerStatHandle(const char* name, const char* description = "");

    TimeBlockTreeNode& getTreeNode() const;
    BlockTimerStatHandle* getParent() const { return getTreeNode().getParent(); }
    void setParent(BlockTimerStatHandle* parent) { getTreeNode().setParent(parent); }

    typedef std::vector<BlockTimerStatHandle*>::iterator child_iter;
    typedef std::vector<BlockTimerStatHandle*>::const_iterator child_const_iter;
    child_iter beginChildren();
    child_iter endChildren();
    bool hasChildren();
    std::vector<BlockTimerStatHandle*>& getChildren();

    StatType<TimeBlockAccumulator::CallCountFacet>& callCount()
    {
        return static_cast<StatType<TimeBlockAccumulator::CallCountFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
    }

    StatType<TimeBlockAccumulator::SelfTimeFacet>& selfTime()
    {
        return static_cast<StatType<TimeBlockAccumulator::SelfTimeFacet>&>(*(StatType<TimeBlockAccumulator>*)this);
    }

    bool mCollapsed; // don't show children
};
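
// Illustrative sketch (not part of the original header): visiting a handle's direct
// children through the accessors declared above. The function name visitDirectChildren
// is hypothetical.
//
//     void visitDirectChildren(LLTrace::BlockTimerStatHandle& handle)
//     {
//         if (!handle.hasChildren()) return;
//         std::vector<LLTrace::BlockTimerStatHandle*>& children = handle.getChildren();
//         for (LLTrace::BlockTimerStatHandle* child : children)
//         {
//             // child->getParent() should be &handle once the tree has been built
//             // by processTimes()/bootstrapTimerTree()
//         }
//     }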

// iterators and helper functions for walking the call hierarchy of block timers in different ways
typedef LLTreeDFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_iterator_t;
typedef LLTreeDFSPostIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_df_post_iterator_t;
typedef LLTreeBFSIter<BlockTimerStatHandle, BlockTimerStatHandle::child_const_iter> block_timer_tree_bf_iterator_t;

block_timer_tree_df_iterator_t begin_block_timer_tree_df(BlockTimerStatHandle& id);
block_timer_tree_df_iterator_t end_block_timer_tree_df();
block_timer_tree_df_post_iterator_t begin_block_timer_tree_df_post(BlockTimerStatHandle& id);
block_timer_tree_df_post_iterator_t end_block_timer_tree_df_post();
block_timer_tree_bf_iterator_t begin_block_timer_tree_bf(BlockTimerStatHandle& id);
block_timer_tree_bf_iterator_t end_block_timer_tree_bf();
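
// Illustrative sketch (not part of the original header): a pre-order depth-first walk
// over the whole timer tree, starting from the root. This assumes dereferencing the
// iterator yields a BlockTimerStatHandle pointer; see lltreeiterators.h for the exact
// semantics.
//
//     for (block_timer_tree_df_iterator_t it = begin_block_timer_tree_df(BlockTimer::getRootTimeBlock());
//          it != end_block_timer_tree_df();
//          ++it)
//     {
//         BlockTimerStatHandle* timerp = *it;
//         // ... inspect timerp ...
//     }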

LL_FORCE_INLINE BlockTimer::BlockTimer(BlockTimerStatHandle& timer)
{
#if LL_FAST_TIMER_ON
    BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
    if (!cur_timer_data)
    {
        // How likely is it that
        // LLThreadLocalSingletonPointer<T>::getInstance() will return NULL?
        // Even without researching, what we can say is that if we exit
        // without setting mStartTime at all, gcc 4.7 produces (fatal)
        // warnings about a possibly-uninitialized data member.
        mStartTime = 0;
        return;
    }
    TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
    accumulator.mActiveCount++;
    // keep current parent as long as it is active when we are
    accumulator.mMoveUpTree |= (accumulator.mParent->getCurrentAccumulator().mActiveCount == 0);

    // store top of stack
    mParentTimerData = *cur_timer_data;
    // push new information
    cur_timer_data->mActiveTimer = this;
    cur_timer_data->mTimeBlock = &timer;
    cur_timer_data->mChildTime = 0;

    mStartTime = getCPUClockCount64();
#endif
}

LL_FORCE_INLINE BlockTimer::~BlockTimer()
{
#if LL_FAST_TIMER_ON
    U64 total_time = getCPUClockCount64() - mStartTime;
    BlockTimerStackRecord* cur_timer_data = LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
    if (!cur_timer_data) return;

    TimeBlockAccumulator& accumulator = cur_timer_data->mTimeBlock->getCurrentAccumulator();

    accumulator.mCalls++;
    accumulator.mTotalTimeCounter += total_time;
    accumulator.mSelfTimeCounter += total_time - cur_timer_data->mChildTime;
    accumulator.mActiveCount--;

    // store last caller to bootstrap tree creation
    // do this in the destructor in case of recursion to get topmost caller
    accumulator.mLastCaller = mParentTimerData.mTimeBlock;

    // we are only tracking self time, so subtract our total time delta from our parent's
    mParentTimerData.mChildTime += total_time;

    // pop stack
    *cur_timer_data = mParentTimerData;
#endif
}

} // namespace LLTrace

typedef LLTrace::BlockTimer LLFastTimer;

#endif // LL_FASTTIMER_H