Major refactor and extension of perfdata

All data is now collated in a separate thread via a lock-free queue.
Data for all "self" attachments is collected, including non-rigged ones.
Known issues:
Some double counting exists:
*  in the non-rigged alpha mask, and maybe elsewhere
master
Beq 2021-10-21 13:18:45 +01:00
parent 390c136430
commit a4a7a765f1
26 changed files with 5270 additions and 422 deletions

View File

@ -0,0 +1,582 @@
// Provides an efficient blocking version of moodycamel::ConcurrentQueue.
// ©2015-2020 Cameron Desrochers. Distributed under the terms of the simplified
// BSD license, available at the top of concurrentqueue.h.
// Also dual-licensed under the Boost Software License (see LICENSE.md)
// Uses Jeff Preshing's semaphore implementation (under the terms of its
// separate zlib license, see lightweightsemaphore.h).
#pragma once
#include "concurrentqueue.h"
#include "lightweightsemaphore.h"
#include <type_traits>
#include <cerrno>
#include <memory>
#include <chrono>
#include <ctime>
namespace moodycamel
{
// This is a blocking version of the queue. It has an almost identical interface to
// the normal non-blocking version, with the addition of various wait_dequeue() methods
// and the removal of producer-specific dequeue methods.
template<typename T, typename Traits = ConcurrentQueueDefaultTraits>
class BlockingConcurrentQueue
{
private:
// Shorthand names for the wrapped lock-free queue and for the semaphore type
// that the blocking operations of this class wait on.
typedef ::moodycamel::ConcurrentQueue<T, Traits> ConcurrentQueue;
typedef ::moodycamel::LightweightSemaphore LightweightSemaphore;
public:
// Token, index and size types are re-exported from the wrapped ConcurrentQueue.
typedef typename ConcurrentQueue::producer_token_t producer_token_t;
typedef typename ConcurrentQueue::consumer_token_t consumer_token_t;
typedef typename ConcurrentQueue::index_t index_t;
typedef typename ConcurrentQueue::size_t size_t;
// Signed counterpart of size_t; element counts are cast through it before
// being handed to the semaphore.
typedef typename std::make_signed<size_t>::type ssize_t;
// Compile-time constants mirrored one-to-one from the wrapped ConcurrentQueue.
static const size_t BLOCK_SIZE = ConcurrentQueue::BLOCK_SIZE;
static const size_t EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD = ConcurrentQueue::EXPLICIT_BLOCK_EMPTY_COUNTER_THRESHOLD;
static const size_t EXPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::EXPLICIT_INITIAL_INDEX_SIZE;
static const size_t IMPLICIT_INITIAL_INDEX_SIZE = ConcurrentQueue::IMPLICIT_INITIAL_INDEX_SIZE;
static const size_t INITIAL_IMPLICIT_PRODUCER_HASH_SIZE = ConcurrentQueue::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE;
static const std::uint32_t EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE = ConcurrentQueue::EXPLICIT_CONSUMER_CONSUMPTION_QUOTA_BEFORE_ROTATE;
static const size_t MAX_SUBQUEUE_SIZE = ConcurrentQueue::MAX_SUBQUEUE_SIZE;
public:
// Creates a queue with at least `capacity` element slots; note that the
// actual number of elements that can be inserted without additional memory
// allocation depends on the number of producers and the block size (e.g. if
// the block size is equal to `capacity`, only a single block will be allocated
// up-front, which means only a single producer will be able to enqueue elements
// without an extra allocation -- blocks aren't shared between producers).
// This method is not thread safe -- it is up to the user to ensure that the
// queue is fully constructed before it starts being used by other threads (this
// includes making the memory effects of construction visible, possibly with a
// memory barrier).
// The semaphore is placement-constructed by create() in memory obtained from
// Traits::malloc, with constructor arguments 0 and Traits::MAX_SEMA_SPINS, and
// is released through destroy() by the owning unique_ptr.
// Raises std::bad_alloc via MOODYCAMEL_THROW when the semaphore cannot be allocated.
explicit BlockingConcurrentQueue(size_t capacity = 6 * BLOCK_SIZE)
: inner(capacity), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
{
// Layout check: taking the address of `inner` through a dummy object pointer
// must yield the same value as reinterpreting that pointer, i.e. `inner`
// has to live at offset 0 of this class.
assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
// create() hands back nullptr when Traits::malloc fails.
if (!sema) {
MOODYCAMEL_THROW(std::bad_alloc());
}
}
// Creates a queue by forwarding `minCapacity`, `maxExplicitProducers` and
// `maxImplicitProducers` unchanged to the wrapped ConcurrentQueue constructor.
// The semaphore is set up exactly as in the single-argument constructor:
// placement-constructed by create() with arguments 0 and Traits::MAX_SEMA_SPINS,
// and released through destroy().
// Raises std::bad_alloc via MOODYCAMEL_THROW when the semaphore cannot be allocated.
BlockingConcurrentQueue(size_t minCapacity, size_t maxExplicitProducers, size_t maxImplicitProducers)
: inner(minCapacity, maxExplicitProducers, maxImplicitProducers), sema(create<LightweightSemaphore, ssize_t, int>(0, (int)Traits::MAX_SEMA_SPINS), &BlockingConcurrentQueue::template destroy<LightweightSemaphore>)
{
// Layout check: `inner` must be located at offset 0 of this class.
assert(reinterpret_cast<ConcurrentQueue*>((BlockingConcurrentQueue*)1) == &((BlockingConcurrentQueue*)1)->inner && "BlockingConcurrentQueue must have ConcurrentQueue as its first member");
// create() hands back nullptr when Traits::malloc fails.
if (!sema) {
MOODYCAMEL_THROW(std::bad_alloc());
}
}
// Disable copying and copy assignment
BlockingConcurrentQueue(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
BlockingConcurrentQueue& operator=(BlockingConcurrentQueue const&) MOODYCAMEL_DELETE_FUNCTION;
// Move construction: takes over both the wrapped queue and the semaphore of
// `other`.
// This is *not* a thread-safe operation: no thread may touch either queue
// while the move is in progress, and the effects of the move have to be made
// visible to other threads before they use the destination.
// Tokens created for the source queue stay valid, but from now on they may
// only be used with the destination queue (they travel with the queue state).
BlockingConcurrentQueue(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
    : inner(std::move(other.inner)),
      sema(std::move(other.sema))
{
}
// Move assignment, implemented as a state swap with `other` (see swap_internal).
// Not thread-safe.
inline BlockingConcurrentQueue& operator=(BlockingConcurrentQueue&& other) MOODYCAMEL_NOEXCEPT
{
    BlockingConcurrentQueue& self = swap_internal(other);
    return self;
}
// Exchanges the complete state of this queue with that of `other`.
// Not thread-safe.
// Existing tokens remain valid after the swap, but they follow the queue
// state rather than the object: a token created for one queue must afterwards
// be used with the queue it was swapped into.
inline void swap(BlockingConcurrentQueue& other) MOODYCAMEL_NOEXCEPT
{
    (void)swap_internal(other);
}
private:
// Shared implementation of swap() and move assignment: exchanges the wrapped
// queue and the semaphore with `other`. Swapping an object with itself is a
// no-op. Always returns *this.
BlockingConcurrentQueue& swap_internal(BlockingConcurrentQueue& other)
{
    if (this != &other) {
        inner.swap(other.inner);
        sema.swap(other.sema);
    }
    return *this;
}
public:
// Copies `item` into the queue and, on success, signals the semaphore once.
// May allocate memory. Returns false only when the underlying enqueue fails:
// memory allocation failed, implicit production is disabled because
// Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or Traits::MAX_SUBQUEUE_SIZE
// has been defined and would be surpassed.
// Thread-safe.
inline bool enqueue(T const& item)
{
    if (!(details::likely)(inner.enqueue(item))) {
        return false;
    }
    sema->signal();
    return true;
}
// Moves `item` into the queue (where possible) and, on success, signals the
// semaphore once.
// May allocate memory. Returns false only when the underlying enqueue fails:
// memory allocation failed, implicit production is disabled because
// Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0, or Traits::MAX_SUBQUEUE_SIZE
// has been defined and would be surpassed.
// Thread-safe.
inline bool enqueue(T&& item)
{
    if (!(details::likely)(inner.enqueue(std::move(item)))) {
        return false;
    }
    sema->signal();
    return true;
}
// Copies `item` into the queue through the explicit producer `token` and, on
// success, signals the semaphore once.
// May allocate memory. Returns false only when memory allocation fails or
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed.
// Thread-safe.
inline bool enqueue(producer_token_t const& token, T const& item)
{
    if (!(details::likely)(inner.enqueue(token, item))) {
        return false;
    }
    sema->signal();
    return true;
}
// Moves `item` into the queue (where possible) through the explicit producer
// `token` and, on success, signals the semaphore once.
// May allocate memory. Returns false only when memory allocation fails or
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed.
// Thread-safe.
inline bool enqueue(producer_token_t const& token, T&& item)
{
    if (!(details::likely)(inner.enqueue(token, std::move(item)))) {
        return false;
    }
    sema->signal();
    return true;
}
// Enqueues several items.
// Allocates memory if required. Only fails if memory allocation fails (or
// implicit production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
// is 0, or Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed).
// Note: Use std::make_move_iterator if the elements should be moved instead of copied.
// Thread-safe.
template<typename It>
inline bool enqueue_bulk(It itemFirst, size_t count)
{
if ((details::likely)(inner.enqueue_bulk(std::forward<It>(itemFirst), count))) {
sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
return true;
}
return false;
}
// Enqueues `count` items read from `itemFirst` through the explicit producer
// `token` and, on success, signals the semaphore with `count`.
// May allocate memory. Returns false only when memory allocation fails or
// Traits::MAX_SUBQUEUE_SIZE has been defined and would be surpassed.
// Note: wrap the iterator with std::make_move_iterator to move the elements
// instead of copying them.
// Thread-safe.
template<typename It>
inline bool enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
{
    if (!(details::likely)(inner.enqueue_bulk(token, std::forward<It>(itemFirst), count))) {
        return false;
    }
    sema->signal(static_cast<LightweightSemaphore::ssize_t>(static_cast<ssize_t>(count)));
    return true;
}
// Copies `item` into the queue without allocating memory and, on success,
// signals the semaphore once.
// Returns false when there is not enough room to enqueue, or when implicit
// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
// is 0.
// Thread-safe.
inline bool try_enqueue(T const& item)
{
    if (!inner.try_enqueue(item)) {
        return false;
    }
    sema->signal();
    return true;
}
// Moves `item` into the queue (where possible) and, on success, signals the
// semaphore once.
// Does not allocate memory, except for the one-time implicit producer.
// Returns false when there is not enough room to enqueue, or when implicit
// production is disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE
// is 0.
// Thread-safe.
inline bool try_enqueue(T&& item)
{
    if (!inner.try_enqueue(std::move(item))) {
        return false;
    }
    sema->signal();
    return true;
}
// Copies `item` into the queue through the explicit producer `token` without
// allocating memory and, on success, signals the semaphore once.
// Returns false when there is not enough room to enqueue.
// Thread-safe.
inline bool try_enqueue(producer_token_t const& token, T const& item)
{
    if (!inner.try_enqueue(token, item)) {
        return false;
    }
    sema->signal();
    return true;
}
// Moves `item` into the queue (where possible) through the explicit producer
// `token` without allocating memory and, on success, signals the semaphore once.
// Returns false when there is not enough room to enqueue.
// Thread-safe.
inline bool try_enqueue(producer_token_t const& token, T&& item)
{
    if (!inner.try_enqueue(token, std::move(item))) {
        return false;
    }
    sema->signal();
    return true;
}
// Enqueues several items.
// Does not allocate memory (except for one-time implicit producer).
// Fails if not enough room to enqueue (or implicit production is
// disabled because Traits::INITIAL_IMPLICIT_PRODUCER_HASH_SIZE is 0).
// Note: Use std::make_move_iterator if the elements should be moved
// instead of copied.
// Thread-safe.
template<typename It>
inline bool try_enqueue_bulk(It itemFirst, size_t count)
{
if (inner.try_enqueue_bulk(std::forward<It>(itemFirst), count)) {
sema->signal((LightweightSemaphore::ssize_t)(ssize_t)count);
return true;
}
return false;
}
// Enqueues `count` items read from `itemFirst` through the explicit producer
// `token` without allocating memory and, on success, signals the semaphore
// with `count`.
// Returns false when there is not enough room to enqueue.
// Note: wrap the iterator with std::make_move_iterator to move the elements
// instead of copying them.
// Thread-safe.
template<typename It>
inline bool try_enqueue_bulk(producer_token_t const& token, It itemFirst, size_t count)
{
    if (!inner.try_enqueue_bulk(token, std::forward<It>(itemFirst), count)) {
        return false;
    }
    sema->signal(static_cast<LightweightSemaphore::ssize_t>(static_cast<ssize_t>(count)));
    return true;
}
// Non-blocking dequeue of a single element into `item`.
// Returns false when the semaphore's tryWait() fails, i.e. all producer
// streams appeared empty at the time they were checked (the queue is likely,
// but not guaranteed, to be empty).
// Never allocates. Thread-safe.
template<typename U>
inline bool try_dequeue(U& item)
{
    if (!sema->tryWait()) {
        return false;
    }
    // tryWait() succeeded, so keep retrying the inner queue until it hands
    // over an element.
    for (;;) {
        if (inner.try_dequeue(item)) {
            return true;
        }
    }
}
// Non-blocking dequeue of a single element into `item`, using the explicit
// consumer `token`.
// Returns false when the semaphore's tryWait() fails, i.e. all producer
// streams appeared empty at the time they were checked (the queue is likely,
// but not guaranteed, to be empty).
// Never allocates. Thread-safe.
template<typename U>
inline bool try_dequeue(consumer_token_t& token, U& item)
{
    if (!sema->tryWait()) {
        return false;
    }
    // tryWait() succeeded, so keep retrying the inner queue until it hands
    // over an element.
    for (;;) {
        if (inner.try_dequeue(token, item)) {
            return true;
        }
    }
}
// Attempts to dequeue several elements from the queue.
// Returns the number of items actually dequeued.
// Returns 0 if all producer streams appeared empty at the time they
// were checked (so, the queue is likely but not guaranteed to be empty).
// Never allocates. Thread-safe.
template<typename It>
inline size_t try_dequeue_bulk(It itemFirst, size_t max)
{
size_t count = 0;
max = (size_t)sema->tryWaitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
while (count != max) {
count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
}
return count;
}
// Non-blocking bulk dequeue of up to `max` elements, written through
// `itemFirst`, using the explicit consumer `token`.
// Returns the number of elements actually dequeued; 0 when all producer
// streams appeared empty at the time they were checked (the queue is likely,
// but not guaranteed, to be empty).
// Never allocates. Thread-safe.
template<typename It>
inline size_t try_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
{
    // The semaphore decides how many elements this call is entitled to.
    max = static_cast<size_t>(sema->tryWaitMany(static_cast<LightweightSemaphore::ssize_t>(static_cast<ssize_t>(max))));
    size_t taken = 0;
    // `itemFirst` is passed by reference (It&) so successive calls continue
    // where the previous one stopped.
    while (taken != max) {
        taken += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - taken);
    }
    return taken;
}
// Blocking dequeue: suspends the calling thread until an element is
// available, then stores it in `item`.
// Never allocates. Thread-safe.
template<typename U>
inline void wait_dequeue(U& item)
{
    // wait() is retried until it reports success.
    for (;;) {
        if (sema->wait()) {
            break;
        }
    }
    // Retry the inner queue until it hands over an element.
    for (;;) {
        if (inner.try_dequeue(item)) {
            break;
        }
    }
}
// Blocking dequeue with a timeout given in microseconds.
// Returns true and assigns to `item` once an element has been dequeued;
// returns false, leaving `item` untouched, when the timeout expires first.
// A negative timeout means "wait indefinitely", which makes the call
// functionally equivalent to wait_dequeue.
// Never allocates. Thread-safe.
template<typename U>
inline bool wait_dequeue_timed(U& item, std::int64_t timeout_usecs)
{
    const bool acquired = sema->wait(timeout_usecs);
    if (!acquired) {
        return false;
    }
    // Retry the inner queue until it hands over an element.
    for (;;) {
        if (inner.try_dequeue(item)) {
            return true;
        }
    }
}
// Blocking dequeue with a std::chrono timeout: converts `timeout` to whole
// microseconds and delegates to the microsecond overload.
// Returns true and assigns to `item` once an element has been dequeued;
// returns false, leaving `item` untouched, when the timeout expires first.
// Never allocates. Thread-safe.
template<typename U, typename Rep, typename Period>
inline bool wait_dequeue_timed(U& item, std::chrono::duration<Rep, Period> const& timeout)
{
    const std::int64_t timeout_usecs = std::chrono::duration_cast<std::chrono::microseconds>(timeout).count();
    return wait_dequeue_timed(item, timeout_usecs);
}
// Blocking dequeue using the explicit consumer `token`: suspends the calling
// thread until an element is available, then stores it in `item`.
// Never allocates. Thread-safe.
template<typename U>
inline void wait_dequeue(consumer_token_t& token, U& item)
{
    // wait() is retried until it reports success.
    for (;;) {
        if (sema->wait()) {
            break;
        }
    }
    // Retry the inner queue until it hands over an element.
    for (;;) {
        if (inner.try_dequeue(token, item)) {
            break;
        }
    }
}
// Blocking dequeue with a timeout given in microseconds, using the explicit
// consumer `token`.
// Returns true and assigns to `item` once an element has been dequeued;
// returns false, leaving `item` untouched, when the timeout expires first.
// A negative timeout means "wait indefinitely", which makes the call
// functionally equivalent to wait_dequeue.
// Never allocates. Thread-safe.
template<typename U>
inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::int64_t timeout_usecs)
{
    const bool acquired = sema->wait(timeout_usecs);
    if (!acquired) {
        return false;
    }
    // Retry the inner queue until it hands over an element.
    for (;;) {
        if (inner.try_dequeue(token, item)) {
            return true;
        }
    }
}
// Blocking dequeue with a std::chrono timeout, using the explicit consumer
// `token`: converts `timeout` to whole microseconds and delegates to the
// microsecond overload.
// Returns true and assigns to `item` once an element has been dequeued;
// returns false, leaving `item` untouched, when the timeout expires first.
// Never allocates. Thread-safe.
template<typename U, typename Rep, typename Period>
inline bool wait_dequeue_timed(consumer_token_t& token, U& item, std::chrono::duration<Rep, Period> const& timeout)
{
    const std::int64_t timeout_usecs = std::chrono::duration_cast<std::chrono::microseconds>(timeout).count();
    return wait_dequeue_timed(token, item, timeout_usecs);
}
// Attempts to dequeue several elements from the queue.
// Returns the number of items actually dequeued, which will
// always be at least one (this method blocks until the queue
// is non-empty) and at most max.
// Never allocates. Thread-safe.
template<typename It>
inline size_t wait_dequeue_bulk(It itemFirst, size_t max)
{
size_t count = 0;
max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max);
while (count != max) {
count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
}
return count;
}
// Attempts to dequeue several elements from the queue.
// Returns the number of items actually dequeued, which can
// be 0 if the timeout expires while waiting for elements,
// and at most max.
// Using a negative timeout indicates an indefinite timeout,
// and is thus functionally equivalent to calling wait_dequeue_bulk.
// Never allocates. Thread-safe.
template<typename It>
inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::int64_t timeout_usecs)
{
size_t count = 0;
max = (size_t)sema->waitMany((LightweightSemaphore::ssize_t)(ssize_t)max, timeout_usecs);
while (count != max) {
count += inner.template try_dequeue_bulk<It&>(itemFirst, max - count);
}
return count;
}
// Blocking bulk dequeue with a std::chrono timeout: converts `timeout` to
// whole microseconds and delegates to the microsecond overload.
// Returns the number of elements actually dequeued: at most `max`, and 0 when
// the timeout expires while waiting for elements.
// Never allocates. Thread-safe.
template<typename It, typename Rep, typename Period>
inline size_t wait_dequeue_bulk_timed(It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
{
    const std::int64_t timeout_usecs = std::chrono::duration_cast<std::chrono::microseconds>(timeout).count();
    return wait_dequeue_bulk_timed<It&>(itemFirst, max, timeout_usecs);
}
// Blocking bulk dequeue of up to `max` elements, written through `itemFirst`,
// using the explicit consumer `token`.
// Returns the number of elements actually dequeued, which will always be at
// least one (the call blocks until the queue is non-empty) and at most `max`.
// Never allocates. Thread-safe.
template<typename It>
inline size_t wait_dequeue_bulk(consumer_token_t& token, It itemFirst, size_t max)
{
    // The semaphore decides how many elements this call is entitled to.
    max = static_cast<size_t>(sema->waitMany(static_cast<LightweightSemaphore::ssize_t>(static_cast<ssize_t>(max))));
    size_t taken = 0;
    // `itemFirst` is passed by reference (It&) so successive calls continue
    // where the previous one stopped.
    while (taken != max) {
        taken += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - taken);
    }
    return taken;
}
// Blocking bulk dequeue of up to `max` elements with a timeout given in
// microseconds, using the explicit consumer `token`; elements are written
// through `itemFirst`.
// Returns the number of elements actually dequeued: at most `max`, and 0 when
// the timeout expires while waiting for elements.
// A negative timeout means "wait indefinitely", which makes the call
// functionally equivalent to wait_dequeue_bulk.
// Never allocates. Thread-safe.
template<typename It>
inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::int64_t timeout_usecs)
{
    // The semaphore decides how many elements this call is entitled to.
    max = static_cast<size_t>(sema->waitMany(static_cast<LightweightSemaphore::ssize_t>(static_cast<ssize_t>(max)), timeout_usecs));
    size_t taken = 0;
    // `itemFirst` is passed by reference (It&) so successive calls continue
    // where the previous one stopped.
    while (taken != max) {
        taken += inner.template try_dequeue_bulk<It&>(token, itemFirst, max - taken);
    }
    return taken;
}
// Blocking bulk dequeue with a std::chrono timeout, using the explicit
// consumer `token`: converts `timeout` to whole microseconds and delegates to
// the microsecond overload.
// Returns the number of elements actually dequeued: at most `max`, and 0 when
// the timeout expires while waiting for elements.
// Never allocates. Thread-safe.
template<typename It, typename Rep, typename Period>
inline size_t wait_dequeue_bulk_timed(consumer_token_t& token, It itemFirst, size_t max, std::chrono::duration<Rep, Period> const& timeout)
{
    const std::int64_t timeout_usecs = std::chrono::duration_cast<std::chrono::microseconds>(timeout).count();
    return wait_dequeue_bulk_timed<It&>(token, itemFirst, max, timeout_usecs);
}
// Estimates how many elements are currently in the queue, as reported by the
// semaphore's availableApprox().
// The figure is only accurate once the queue has completely stabilized: all
// enqueue and dequeue operations have finished, their memory effects are
// visible on the calling thread, and no new operation starts during the call.
// Thread-safe.
inline size_t size_approx() const
{
    const size_t approximate = static_cast<size_t>(sema->availableApprox());
    return approximate;
}
// Reports whether the atomic variables underlying the queue are lock-free
// (expected to be the case on most platforms). Delegates to the wrapped
// ConcurrentQueue.
// Thread-safe.
static bool is_lock_free()
{
    const bool lockFree = ConcurrentQueue::is_lock_free();
    return lockFree;
}
private:
// Allocates storage for a U with Traits::malloc and placement-constructs it
// from the two forwarded arguments. Returns nullptr when the allocation fails.
// The result must be released with destroy().
template<typename U, typename A1, typename A2>
static inline U* create(A1&& a1, A2&& a2)
{
    void* const storage = (Traits::malloc)(sizeof(U));
    if (storage == nullptr) {
        return nullptr;
    }
    return new (storage) U(std::forward<A1>(a1), std::forward<A2>(a2));
}
// Counterpart of create(): runs the destructor of a non-null object and then
// hands the pointer to Traits::free. Traits::free is called even when `p` is
// nullptr.
template<typename U>
static inline void destroy(U* p)
{
    const bool hasObject = (p != nullptr);
    if (hasObject) {
        p->~U();
    }
    (Traits::free)(p);
}
private:
ConcurrentQueue inner;
std::unique_ptr<LightweightSemaphore, void (*)(LightweightSemaphore*)> sema;
};
// Free-function swap for BlockingConcurrentQueue; forwards to the member
// swap(), so the same caveats apply (the member is documented as not thread-safe).
template<typename T, typename Traits>
inline void swap(BlockingConcurrentQueue<T, Traits>& a,
                 BlockingConcurrentQueue<T, Traits>& b) MOODYCAMEL_NOEXCEPT
{
    a.swap(b);
}
} // end namespace moodycamel

File diff suppressed because it is too large Load Diff

View File

@ -27,10 +27,10 @@
#include "fsperfstats.h"
namespace FSPerfStats
{
int RecordSceneTime::writeBuffer{0};
bool RecordSceneTime::collectionEnabled{true};
std::array< typename RecordSceneTime::StatsArray, 2 > RecordSceneTime::stats{ {} };
std::atomic<int> StatsRecorder::writeBuffer{0};
bool StatsRecorder::collectionEnabled{true};
std::array<StatsRecorder::StatsTypeMatrix,2> StatsRecorder::statsDoubleBuffer{ {} };
std::array<StatsRecorder::StatsSummaryArray,2> StatsRecorder::max{ {} };
std::array<StatsRecorder::StatsSummaryArray,2> StatsRecorder::sum{ {} };
}

View File

@ -28,19 +28,28 @@
* $/LicenseInfo$
*/
#include <atomic>
#include <chrono>
#include <array>
#include <unordered_map>
#include "lluuid.h"
#include "lltimer.h"
#include "blockingconcurrentqueue.h"
#include "llapp.h"
#include "fstelemetry.h"
extern LLUUID gAgentID;
namespace FSPerfStats
{
enum class ObjStatType_t{
RENDER_GEOMETRY=0,
RENDER_SHADOWS,
RENDER_COMBINED,
STATS_COUNT
enum class ObjType_t{
OT_GENERAL=0, // Also Unknown. Used for n/a type stats such as scenery
OT_AVATAR,
OT_ATTACHMENT,
OT_HUD,
OT_COUNT
};
enum class SceneStatType_t{
enum class StatType_t{
RENDER_GEOMETRY=0,
RENDER_SHADOWS,
RENDER_HUDS,
@ -57,243 +66,251 @@ namespace FSPerfStats
STATS_COUNT
};
using ObjStatType = ObjStatType_t;
using SceneStatType = SceneStatType_t;
// One timing sample passed through the StatsRecorder queue.
// StatsRecorder::send() enqueues a populated record; StatsRecorder::endFrame()
// enqueues a value-initialised record (StatsRecord{}), which processUpdate()
// recognises as the end-of-frame marker and answers by toggling the buffers.
struct StatsRecord
{
StatType_t statType; // which statistic the sample is added to
ObjType_t objType; // object category; selects the per-type stats map and summary rows
LLUUID objID; // key of the object within the per-type stats map
uint64_t time; // amount added to the totals -- units are not visible here; TODO confirm
};
class RecordSceneTime
{
using StatsEnum = SceneStatType;
using StatsArray = std::array<uint64_t, static_cast<size_t>(StatsEnum::STATS_COUNT)>;
// using StatsBlock = std::unordered_map<T, StatsArray>;
static int writeBuffer;
static std::array<StatsArray,2> stats;
static bool collectionEnabled;
RecordSceneTime(const RecordSceneTime&) = delete;
RecordSceneTime() = delete;
const StatsEnum type;
std::chrono::steady_clock::time_point start;
class StatsRecorder{
using Queue = moodycamel::BlockingConcurrentQueue<StatsRecord>;
public:
static inline void enable(){collectionEnabled=true;};
static inline void disable(){collectionEnabled=false;};
static inline bool enabled(){return(collectionEnabled);};
RecordSceneTime(SceneStatType type):start{std::chrono::steady_clock::now()}, type{type} {}
~RecordSceneTime()
{
auto val = std::chrono::duration<uint64_t, std::nano>(std::chrono::steady_clock::now() - start).count();
stats[writeBuffer][static_cast<size_t>(type)] += val;
};
static inline void toggleBuffer()
static inline StatsRecorder& getInstance()
{
if(enabled())
{
// stats[writeBuffer][static_cast<size_t>(SceneStatType::RENDER_FPS)] = LLTrace::get_frame_recording().getPeriodMeanPerSec(LLStatViewer::FPS,3); // last 3 Frames
writeBuffer = (writeBuffer+1)%2;
}; // not we are relying on atomic updates here. The risk is low and would cause minor errors in the stats display.
auto& statsArray = stats[writeBuffer];
std::fill_n(statsArray.begin() ,static_cast<size_t>(SceneStatType::STATS_COUNT),0);
static StatsRecorder instance;
// volatile int dummy{};
return instance;
}
static inline int getReadBufferIndex(){return (writeBuffer+1)%2;};
static inline StatsArray getCurrentStatsBuffer(){ return stats[getReadBufferIndex()];}
static inline uint64_t get(StatsEnum type){return stats[getReadBufferIndex()][static_cast<size_t>(type)];}
};
template <typename T>
class RecordObjectTime
{
using StatsEnum = ObjStatType;
using StatsArray = std::array<uint64_t, static_cast<size_t>(StatsEnum::STATS_COUNT)>;
using StatsBlock = std::unordered_map<T, StatsArray>;
static int writeBuffer;
static std::array<StatsBlock,2> stats;
static inline void send(const StatsRecord& u){StatsRecorder::getInstance().q.enqueue(u);};
static inline void endFrame(){StatsRecorder::getInstance().q.enqueue(StatsRecord{});};
static std::array<StatsArray,2> max;
static std::array<StatsArray,2> sum;
static inline void enable() { collectionEnabled=true; };
static inline void disable() { collectionEnabled=false; };
static inline bool enabled() { return(collectionEnabled); };
static inline int getReadBufferIndex() { return (writeBuffer ^ 1); };
// static inline const StatsTypeMatrix& getCurrentStatsMatrix(){ return statsDoubleBuffer[getReadBufferIndex()];}
// Returns the value recorded for statistic `type` of object `id` in category
// `otype`, read from the current read buffer. Returns 0 when the object has
// no entry.
// The lookup uses find() rather than operator[]: operator[] would
// default-insert an entry for every unknown id, so a plain read would mutate
// the map that the collector thread also works on.
static inline uint64_t get(ObjType_t otype, LLUUID id, StatType_t type)
{
    const auto& statsMap = statsDoubleBuffer[getReadBufferIndex()][static_cast<size_t>(otype)];
    const auto entry = statsMap.find(id);
    if (entry == statsMap.end())
    {
        return 0;
    }
    return entry->second[static_cast<size_t>(type)];
}
// Returns the scene-wide value of statistic `type`: the entry stored under
// the null UUID in the OT_GENERAL category of the current read buffer.
// Returns 0 when no such entry exists.
// The lookup uses find() rather than operator[] so that reading never inserts
// an entry into the map that the collector thread also works on.
static inline uint64_t getSceneStat(StatType_t type)
{
    static const LLUUID null_id{};
    const auto& sceneMap = statsDoubleBuffer[getReadBufferIndex()][static_cast<size_t>(ObjType_t::OT_GENERAL)];
    const auto entry = sceneMap.find(null_id);
    if (entry == sceneMap.end())
    {
        return 0;
    }
    return entry->second[static_cast<size_t>(type)];
}
// Returns the summed value of statistic `type` over category `otype`, taken
// from the current read buffer.
static inline uint64_t getSum(ObjType_t otype, StatType_t type)
{
    const auto& perCategory = sum[getReadBufferIndex()][static_cast<size_t>(otype)];
    return perCategory[static_cast<size_t>(type)];
}
// Returns the maximum value of statistic `type` within category `otype`,
// taken from the current read buffer.
static inline uint64_t getMax(ObjType_t otype, StatType_t type)
{
    const auto& perCategory = max[getReadBufferIndex()][static_cast<size_t>(otype)];
    return perCategory[static_cast<size_t>(type)];
}
private:
// Constructs the queue with a requested capacity of 100 and starts run() on
// a new thread that consumes from it; the thread is detached immediately.
StatsRecorder()
    : q(100)
    , t(&StatsRecorder::run)
{
    t.detach();
}
// StatsArray is a uint64_t for each possible statistic type.
using StatsArray = std::array<uint64_t, static_cast<size_t>(FSPerfStats::StatType_t::STATS_COUNT)>;
using StatsMap = std::unordered_map<LLUUID, StatsArray, FSUUIDHash>;
using StatsTypeMatrix = std::array<StatsMap, static_cast<size_t>(FSPerfStats::ObjType_t::OT_COUNT)>;
using StatsSummaryArray = std::array<StatsArray, static_cast<size_t>(FSPerfStats::ObjType_t::OT_COUNT)>;
static std::atomic<int> writeBuffer;
static std::array<StatsTypeMatrix,2> statsDoubleBuffer;
static std::array<StatsSummaryArray,2> max;
static std::array<StatsSummaryArray,2> sum;
static bool collectionEnabled;
RecordObjectTime(const RecordObjectTime&) = delete;
RecordObjectTime() = delete;
const T key;
const StatsEnum type;
std::chrono::steady_clock::time_point start;
public:
static inline void enable(){collectionEnabled=true;};
static inline void disable(){collectionEnabled=false;};
static inline bool enabled(){return(collectionEnabled);};
RecordObjectTime(T key, ObjStatType type):start{std::chrono::steady_clock::now()}, key{key}, type{type} {}
~RecordObjectTime()
{
using ST = StatsEnum;
void processUpdate(const StatsRecord& upd)
{
FSZone;
using ST = StatType_t;
// Note: nullptr is used as the key for global stats
constexpr auto period{500};
auto val = std::chrono::duration<uint64_t, std::nano>(std::chrono::steady_clock::now() - start).count();
if(key)
if(upd.statType == StatType_t::RENDER_GEOMETRY && upd.objType == ObjType_t::OT_GENERAL && upd.objID == LLUUID{} && upd.time == 0)
{
stats[writeBuffer][key][static_cast<size_t>(type)] += val;
stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)] += val;
if(max[writeBuffer][static_cast<size_t>(type)] < stats[writeBuffer][key][static_cast<size_t>(type)])
{
max[writeBuffer][static_cast<size_t>(type)] = stats[writeBuffer][key][static_cast<size_t>(type)];
}
if(max[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] < stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)])
{
max[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] = stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)];
}
sum[writeBuffer][static_cast<size_t>(type)] += val;
sum[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] += val;
toggleBuffer();
return;
}
};
StatsMap& stm {statsDoubleBuffer[writeBuffer][static_cast<size_t>(upd.objType)]};
auto& key{upd.objID};
auto val {upd.time};
auto type {upd.statType};
FSZoneText(key.asString().c_str(), 36);
FSZoneValue(val);
auto& thisAsset = stm[key];
thisAsset[static_cast<size_t>(type)] += val;
thisAsset[static_cast<size_t>(ST::RENDER_COMBINED)] += val;
FSZoneValue(thisAsset[static_cast<size_t>(type)]);
sum[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(type)] += val;
sum[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(ST::RENDER_COMBINED)] += val;
FSZoneValue(static_cast<size_t>(upd.objType));
FSZoneValue(statsDoubleBuffer[writeBuffer][static_cast<size_t>(upd.objType)][key][static_cast<size_t>(ST::RENDER_COMBINED)]);
if(max[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(type)] < stm[key][static_cast<size_t>(type)])
{
max[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(type)] = stm[key][static_cast<size_t>(type)];
}
if(max[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(ST::RENDER_COMBINED)] < stm[key][static_cast<size_t>(ST::RENDER_COMBINED)])
{
max[writeBuffer][static_cast<size_t>(upd.objType)][static_cast<size_t>(ST::RENDER_COMBINED)] = stm[key][static_cast<size_t>(ST::RENDER_COMBINED)];
}
}
static inline void toggleBuffer()
{
using ST = StatsEnum;
FSPlot("q size", static_cast<int64_t>(StatsRecorder::getInstance().q.size_approx()));
FSZone;
using ST = StatType_t;
// auto& statsMap = stats[writeBuffer];
// for(auto& stat_entry : statsMap)
// {
// auto val = stat_entry.second[static_cast<size_t>(ST::RENDER_COMBINED)];
// auto avg = stats[(writeBuffer+1)%2][stat_entry.first][static_cast<size_t>(ST::RENDER_COMBINED)];
// stat_entry.second[static_cast<size_t>(ST::RENDER_COMBINED)] = avg + (val/500) - (avg/500);
// }
if(enabled())
{
writeBuffer = (writeBuffer+1)%2;
}; // note we are relying on atomic updates here. The risk is low and would cause minor errors in the stats display.
auto& statsMap = stats[writeBuffer];
auto& statsMap = statsDoubleBuffer[writeBuffer][static_cast<size_t>(ObjType_t::OT_ATTACHMENT)];
for(auto& stat_entry : statsMap)
{
std::fill_n(stat_entry.second.begin() ,static_cast<size_t>(ST::STATS_COUNT),0);
auto val = stat_entry.second[static_cast<size_t>(ST::RENDER_COMBINED)];
auto avg = statsDoubleBuffer[writeBuffer ^ 1][static_cast<size_t>(ObjType_t::OT_ATTACHMENT)][stat_entry.first][static_cast<size_t>(ST::RENDER_COMBINED)];
stat_entry.second[static_cast<size_t>(ST::RENDER_COMBINED)] = avg + (val/100) - (avg/100);
}
if(enabled())
{
writeBuffer ^= 1;
}; // note we are relying on atomic updates here. The risk is low and would cause minor errors in the stats display.
auto& statsTypeMatrix = statsDoubleBuffer[writeBuffer];
for(auto& statsMap : statsTypeMatrix)
{
FSZoneN("Clear stats maps");
for(auto& stat_entry : statsMap)
{
std::fill_n(stat_entry.second.begin() ,static_cast<size_t>(ST::STATS_COUNT),0);
}
statsMap.clear();
}
for(int i=0; i< static_cast<size_t>(ObjType_t::OT_COUNT); i++)
{
FSZoneN("clear max/sum");
max[writeBuffer][i].fill(0);
sum[writeBuffer][i].fill(0);
}
statsMap.clear();
std::fill_n(max[writeBuffer].begin(),static_cast<size_t>(ST::STATS_COUNT),0);
std::fill_n(sum[writeBuffer].begin(),static_cast<size_t>(ST::STATS_COUNT),0);
}
static inline int getReadbufferIndex(){return (writeBuffer+1)%2;};
static inline StatsBlock& getCurrentStatsBuffer(){ return stats[(writeBuffer+1)%2]; }
static inline uint64_t getMax(StatsEnum type){return max[(writeBuffer+1)%2][static_cast<size_t>(type)];}
static inline uint64_t getSum(StatsEnum type){return sum[(writeBuffer+1)%2][static_cast<size_t>(type)];}
static inline uint64_t getNum(){return stats[(writeBuffer+1)%2].size();}
static inline uint64_t get(T key, StatsEnum type){return stats[(writeBuffer+1)%2][key][static_cast<size_t>(type)];}
};
template <typename T>
class RecordAttachmentTime
static void run()
{
StatsRecord upd;
auto& instance {StatsRecorder::getInstance()};
FSThreadName( "PerfStats" );
while( !LLApp::isExiting() )
{
if(instance.q.wait_dequeue_timed(upd, std::chrono::milliseconds(5)))
{
instance.processUpdate(upd);
}
}
}
Queue q;
std::thread t;
~StatsRecorder() = default;
StatsRecorder(const StatsRecorder&) = delete;
StatsRecorder& operator=(const StatsRecorder&) = delete;
};
// std::chrono::duration<double> getTime(){
// auto begin= std::chrono::system_clock::now();
// for ( size_t i= 0; i <= tenMill; ++i){
// StatsRecorder::getInstance();
// }
// return std::chrono::system_clock::now() - begin;
// };
template <enum ObjType_t ObjType>
class RecordTime
{
using StatsEnum = ObjStatType;
using StatsArray = std::array<uint64_t, static_cast<size_t>(StatsEnum::STATS_COUNT)>;
using StatsBlock = std::unordered_map<T, StatsArray>;
static int writeBuffer;
static std::array<StatsBlock,2> stats;
static std::array<StatsArray,2> max;
static std::array<StatsArray,2> sum;
static bool collectionEnabled;
RecordAttachmentTime(const RecordAttachmentTime&) = delete;
RecordAttachmentTime() = delete;
const T key;
const StatsEnum type;
std::chrono::steady_clock::time_point start;
private:
RecordTime(const RecordTime&) = delete;
RecordTime() = delete;
const StatType_t type;
const decltype(ObjType) objType;
const LLUUID objID;
U64 start;
RecordTime( StatType_t type ){};//
public:
static inline void enable(){collectionEnabled=true;};
static inline void disable(){collectionEnabled=false;};
static inline bool enabled(){return(collectionEnabled);};
RecordAttachmentTime(T key, ObjStatType type):start{std::chrono::steady_clock::now()}, key{key}, type{type} {}
~RecordAttachmentTime()
RecordTime( const LLUUID id, StatType_t type ):start{LLTimer::getCurrentClockCount()}, type{type}, objType{ObjType}, objID{id}{};
~RecordTime()
{
using ST = StatsEnum;
// Note: nullptr is used as the key for global stats
auto val = std::chrono::duration<uint64_t, std::nano>(std::chrono::steady_clock::now() - start).count();
stats[writeBuffer][key][static_cast<size_t>(type)] += val;
stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)] += val;
if(max[writeBuffer][static_cast<size_t>(type)] < stats[writeBuffer][key][static_cast<size_t>(type)])
{
max[writeBuffer][static_cast<size_t>(type)] = stats[writeBuffer][key][static_cast<size_t>(type)];
}
if(max[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] < stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)])
{
max[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] = stats[writeBuffer][key][static_cast<size_t>(ST::RENDER_COMBINED)];
}
sum[writeBuffer][static_cast<size_t>(type)] += val;
sum[writeBuffer][static_cast<size_t>(ST::RENDER_COMBINED)] += val;
FSZoneC(tracy::Color::Red);
auto val = LLTimer::getCurrentClockCount() - start;
FSZoneValue(val);
FSZoneValue(static_cast<U64>(objType));
FSZoneText(objID.asString().c_str(), 36);
StatsRecord stat{type, objType, objID, val};
StatsRecorder::send(std::move(stat));
};
static inline void toggleBuffer()
{
using ST = StatsEnum;
if(enabled())
{
writeBuffer = (writeBuffer+1)%2;
}; // note we are relying on atomic updates here. The risk is low and would cause minor errors in the stats display.
auto& statsMap = stats[writeBuffer];
for(auto& stat_entry : statsMap)
{
std::fill_n(stat_entry.second.begin() ,static_cast<size_t>(ST::STATS_COUNT),0);
}
statsMap.clear();
std::fill_n(max[writeBuffer].begin(),static_cast<size_t>(ST::STATS_COUNT),0);
std::fill_n(sum[writeBuffer].begin(),static_cast<size_t>(ST::STATS_COUNT),0);
}
static inline int getReadbufferIndex(){return (writeBuffer+1)%2;};
static inline StatsBlock& getCurrentStatsBuffer(){ return stats[(writeBuffer+1)%2]; }
static inline uint64_t getMax(StatsEnum type){return max[(writeBuffer+1)%2][static_cast<size_t>(type)];}
static inline uint64_t getSum(StatsEnum type){return sum[(writeBuffer+1)%2][static_cast<size_t>(type)];}
static inline uint64_t getNum(){return stats[(writeBuffer+1)%2].size();}
static inline uint64_t get(T key, StatsEnum type){return stats[(writeBuffer+1)%2][key][static_cast<size_t>(type)];}
};
static inline void toggleBuffer()
{
// RecordObjectTime<LLVOAvatar*>::toggleBuffer();
RecordSceneTime::toggleBuffer();
}
template< typename T >
int RecordObjectTime<T>::writeBuffer{0};
// Helpers that scale a raw clock-count value by get_timer_info().mClockFrequencyInv
// (presumably seconds per tick -- confirm against the timer implementation)
// to express it in nanoseconds, microseconds or milliseconds respectively.
inline double raw_to_ns(U64 raw)
{
    const double ticks = static_cast<double>(raw);
    return (ticks * 1000000000.0) * get_timer_info().mClockFrequencyInv;
}
inline double raw_to_us(U64 raw)
{
    const double ticks = static_cast<double>(raw);
    return (ticks * 1000000.0) * get_timer_info().mClockFrequencyInv;
}
inline double raw_to_ms(U64 raw)
{
    const double ticks = static_cast<double>(raw);
    return (ticks * 1000.0) * get_timer_info().mClockFrequencyInv;
}
template< typename T >
bool RecordObjectTime<T>::collectionEnabled{true};
template< typename T >
std::array< typename RecordObjectTime< T >::StatsArray, 2 > RecordObjectTime<T>::max;
template< typename T >
std::array< typename RecordObjectTime< T >::StatsArray, 2 > RecordObjectTime<T>::sum;
template< typename T >
std::array< typename RecordObjectTime< T >::StatsBlock, 2 > RecordObjectTime< T >::stats{ {{}} };
template< typename T >
int RecordAttachmentTime<T>::writeBuffer{0};
template< typename T >
bool RecordAttachmentTime<T>::collectionEnabled{true};
template< typename T >
std::array< typename RecordAttachmentTime< T >::StatsArray, 2 > RecordAttachmentTime<T>::max;
template< typename T >
std::array< typename RecordAttachmentTime< T >::StatsArray, 2 > RecordAttachmentTime<T>::sum;
template< typename T >
std::array< typename RecordAttachmentTime< T >::StatsBlock, 2 > RecordAttachmentTime< T >::stats{ {{}} };
using RecordSceneTime = RecordTime<ObjType_t::OT_GENERAL>;
using RecordAvatarTime = RecordTime<ObjType_t::OT_AVATAR>;
using RecordAttachmentTime = RecordTime<ObjType_t::OT_ATTACHMENT>;
}// namespace FSPerfStats
// <FS:Beq> helper function
using RATptr = std::unique_ptr<FSPerfStats::RecordAttachmentTime>;
// Starts a RENDER_GEOMETRY timer for the attachment that `vobj` belongs to,
// but only when that attachment hangs off the object whose ID is gAgentID.
//
// vobj    - object being rendered; may be null.
// returns - an owning pointer to a FSPerfStats::RecordAttachmentTime keyed by
//           the root attachment's item ID, or nullptr when vobj is null, is
//           not an attachment, has a broken parent chain, or its root's
//           parent is not gAgentID.
//
// The parent chain is walked until isRootEdit() is true. getParent() is
// checked for null at every step: the original dereferenced it blindly, so a
// missing parent (e.g. an object that is mid-detach -- TODO confirm when this
// can occur) would crash the render loop just to collect statistics.
template <typename T>
static inline RATptr trackMyAttachment(const T * vobj)
{
    if( !vobj ){ return nullptr; }

    const T* rootAtt{vobj};
    if( rootAtt->isAttachment() )
    {
        FSZone;

        // Climb to the root of the attachment linkset.
        while( !rootAtt->isRootEdit() )
        {
            const T* parent = (T*)(rootAtt->getParent());
            if( !parent ){ return nullptr; } // chain ended before reaching a root
            rootAtt = parent;
        }

        // The root's parent is the object the attachment is worn by.
        const T* wearer = (T*)(rootAtt->getParent());
        if( wearer && wearer->getID() == gAgentID )
        {
            // Kept in its own scope: FSZone and FSZoneNC declare the same
            // scoped-zone variable name.
#if TRACY_ENABLE
            FSZoneNC( "trackMyAttachment:self", tracy::Color::Red );
            auto& str = rootAtt->getAttachmentItemName();
            FSZoneText(str.c_str(), str.size());
            FSZoneText( rootAtt->getAttachmentItemID().asString().c_str(), 36);
#endif
            return( std::make_unique<FSPerfStats::RecordAttachmentTime>( rootAtt->getAttachmentItemID(), FSPerfStats::StatType_t::RENDER_GEOMETRY) );
        }
    }
    return nullptr;
}
// </FS:Beq>
#endif

View File

@ -43,6 +43,8 @@
#define FSZoneN( name ) ZoneNamedN( ___tracy_scoped_zone, name, FSTelemetry::active)
#define FSZoneC( color ) ZoneNamedC( ___tracy_scoped_zone, color, FSTelemetry::active)
#define FSZoneNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, FSTelemetry::active)
#define FSZoneText( text, size ) ZoneText( text, size )
#define FSZoneValue( num_uint64 ) ZoneValue( num_uint64 )
#define FSPlot( name, value ) TracyPlot( name, value)
#define FSFrameMark FrameMark
#define FSThreadName( name ) tracy::SetThreadName( name )
@ -58,10 +60,12 @@
#define FSZoneN( name )
#define FSZoneC( color )
#define FSZoneNC( name, color )
#define FSZoneText( text, size )
#define FSZoneValue( num_uint64 )
#define FSPlot( name, value )
#define FSFrameMark
#define FSThreadName( name )
#define FSMessageL ( message )
#define FSMessageL( message )
#define FSTelemetryIsConnected
#endif // TRACY_ENABLE

View File

@ -0,0 +1,411 @@
// Provides an efficient implementation of a semaphore (LightweightSemaphore).
// This is an extension of Jeff Preshing's semaphore implementation (licensed
// under the terms of its separate zlib license) that has been adapted and
// extended by Cameron Desrochers.
#pragma once
#include <cstddef> // For std::size_t
#include <atomic>
#include <type_traits> // For std::make_signed<T>
#if defined(_WIN32)
// Avoid including windows.h in a header; we only need a handful of
// items, so we'll redeclare them here (this is relatively safe since
// the API generally has to remain stable between Windows versions).
// I know this is an ugly hack but it still beats polluting the global
// namespace with thousands of generic names or adding a .cpp for nothing.
extern "C" {
struct _SECURITY_ATTRIBUTES;
__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
}
#elif defined(__MACH__)
#include <mach/mach.h>
#elif defined(__unix__)
#include <semaphore.h>
#endif
namespace moodycamel
{
namespace details
{
// Code in the mpmc_sema namespace below is an adaptation of Jeff Preshing's
// portable + lightweight semaphore implementations, originally from
// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
// LICENSE:
// Copyright (c) 2015 Jeff Preshing
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must not
// claim that you wrote the original software. If you use this software
// in a product, an acknowledgement in the product documentation would be
// appreciated but is not required.
// 2. Altered source versions must be plainly marked as such, and must not be
// misrepresented as being the original software.
// 3. This notice may not be removed or altered from any source distribution.
#if defined(_WIN32)
// Thin wrapper around a Win32 semaphore handle created with CreateSemaphoreW.
// Non-copyable. Every wait variant reports success only when
// WaitForSingleObject returns 0.
class Semaphore
{
private:
    void* m_hSema;

    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;

    // Waits on the handle for at most `millis` milliseconds.
    bool waitMillis(unsigned long millis)
    {
        return WaitForSingleObject(m_hSema, millis) == 0;
    }

public:
    // Creates the semaphore with `initialCount` permits (must be >= 0) and a
    // maximum count of 0x7fffffff.
    Semaphore(int initialCount = 0)
    {
        assert(initialCount >= 0);
        const long upperBound = 0x7fffffff;
        m_hSema = CreateSemaphoreW(nullptr, initialCount, upperBound, nullptr);
        assert(m_hSema);
    }

    ~Semaphore()
    {
        CloseHandle(m_hSema);
    }

    // Waits with the 0xffffffff timeout value (no time limit).
    bool wait()
    {
        const unsigned long forever = 0xffffffff;
        return waitMillis(forever);
    }

    // Zero-timeout wait: succeeds only if a permit is available right now.
    bool try_wait()
    {
        return waitMillis(0);
    }

    // Waits for up to `usecs` microseconds. The value is converted to whole
    // milliseconds by integer division, so anything under 1000 us becomes a
    // zero-millisecond wait.
    bool timed_wait(std::uint64_t usecs)
    {
        const unsigned long millis = static_cast<unsigned long>(usecs / 1000);
        return waitMillis(millis);
    }

    // Releases `count` permits, retrying until ReleaseSemaphore succeeds.
    void signal(int count = 1)
    {
        for (;;)
        {
            if (ReleaseSemaphore(m_hSema, count, nullptr))
                break;
        }
    }
};
#elif defined(__MACH__)
//---------------------------------------------------------
// Semaphore (Apple iOS and OSX)
// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
//---------------------------------------------------------
class Semaphore
{
private:
semaphore_t m_sema;
Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
public:
Semaphore(int initialCount = 0)
{
assert(initialCount >= 0);
kern_return_t rc = semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
assert(rc == KERN_SUCCESS);
(void)rc;
}
~Semaphore()
{
semaphore_destroy(mach_task_self(), m_sema);
}
bool wait()
{
return semaphore_wait(m_sema) == KERN_SUCCESS;
}
bool try_wait()
{
return timed_wait(0);
}
bool timed_wait(std::uint64_t timeout_usecs)
{
mach_timespec_t ts;
ts.tv_sec = static_cast<unsigned int>(timeout_usecs / 1000000);
ts.tv_nsec = static_cast<int>((timeout_usecs % 1000000) * 1000);
// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
kern_return_t rc = semaphore_timedwait(m_sema, ts);
return rc == KERN_SUCCESS;
}
void signal()
{
while (semaphore_signal(m_sema) != KERN_SUCCESS);
}
void signal(int count)
{
while (count-- > 0)
{
while (semaphore_signal(m_sema) != KERN_SUCCESS);
}
}
};
#elif defined(__unix__)
//---------------------------------------------------------
// Semaphore (POSIX, Linux)
//---------------------------------------------------------
class Semaphore
{
private:
    sem_t m_sema;

    Semaphore(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;
    Semaphore& operator=(const Semaphore& other) MOODYCAMEL_DELETE_FUNCTION;

public:
    // Initialises an unnamed semaphore (pshared argument 0) holding
    // `initialCount` permits (must be >= 0).
    Semaphore(int initialCount = 0)
    {
        assert(initialCount >= 0);
        const int status = sem_init(&m_sema, 0, static_cast<unsigned int>(initialCount));
        assert(status == 0);
        (void)status; // only read by the assert above
    }

    ~Semaphore()
    {
        sem_destroy(&m_sema);
    }

    // Blocks until a permit is taken. Calls interrupted with EINTR are
    // retried; any other failure returns false.
    bool wait()
    {
        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
        for (;;)
        {
            const int status = sem_wait(&m_sema);
            if (status == 0)
                return true;
            if (status != -1 || errno != EINTR)
                return false;
        }
    }

    // Non-blocking attempt to take a permit, retried on EINTR.
    bool try_wait()
    {
        for (;;)
        {
            const int status = sem_trywait(&m_sema);
            if (status == 0)
                return true;
            if (status != -1 || errno != EINTR)
                return false;
        }
    }

    // Waits for up to `usecs` microseconds. sem_timedwait takes an absolute
    // deadline, so the timeout is added to the current CLOCK_REALTIME time.
    bool timed_wait(std::uint64_t usecs)
    {
        const int usecsPerSec = 1000000;
        const int nsecsPerSec = 1000000000;

        struct timespec deadline;
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_sec += (time_t)(usecs / usecsPerSec);
        deadline.tv_nsec += (long)(usecs % usecsPerSec) * 1000;

        // sem_timedwait bombs if you have more than 1e9 in tv_nsec
        // so we have to clean things up before passing it in
        if (deadline.tv_nsec >= nsecsPerSec)
        {
            deadline.tv_nsec -= nsecsPerSec;
            ++deadline.tv_sec;
        }

        for (;;)
        {
            const int status = sem_timedwait(&m_sema, &deadline);
            if (status == 0)
                return true;
            if (status != -1 || errno != EINTR)
                return false;
        }
    }

    // Releases one permit, retrying for as long as sem_post returns -1.
    void signal()
    {
        while (sem_post(&m_sema) == -1)
        {
            // retry
        }
    }

    // Releases `count` permits one at a time; does nothing when count <= 0.
    void signal(int count)
    {
        for (; count > 0; --count)
        {
            signal();
        }
    }
};
#else
#error Unsupported platform! (No semaphore wrapper available)
#endif
} // end namespace details
//---------------------------------------------------------
// LightweightSemaphore
//---------------------------------------------------------
// Counting semaphore that keeps its count in an atomic (m_count) and only
// touches the OS-level details::Semaphore (m_sema) when a waiter could not
// take a permit after spinning, or when signal() finds the count negative.
class LightweightSemaphore
{
public:
typedef std::make_signed<std::size_t>::type ssize_t;
private:
// Permit count. Waiters that are about to block decrement it with fetch_sub,
// so it can go negative; signal() uses the negative part to decide how many
// m_sema permits to release.
std::atomic<ssize_t> m_count;
// Underlying platform semaphore that blocked waiters sleep on.
details::Semaphore m_sema;
// Upper bound on spin iterations attempted before falling back to m_sema.
int m_maxSpins;
// Slow path for acquiring one permit.
// timeout_usecs < 0: block on m_sema.wait(); > 0: block on m_sema.timed_wait();
// == 0: do not block at all.
// Returns true if a permit was acquired, false otherwise.
bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
{
ssize_t oldCount;
int spin = m_maxSpins;
// Phase 1: spin, trying to CAS-decrement a positive count.
while (--spin >= 0)
{
oldCount = m_count.load(std::memory_order_relaxed);
if ((oldCount > 0) && m_count.compare_exchange_strong(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
return true;
std::atomic_signal_fence(std::memory_order_acquire); // Prevent the compiler from collapsing the loop.
}
// Phase 2: decrement unconditionally. A positive previous value means a
// permit was available after all; otherwise we are now counted as a waiter.
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
if (oldCount > 0)
return true;
if (timeout_usecs < 0)
{
if (m_sema.wait())
return true;
}
if (timeout_usecs > 0 && m_sema.timed_wait((std::uint64_t)timeout_usecs))
return true;
// At this point, we've timed out waiting for the semaphore, but the
// count is still decremented indicating we may still be waiting on
// it. So we have to re-adjust the count, but only if the semaphore
// wasn't signaled enough times for us too since then. If it was, we
// need to release the semaphore too.
while (true)
{
oldCount = m_count.load(std::memory_order_acquire);
// Count is non-negative again: try to consume a pending m_sema permit.
if (oldCount >= 0 && m_sema.try_wait())
return true;
// Count is still negative: undo our decrement and report failure.
if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
return false;
}
}
// Slow path for acquiring up to `max` permits (max must be > 0).
// Same timeout convention as waitWithPartialSpinning().
// Returns the number of permits acquired, or 0 if none could be obtained.
ssize_t waitManyWithPartialSpinning(ssize_t max, std::int64_t timeout_usecs = -1)
{
assert(max > 0);
ssize_t oldCount;
int spin = m_maxSpins;
// Phase 1: spin, trying to take min(count, max) permits in one CAS.
while (--spin >= 0)
{
oldCount = m_count.load(std::memory_order_relaxed);
if (oldCount > 0)
{
ssize_t newCount = oldCount > max ? oldCount - max : 0;
if (m_count.compare_exchange_strong(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
return oldCount - newCount;
}
std::atomic_signal_fence(std::memory_order_acquire);
}
// Phase 2: reserve a single permit, blocking on m_sema if none was available.
oldCount = m_count.fetch_sub(1, std::memory_order_acquire);
if (oldCount <= 0)
{
if ((timeout_usecs == 0) || (timeout_usecs < 0 && !m_sema.wait()) || (timeout_usecs > 0 && !m_sema.timed_wait((std::uint64_t)timeout_usecs)))
{
// The wait failed or was skipped: same count-repair loop as in
// waitWithPartialSpinning(), except that success breaks out to the
// common tail below instead of returning.
while (true)
{
oldCount = m_count.load(std::memory_order_acquire);
if (oldCount >= 0 && m_sema.try_wait())
break;
if (oldCount < 0 && m_count.compare_exchange_strong(oldCount, oldCount + 1, std::memory_order_relaxed, std::memory_order_relaxed))
return 0;
}
}
}
// One permit is held at this point; opportunistically grab more without blocking.
if (max > 1)
return 1 + tryWaitMany(max - 1);
return 1;
}
public:
// initialCount: starting number of permits (must be >= 0).
// maxSpins: spin iterations attempted by the slow paths before blocking (must be >= 0).
LightweightSemaphore(ssize_t initialCount = 0, int maxSpins = 10000) : m_count(initialCount), m_maxSpins(maxSpins)
{
assert(initialCount >= 0);
assert(maxSpins >= 0);
}
// Takes one permit if the count is positive; never blocks.
// Returns true on success, false if the count was observed to be <= 0.
bool tryWait()
{
ssize_t oldCount = m_count.load(std::memory_order_relaxed);
while (oldCount > 0)
{
// On CAS failure oldCount is refreshed with the current value and the loop re-tests it.
if (m_count.compare_exchange_weak(oldCount, oldCount - 1, std::memory_order_acquire, std::memory_order_relaxed))
return true;
}
return false;
}
// Acquires one permit: fast path first, then the slow path with its default timeout of -1.
bool wait()
{
return tryWait() || waitWithPartialSpinning();
}
// As wait(), but passes timeout_usecs to the slow path; returns false if no permit was acquired.
bool wait(std::int64_t timeout_usecs)
{
return tryWait() || waitWithPartialSpinning(timeout_usecs);
}
// Acquires between 0 and (greedily) max, inclusive
ssize_t tryWaitMany(ssize_t max)
{
assert(max >= 0);
ssize_t oldCount = m_count.load(std::memory_order_relaxed);
while (oldCount > 0)
{
ssize_t newCount = oldCount > max ? oldCount - max : 0;
if (m_count.compare_exchange_weak(oldCount, newCount, std::memory_order_acquire, std::memory_order_relaxed))
return oldCount - newCount;
}
return 0;
}
// Acquires at least one, and (greedily) at most max
// (returns 0 if max is 0 or the slow path gives up without acquiring anything)
ssize_t waitMany(ssize_t max, std::int64_t timeout_usecs)
{
assert(max >= 0);
ssize_t result = tryWaitMany(max);
if (result == 0 && max > 0)
result = waitManyWithPartialSpinning(max, timeout_usecs);
return result;
}
// waitMany() with a timeout of -1; asserts that at least one permit was acquired.
ssize_t waitMany(ssize_t max)
{
ssize_t result = waitMany(max, -1);
assert(result > 0);
return result;
}
// Adds `count` permits (count must be >= 0).
void signal(ssize_t count = 1)
{
assert(count >= 0);
ssize_t oldCount = m_count.fetch_add(count, std::memory_order_release);
// -oldCount is the number of waiters registered when the count was negative;
// release min(-oldCount, count) permits on m_sema.
ssize_t toRelease = -oldCount < count ? -oldCount : count;
if (toRelease > 0)
{
m_sema.signal((int)toRelease);
}
}
// Snapshot of the permit count from a relaxed load; a non-positive count is reported as 0.
std::size_t availableApprox() const
{
ssize_t count = m_count.load(std::memory_order_relaxed);
return count > 0 ? static_cast<std::size_t>(count) : 0;
}
};
} // end namespace moodycamel

View File

@ -1633,7 +1633,7 @@ bool LLAppViewer::frame()
bool LLAppViewer::doFrame()
{
{
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_FRAME);
FSPerfStats::RecordSceneTime T (const LLUUID{}, FSPerfStats::StatType_t::RENDER_FRAME);
LLEventPump& mainloop(LLEventPumps::instance().obtain("mainloop"));
LLSD newFrame;
@ -1771,7 +1771,7 @@ bool LLAppViewer::doFrame()
// Update state based on messages, user input, object idle.
{
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_IDLE);
FSPerfStats::RecordSceneTime T (const LLUUID{}, FSPerfStats::StatType_t::RENDER_IDLE);
pauseMainloopTimeout(); // *TODO: Remove. Messages shouldn't be stalling for 20+ seconds!
@ -1851,7 +1851,7 @@ bool LLAppViewer::doFrame()
// of equal priority on Windows
if (milliseconds_to_sleep > 0)
{
FSPerfStats::RecordSceneTime T ( FSPerfStats::SceneStatType::RENDER_SLEEP );
FSPerfStats::RecordSceneTime T ( LLUUID{}, FSPerfStats::StatType_t::RENDER_SLEEP );
ms_sleep(milliseconds_to_sleep);
// also pause worker threads during this wait period
LLAppViewer::getTextureCache()->pause();
@ -1929,7 +1929,7 @@ bool LLAppViewer::doFrame()
if (fsLimitFramerate && LLStartUp::getStartupState() == STATE_STARTED && !gTeleportDisplay && !logoutRequestSent() && max_fps > F_APPROXIMATELY_ZERO)
{
// Sleep a while to limit frame rate.
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_FPSLIMIT);
FSPerfStats::RecordSceneTime T (const LLUUID{}, FSPerfStats::StatType_t::RENDER_FPSLIMIT);
F32 min_frame_time = 1.f / (F32)max_fps;
S32 milliseconds_to_sleep = llclamp((S32)((min_frame_time - frameTimer.getElapsedTimeF64()) * 1000.f), 0, 1000);
if (milliseconds_to_sleep > 0)
@ -1970,9 +1970,7 @@ bool LLAppViewer::doFrame()
FSFrameMark; // <FS:Beq> Tracy support delineate Frame
LLPROFILE_UPDATE();
}
FSPerfStats::RecordSceneTime::toggleBuffer();
FSPerfStats::RecordObjectTime<const LLVOAvatar*>::toggleBuffer();
FSPerfStats::RecordAttachmentTime<U32>::toggleBuffer();
FSPerfStats::StatsRecorder::endFrame();
return ! LLApp::isRunning();
}

View File

@ -453,24 +453,15 @@ void LLRenderPass::applyModelMatrix(const LLDrawInfo& params)
void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
{
FSZone;
// <FS:Beq> Capture render times
LLViewerObject* rootAtt{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
LLViewerObject* vobj = params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
LL_INFOS() << "pushBatch recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment( vobj );
}
}
// </FS:Beq>

View File

@ -342,6 +342,7 @@ void LLDrawPoolAlpha::render(S32 pass)
void LLDrawPoolAlpha::renderAlphaHighlight(U32 mask)
{
FSZone;
for (LLCullResult::sg_iterator i = gPipeline.beginAlphaGroups(); i != gPipeline.endAlphaGroups(); ++i)
{
LLSpatialGroup* group = *i;
@ -354,23 +355,13 @@ void LLDrawPoolAlpha::renderAlphaHighlight(U32 mask)
{
LLDrawInfo& params = **k;
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* rootAtt{};
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
LL_INFOS() << "recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment(vobj);
}
}
// </FS:Beq>
@ -499,6 +490,7 @@ void LLDrawPoolAlpha::RestoreTexSetup(bool tex_setup)
void LLDrawPoolAlpha::renderSimples(U32 mask, std::vector<LLDrawInfo*>& simples)
{
FSZone;
gPipeline.enableLightsDynamic();
simple_shader->bind();
simple_shader->bindTexture(LLShaderMgr::BUMP_MAP, LLViewerFetchedTexture::sFlatNormalImagep);
@ -509,6 +501,15 @@ void LLDrawPoolAlpha::renderSimples(U32 mask, std::vector<LLDrawInfo*>& simples)
bool use_shaders = gPipeline.canUseVertexShaders();
for (LLDrawInfo* draw : simples)
{
// <FS:Beq> Capture render times
FSZoneN("Simples");
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
auto vobj = draw->mFace?draw->mFace->getViewerObject():nullptr;
if(vobj && vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
// </FS:Beq>
bool tex_setup = TexSetup(draw, use_shaders, false, simple_shader);
LLGLEnableFunc stencil_test(GL_STENCIL_TEST, draw->mSelected, &LLGLCommonFunc::selected_stencil_test);
gGL.blendFunc((LLRender::eBlendFactor) draw->mBlendFuncSrc, (LLRender::eBlendFactor) draw->mBlendFuncDst, mAlphaSFactor, mAlphaDFactor);
@ -527,6 +528,15 @@ void LLDrawPoolAlpha::renderFullbrights(U32 mask, std::vector<LLDrawInfo*>& full
bool use_shaders = gPipeline.canUseVertexShaders();
for (LLDrawInfo* draw : fullbrights)
{
// <FS:Beq> Capture render times
FSZoneN("Fullbrights");
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
auto vobj = draw->mFace?draw->mFace->getViewerObject():nullptr;
if(vobj && vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
// </FS:Beq>
bool tex_setup = TexSetup(draw, use_shaders, false, fullbright_shader);
LLGLEnableFunc stencil_test(GL_STENCIL_TEST, draw->mSelected, &LLGLCommonFunc::selected_stencil_test);
@ -547,6 +557,16 @@ void LLDrawPoolAlpha::renderMaterials(U32 mask, std::vector<LLDrawInfo*>& materi
bool use_shaders = gPipeline.canUseVertexShaders();
for (LLDrawInfo* draw : materials)
{
// <FS:Beq> Capture render times
FSZoneN("Materials");
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
auto vobj = draw->mFace?draw->mFace->getViewerObject():nullptr;
if(vobj && vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
// </FS:Beq>
U32 mask = draw->mShaderMask;
llassert(mask < LLMaterial::SHADER_COUNT);
@ -629,6 +649,16 @@ void LLDrawPoolAlpha::renderEmissives(U32 mask, std::vector<LLDrawInfo*>& emissi
bool use_shaders = gPipeline.canUseVertexShaders();
for (LLDrawInfo* draw : emissives)
{
// <FS:Beq> Capture render times
FSZoneN("Emissives");
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
auto vobj = draw->mFace?draw->mFace->getViewerObject():nullptr;
if(vobj && vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
// </FS:Beq>
bool tex_setup = TexSetup(draw, use_shaders, false, emissive_shader);
drawEmissive(mask, draw);
RestoreTexSetup(tex_setup);
@ -702,23 +732,14 @@ void LLDrawPoolAlpha::renderAlpha(U32 mask, S32 pass)
}
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* rootAtt{};
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
LL_INFOS() << "ALPHA recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment(vobj);
}
}
// </FS:Beq>

View File

@ -581,7 +581,7 @@ void LLDrawPoolAvatar::renderShadow(S32 pass)
{
return;
}
FSPerfStats::RecordObjectTime<const LLVOAvatar*> T(avatarp, FSPerfStats::ObjStatType::RENDER_SHADOWS);
FSPerfStats::RecordAvatarTime T(avatarp->getID(), FSPerfStats::StatType_t::RENDER_SHADOWS);
LLVOAvatar::AvatarOverallAppearance oa = avatarp->getOverallAppearance();
BOOL impostor = !LLPipeline::sImpostorRender && avatarp->isImpostor();
@ -1504,7 +1504,7 @@ void LLDrawPoolAvatar::renderAvatars(LLVOAvatar* single_avatar, S32 pass)
{
return;
}
FSPerfStats::RecordObjectTime<const LLVOAvatar*> T(avatarp, FSPerfStats::ObjStatType::RENDER_GEOMETRY);
FSPerfStats::RecordAvatarTime T(avatarp->getID(), FSPerfStats::StatType_t::RENDER_GEOMETRY);
// <FS:Zi> Add avatar hitbox debug
static LLCachedControl<bool> render_hitbox(gSavedSettings, "DebugRenderHitboxes", false);
@ -2282,21 +2282,11 @@ void LLDrawPoolAvatar::renderRigged(LLVOAvatar* avatar, U32 type, bool glow)
}
auto self = avatar->isSelf();
LLViewerObject * parentAttachment{nullptr};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(self && vobj->isAttachment())
{
LLViewerObject * vtop = vobj;
LLViewerObject * par = (LLViewerObject *) vobj->getParent();
while (par && !(par->asAvatar()))
{
vtop = par;
par = (LLViewerObject *)vtop->getParent();
}
parentAttachment = vtop;
T = trackMyAttachment(vobj);
}
FSPerfStats::RecordAttachmentTime<U32> T(parentAttachment?parentAttachment->getAttachmentItemID().getCRC32():0, FSPerfStats::ObjStatType::RENDER_GEOMETRY);
LLVolume* volume = vobj->getVolume();
S32 te = face->getTEOffset();
@ -2612,7 +2602,13 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
{
continue;
}
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
// </FS:Beq>
LLVolume* volume = vobj->getVolume();
S32 te = face->getTEOffset();

View File

@ -641,28 +641,20 @@ void LLDrawPoolBump::endFullbrightShiny()
}
void LLDrawPoolBump::renderGroup(LLSpatialGroup* group, U32 type, U32 mask, BOOL texture = TRUE)
{
{
FSZone;
LLSpatialGroup::drawmap_elem_t& draw_info = group->mDrawMap[type];
for (LLSpatialGroup::drawmap_elem_t::iterator k = draw_info.begin(); k != draw_info.end(); ++k)
{
LLDrawInfo& params = **k;
// <FS:Beq> Capture render times
LLViewerObject* rootAtt{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
LL_INFOS() << "recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T= trackMyAttachment(vobj);
}
// </FS:Beq>
applyModelMatrix(params);
@ -1530,24 +1522,16 @@ void LLDrawPoolBump::renderBump(U32 type, U32 mask)
void LLDrawPoolBump::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
{
FSZone;
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* rootAtt{};
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
// LL_INFOS() << "recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment(vobj);
}
}
// </FS:Beq>

View File

@ -141,23 +141,14 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
LLDrawInfo& params = **i;
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* rootAtt{};
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
auto par = (LLViewerObject*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLViewerObject*)par->getParent();
}
LL_INFOS() << "MATERIALS recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << " as " << rootAtt->getAttachmentItemID().getCRC32() << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment(vobj);
}
}
// </FS:Beq>
@ -196,6 +187,18 @@ void LLDrawPoolMaterials::bindNormalMap(LLViewerTexture* tex)
void LLDrawPoolMaterials::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
{
// <FS:Beq> Capture render times
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(params.mFace)
{
LLViewerObject* vobj = (LLViewerObject *)params.mFace->getViewerObject();
if(vobj->isAttachment())
{
T = trackMyAttachment(vobj);
}
}
// </FS:Beq>
applyModelMatrix(params);
bool tex_setup = false;

View File

@ -36,6 +36,7 @@
#include "llspatialpartition.h"
#include "llviewershadermgr.h"
#include "llrender.h"
#include "fsperfstats.h"
static LLGLSLShader* simple_shader = NULL;
static LLGLSLShader* fullbright_shader = NULL;
@ -152,6 +153,18 @@ void LLDrawPoolGlow::render(S32 pass)
// Pushes one glow-pass batch through LLRenderPass::pushBatch. When the batch's
// face belongs to an attachment, trackMyAttachment() is asked for a
// RecordAttachmentTime that is held for the duration of this call.
//
// params         draw info for the batch; params.mFace may be null.
// mask           forwarded unchanged to LLRenderPass::pushBatch.
// texture        forwarded unchanged to LLRenderPass::pushBatch.
// batch_textures forwarded unchanged to LLRenderPass::pushBatch.
void LLDrawPoolGlow::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
{
    // <FS:Beq> Capture render times
    // T stays empty unless the face belongs to an attachment. Whatever it holds
    // is released when T goes out of scope at the end of this function
    // (presumably that is when the elapsed time is recorded - confirm in fsperfstats.h).
    std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
    if (params.mFace)
    {
        LLViewerObject* vobj = params.mFace->getViewerObject();
        // Guard against a face with no viewer object before dereferencing it.
        if (vobj && vobj->isAttachment())
        {
            T = trackMyAttachment(vobj);
        }
    }
    // </FS:Beq>
    //gGL.diffuseColor4ubv(params.mGlowColor.mV);
    LLRenderPass::pushBatch(params, mask, texture, batch_textures);
}

View File

@ -228,12 +228,15 @@ BOOL LLViewerDynamicTexture::updateAllInstances()
BOOL ret = FALSE ;
for( S32 order = 0; order < ORDER_COUNT; order++ )
{
FSZone;
for (instance_list_t::iterator iter = LLViewerDynamicTexture::sInstances[order].begin();
iter != LLViewerDynamicTexture::sInstances[order].end(); ++iter)
{
FSZone;
LLViewerDynamicTexture *dynamicTexture = *iter;
if (dynamicTexture->needsRender())
{
{
FSZoneN("needsRender");
glClear(GL_DEPTH_BUFFER_BIT);
gDepthDirty = TRUE;
@ -241,13 +244,19 @@ BOOL LLViewerDynamicTexture::updateAllInstances()
dynamicTexture->setBoundTarget(use_fbo ? &gPipeline.mBake : nullptr);
dynamicTexture->preRender(); // Must be called outside of startRender()
result = FALSE;
{
FSZoneN("DynTexture->render");
if (dynamicTexture->render())
{
ret = TRUE ;
result = TRUE;
sNumRenders++;
}
}
{
FSZoneN("flush");
gGL.flush();
}
LLVertexBuffer::unbind();
dynamicTexture->setBoundTarget(nullptr);
dynamicTexture->postRender(result);

View File

@ -643,19 +643,10 @@ void renderFace(LLDrawable* drawable, LLFace *face)
LLVOVolume* vobj = drawable->getVOVolume();
if (vobj)
{
LLVOVolume* rootAtt{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>> T{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(vobj->isAttachment())
{
auto par = (LLVOVolume*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLVOVolume*)par->getParent();
}
// LL_INFOS() << "recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << LL_ENDL;
if(rootAtt){T = std::unique_ptr<FSPerfStats::RecordAttachmentTime<U32>>(new FSPerfStats::RecordAttachmentTime<U32>(rootAtt->getAttachmentItemID().getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY));}
T = trackMyAttachment(vobj);
}
LLVolume* volume = NULL;
@ -1255,6 +1246,11 @@ bool LLFace::canRenderAsMask()
{
return false;
}
// <FS:Beq> shortcircuit fully alpha faces
if(getViewerObject()->isHUDAttachment()){return false;};
if(te->getAlpha() == 0.0f && (te->getGlow() == 0.f)){FSZoneN("beqshortcircuit invisible");return true;}
// </FS:Beq>
LLMaterial* mat = te->getMaterialParams();
if (mat && mat->getDiffuseAlphaMode() == LLMaterial::DIFFUSE_ALPHA_MODE_BLEND)

View File

@ -53,6 +53,10 @@ const S32 BAR_LEFT_PAD = 2;
const S32 BAR_RIGHT_PAD = 5;
const S32 BAR_BOTTOM_PAD = 9;
// Short file-local aliases for the FSPerfStats object-type enumerators.
constexpr auto AvType = FSPerfStats::ObjType_t::OT_AVATAR;
constexpr auto AttType = FSPerfStats::ObjType_t::OT_ATTACHMENT;
constexpr auto HudType = FSPerfStats::ObjType_t::OT_HUD;
constexpr auto SceneType = FSPerfStats::ObjType_t::OT_GENERAL;
class LLExceptionsContextMenu : public LLListContextMenu
{
public:
@ -164,44 +168,58 @@ void LLFloaterPerformance::showSelectedPanel(LLPanel* selected_panel)
void LLFloaterPerformance::draw()
{
const S32 NUM_PERIODS = 50;
constexpr auto NANOS = 1000000000;
constexpr auto MICROS = 1000000;
constexpr auto MILLIS = 1000;
static LLCachedControl<U32> fps_cap(gSavedSettings, "FramePerSecondLimit"); // user limited FPS
static LLCachedControl<U32> target_fps(gSavedSettings, "FSTargetFPS"); // desired FPS
static LLCachedControl<bool> auto_tune(gSavedSettings, "FSAutoTuneFPS"); // auto tune enabled?
static LLCachedControl<F32> max_render_cost(gSavedSettings, "RenderAvatarMaxART", 0);
static auto freq_divisor = get_timer_info().mClockFrequencyInv;
if (mUpdateTimer->hasExpired())
{
LLStringUtil::format_map_t args;
auto fps = LLTrace::get_frame_recording().getPeriodMedianPerSec(LLStatViewer::FPS, NUM_PERIODS);
getChild<LLTextBox>("fps_value")->setValue((S32)llround(fps));
auto tot_frame_time_ns = 1000000000/fps;
auto target_frame_time_ns = 1000000000/(target_fps==0?1:target_fps);
auto tot_avatar_time = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::getSum(FSPerfStats::ObjStatType::RENDER_COMBINED);
auto tot_huds_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_HUDS) ;
auto tot_sleep_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_SLEEP);
auto tot_ui_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_UI);
auto tot_idle_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_IDLE);
auto tot_limit_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_FPSLIMIT);
auto tot_swap_time = FSPerfStats::RecordSceneTime::get(FSPerfStats::SceneStatType::RENDER_SWAP);
auto tot_frame_time_ns = NANOS/fps;
auto target_frame_time_ns = NANOS/(target_fps==0?1:target_fps);
auto tot_avatar_time_raw = FSPerfStats::StatsRecorder::getSum(AvType, FSPerfStats::StatType_t::RENDER_COMBINED);
auto tot_huds_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_HUDS);
auto tot_sleep_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_SLEEP);
auto tot_ui_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_UI);
auto tot_idle_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_IDLE);
auto tot_limit_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_FPSLIMIT);
auto tot_swap_time_raw = FSPerfStats::StatsRecorder::getSceneStat(FSPerfStats::StatType_t::RENDER_SWAP);
// once the rest is extracted what is left is the scene cost (we don't include non-render activities such as network here; we probably should.)
auto tot_scene_time = tot_frame_time_ns - tot_avatar_time - tot_huds_time - tot_ui_time - tot_sleep_time - tot_limit_time - tot_swap_time;
auto tot_avatar_time_ns = FSPerfStats::raw_to_ns( tot_avatar_time_raw );
auto tot_huds_time_ns = FSPerfStats::raw_to_ns( tot_huds_time_raw );
auto tot_sleep_time_ns = FSPerfStats::raw_to_ns( tot_sleep_time_raw );
auto tot_ui_time_ns = FSPerfStats::raw_to_ns( tot_ui_time_raw );
auto tot_idle_time_ns = FSPerfStats::raw_to_ns( tot_idle_time_raw );
auto tot_limit_time_ns = FSPerfStats::raw_to_ns( tot_limit_time_raw );
auto tot_swap_time_ns = FSPerfStats::raw_to_ns( tot_swap_time_raw );
// once the rest is extracted what is left is the scene cost
auto tot_scene_time_ns = tot_frame_time_ns - tot_avatar_time_ns - tot_huds_time_ns - tot_ui_time_ns - tot_sleep_time_ns - tot_limit_time_ns - tot_swap_time_ns - tot_idle_time_ns;
// remove time spent sleeping for fps limit or out of focus.
tot_frame_time_ns -= tot_limit_time;
tot_frame_time_ns -= tot_sleep_time;
tot_frame_time_ns -= tot_limit_time_ns;
tot_frame_time_ns -= tot_sleep_time_ns;
if(tot_frame_time_ns == 0)
{
LL_WARNS("performance") << "things went wrong, quit while we can." << LL_ENDL;
return;
}
auto pct_avatar_time = (tot_avatar_time*100)/tot_frame_time_ns;
auto pct_huds_time = (tot_huds_time*100)/tot_frame_time_ns;
auto pct_ui_time = (tot_ui_time*100)/tot_frame_time_ns;
auto pct_idle_time = (tot_idle_time*100)/tot_frame_time_ns;
auto pct_swap_time = (tot_swap_time*100)/tot_frame_time_ns;
auto pct_scene_time = (tot_scene_time*100)/tot_frame_time_ns;
auto pct_avatar_time = (tot_avatar_time_ns * 100)/tot_frame_time_ns;
auto pct_huds_time = (tot_huds_time_ns * 100)/tot_frame_time_ns;
auto pct_ui_time = (tot_ui_time_ns * 100)/tot_frame_time_ns;
auto pct_idle_time = (tot_idle_time_ns * 100)/tot_frame_time_ns;
auto pct_swap_time = (tot_swap_time_ns * 100)/tot_frame_time_ns;
auto pct_scene_time = (tot_scene_time_ns * 100)/tot_frame_time_ns;
args["AV_FRAME_PCT"] = llformat("%02u", (U32)llround(pct_avatar_time));
args["HUDS_FRAME_PCT"] = llformat("%02u", (U32)llround(pct_huds_time));
@ -217,13 +235,13 @@ void LLFloaterPerformance::draw()
getChild<LLTextBox>("frame_breakdown")->setText(getString("frame_stats", args));
auto textbox = getChild<LLTextBox>("fps_warning");
if(tot_sleep_time > 0) // We are sleeping because view is not focussed
if(tot_sleep_time_raw > 0) // We are sleeping because view is not focussed
{
textbox->setVisible(true);
textbox->setText(getString("focus_fps"));
textbox->setColor(LLUIColorTable::instance().getColor("DrYellow"));
}
else if (tot_limit_time > 0)
else if (tot_limit_time_raw > 0)
{
textbox->setVisible(true);
textbox->setText(getString("limit_fps", args));
@ -242,11 +260,11 @@ void LLFloaterPerformance::draw()
if( auto_tune )
{
auto av_render_max = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::getMax(FSPerfStats::ObjStatType::RENDER_COMBINED);
auto av_render_max_raw = FSPerfStats::StatsRecorder::getMax(AvType, FSPerfStats::StatType_t::RENDER_COMBINED);
// if( target_frame_time_ns <= tot_frame_time_ns )
// {
// U32 non_avatar_time_ns = tot_frame_time_ns - tot_avatar_time;
// U32 non_avatar_time_ns = tot_frame_time_ns - tot_avatar_time_raw;
// if( non_avatar_time_ns < target_frame_time_ns )
// {
// F32 target_avatar_time_ms {F32(target_frame_time_ns-non_avatar_time_ns)/1000000};
@ -255,33 +273,41 @@ void LLFloaterPerformance::draw()
// }
// }
// Is our target frame time lower than current? If so we need to take action to reduce draw overheads.
if( target_frame_time_ns <= tot_frame_time_ns )
{
LL_INFOS() << "AUTO_TUNE: adapting frame rate" << LL_ENDL;
U32 non_avatar_time_ns = tot_frame_time_ns - tot_avatar_time;
LL_INFOS() << "AUTO_TUNE: adapting frame rate: target_frame=" << target_frame_time_ns << " nonav_frame_time=" << non_avatar_time_ns << " headroom=" << target_frame_time_ns - non_avatar_time_ns << LL_ENDL;
U32 non_avatar_time_ns = tot_frame_time_ns - tot_avatar_time_ns;
LL_INFOS() << "AUTO_TUNE: adapting frame rate: target_frame=" << target_frame_time_ns << " nonav_frame_time=" << non_avatar_time_ns << " headroom=" << (S64)target_frame_time_ns - non_avatar_time_ns << LL_ENDL;
// If the non-avatar frame time is below the target frame time then we can potentially reach the target.
if( non_avatar_time_ns < target_frame_time_ns )
{
U64 target_avatar_time_ns {target_frame_time_ns-non_avatar_time_ns};
LL_INFOS() << "AUTO_TUNE: avatar_budget:" << target_avatar_time_ns << LL_ENDL;
if(target_avatar_time_ns < tot_avatar_time)
if(target_avatar_time_ns < tot_avatar_time_ns)
{
F32 new_render_limit = (F32)(av_render_max-100000)/1000000;
if(new_render_limit >= max_render_cost)
F32 new_render_limit_ms = (F32)(FSPerfStats::raw_to_ms(av_render_max_raw)-0.1);
if(new_render_limit_ms >= max_render_cost)
{
// we caught a bad frame possibly with a forced refresh render.
new_render_limit = max_render_cost - 0.1;
new_render_limit_ms = max_render_cost - 0.1;
}
gSavedSettings.setF32( "RenderAvatarMaxART", new_render_limit);
LL_INFOS() << "AUTO_TUNE: avatar_budget adjusted to:" << new_render_limit << LL_ENDL;
gSavedSettings.setF32( "RenderAvatarMaxART", new_render_limit_ms);
LL_INFOS() << "AUTO_TUNE: avatar_budget adjusted to:" << new_render_limit_ms << LL_ENDL;
}
LL_INFOS() << "AUTO_TUNE: Target frame time:"<<target_frame_time_ns/1000000 << " (non_avatar is " << non_avatar_time_ns/1000000 << ") Max cost limited=" << max_render_cost << LL_ENDL;
LL_INFOS() << "AUTO_TUNE: Target frame time:"<<target_frame_time_ns/1000000 << "ms (non_avatar is " << non_avatar_time_ns/1000000 << "ms) Max cost limited=" << max_render_cost << LL_ENDL;
}
else
{
// TODO(Beq): Set advisory text for further actions
LL_INFOS() << "AUTO_TUNE: Unachievable target . Target frame time:"<<target_frame_time_ns/1000000 << "ms (non_avatar is " << non_avatar_time_ns/1000000 << "ms)" << LL_ENDL;
textbox->setColor(LLUIColorTable::instance().getColor("red"));
}
}
else
if( target_frame_time_ns > (tot_frame_time_ns + max_render_cost))
{
// if we have more space to spare let's shift up little in the hope we'll restore an avatar.
// if we have more time to spare let's shift up a little in the hope we'll restore an avatar.
gSavedSettings.setF32( "RenderAvatarMaxART", max_render_cost + 0.5 );
}
}
@ -339,18 +365,20 @@ void LLFloaterPerformance::populateHUDList()
hud_complexity_list_t::iterator iter = complexity_list.begin();
hud_complexity_list_t::iterator end = complexity_list.end();
static auto freq_divisor = get_timer_info().mClockFrequencyInv;
U32 max_complexity = 0;
for (; iter != end; ++iter)
{
max_complexity = llmax(max_complexity, (*iter).objectsCost);
}
auto huds_max_render_time = FSPerfStats::RecordObjectTime<LLHUDObject*>::getMax(FSPerfStats::ObjStatType::RENDER_GEOMETRY);
auto huds_max_render_time_raw = FSPerfStats::StatsRecorder::getMax(HudType, FSPerfStats::StatType_t::RENDER_GEOMETRY);
for (iter = complexity_list.begin(); iter != end; ++iter)
{
LLHUDComplexity hud_object_complexity = *iter;
auto hud_ptr = hud_object_complexity.objectPtr;
auto hud_render_time = FSPerfStats::RecordObjectTime<const LLViewerObject*>::get(hud_ptr, FSPerfStats::ObjStatType::RENDER_GEOMETRY);
auto hud_render_time_raw = FSPerfStats::StatsRecorder::get(HudType, hud_ptr->getID(), FSPerfStats::StatType_t::RENDER_GEOMETRY);
LLSD item;
item["special_id"] = hud_object_complexity.objectId;
item["target"] = LLNameListCtrl::SPECIAL;
@ -358,15 +386,15 @@ void LLFloaterPerformance::populateHUDList()
row[0]["column"] = "complex_visual";
row[0]["type"] = "bar";
LLSD& value = row[0]["value"];
value["ratio"] = (F32)hud_render_time / huds_max_render_time;
value["ratio"] = (F32)hud_render_time_raw / huds_max_render_time_raw;
value["bottom"] = BAR_BOTTOM_PAD;
value["left_pad"] = BAR_LEFT_PAD;
value["right_pad"] = BAR_RIGHT_PAD;
row[1]["column"] = "complex_value";
row[1]["type"] = "text";
LL_INFOS() << "HUD : hud[" << hud_ptr << " time:" << hud_render_time <<" total_time:" << huds_max_render_time << LL_ENDL;
row[1]["value"] = llformat("%.2f",((double)hud_render_time / 1000000000));
LL_INFOS() << "HUD : hud[" << hud_ptr << " time:" << hud_render_time_raw <<" total_time:" << huds_max_render_time_raw << LL_ENDL;
row[1]["value"] = llformat( "%.3f",FSPerfStats::raw_to_us(hud_render_time_raw) );
row[1]["font"]["name"] = "SANSSERIF";
row[2]["column"] = "name";
@ -401,42 +429,49 @@ void LLFloaterPerformance::populateObjectList()
object_complexity_list_t::iterator iter = complexity_list.begin();
object_complexity_list_t::iterator end = complexity_list.end();
static auto freq_divisor = get_timer_info().mClockFrequencyInv;
U32 max_complexity = 0;
for (; iter != end; ++iter)
{
max_complexity = llmax(max_complexity, (*iter).objectCost);
}
auto max_render_time = FSPerfStats::RecordAttachmentTime<U32>::getMax(FSPerfStats::ObjStatType::RENDER_GEOMETRY);
auto att_max_render_time_raw = FSPerfStats::StatsRecorder::getMax(AttType, FSPerfStats::StatType_t::RENDER_COMBINED);
for (iter = complexity_list.begin(); iter != end; ++iter)
{
LLObjectComplexity object_complexity = *iter;
// S32 obj_cost_short = llmax((S32)object_complexity.objectCost / 1000, 1);
auto attach_render_time = FSPerfStats::RecordAttachmentTime<U32>::get(object_complexity.objectId.getCRC32(), FSPerfStats::ObjStatType::RENDER_GEOMETRY);
S32 obj_cost_short = llmax((S32)object_complexity.objectCost / 1000, 1);
auto attach_render_time_raw = FSPerfStats::StatsRecorder::get(AttType, object_complexity.objectId, FSPerfStats::StatType_t::RENDER_COMBINED);
LLSD item;
item["special_id"] = object_complexity.objectId;
item["target"] = LLNameListCtrl::SPECIAL;
LLSD& row = item["columns"];
row[0]["column"] = "complex_visual";
row[0]["column"] = "art_visual";
row[0]["type"] = "bar";
LLSD& value = row[0]["value"];
value["ratio"] = (F32)attach_render_time / max_render_time;
value["ratio"] = (F32)attach_render_time_raw / att_max_render_time_raw;
value["bottom"] = BAR_BOTTOM_PAD;
value["left_pad"] = BAR_LEFT_PAD;
value["right_pad"] = BAR_RIGHT_PAD;
row[1]["column"] = "complex_value";
row[1]["column"] = "art_value";
row[1]["type"] = "text";
// row[1]["value"] = std::to_string(obj_cost_short);
row[1]["value"] = llformat("%.3f",((double)attach_render_time / 1000000));
row[1]["value"] = llformat( "%.4f", FSPerfStats::raw_to_us(attach_render_time_raw) );
row[1]["font"]["name"] = "SANSSERIF";
row[2]["column"] = "name";
row[2]["column"] = "complex_value";
row[2]["type"] = "text";
row[2]["value"] = object_complexity.objectName;
row[2]["value"] = std::to_string(obj_cost_short);
row[2]["font"]["name"] = "SANSSERIF";
row[3]["column"] = "name";
row[3]["type"] = "text";
row[3]["value"] = object_complexity.objectName;
row[3]["font"]["name"] = "SANSSERIF";
LLScrollListItem* obj = mObjectList->addElement(item);
if (obj)
{
@ -467,7 +502,7 @@ void LLFloaterPerformance::populateNearbyList()
getNearbyAvatars(valid_nearby_avs);
std::vector<LLCharacter*>::iterator char_iter = valid_nearby_avs.begin();
auto render_max = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::getMax(FSPerfStats::ObjStatType::RENDER_COMBINED);
auto av_render_max_raw = FSPerfStats::StatsRecorder::getMax(AvType, FSPerfStats::StatType_t::RENDER_COMBINED);
while (char_iter != valid_nearby_avs.end())
{
LLVOAvatar* avatar = dynamic_cast<LLVOAvatar*>(*char_iter);
@ -477,40 +512,46 @@ void LLFloaterPerformance::populateNearbyList()
if(overall_appearance == LLVOAvatar::AOA_INVISIBLE)
continue;
// S32 complexity_short = llmax((S32)avatar->getVisualComplexity() / 1000, 1);
auto render_av = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::get(avatar,FSPerfStats::ObjStatType::RENDER_COMBINED);
S32 complexity_short = llmax((S32)avatar->getVisualComplexity() / 1000, 1);
auto render_av_raw = FSPerfStats::StatsRecorder::get(AvType, avatar->getID(),FSPerfStats::StatType_t::RENDER_COMBINED);
auto is_slow = avatar->isTooSlow(true);
// auto is_slow_without_shadows = avatar->isTooSlow();
LLSD item;
item["id"] = avatar->getID();
LLSD& row = item["columns"];
row[0]["column"] = "complex_visual";
row[0]["column"] = "art_visual";
row[0]["type"] = "bar";
LLSD& value = row[0]["value"];
value["ratio"] = (double)render_av / render_max;
value["ratio"] = (double)render_av_raw / av_render_max_raw;
value["bottom"] = BAR_BOTTOM_PAD;
value["left_pad"] = BAR_LEFT_PAD;
value["right_pad"] = BAR_RIGHT_PAD;
row[1]["column"] = "complex_value";
row[1]["column"] = "art_value";
row[1]["type"] = "text";
if(is_slow)
{
row[1]["value"] = llformat("%.2f", ((double)avatar->getLastART() / 1000000));
row[1]["value"] = llformat( "%.2f", FSPerfStats::raw_to_ms( avatar->getLastART() ) );
}
else
{
row[1]["value"] = llformat("%.2f",((double)render_av / 1000000));
row[1]["value"] = llformat( "%.2f", FSPerfStats::raw_to_ms( render_av_raw ) );
}
row[1]["font"]["name"] = "SANSSERIF";
row[1]["width"] = "50";
row[2]["column"] = "name";
row[2]["column"] = "complex_value";
row[2]["type"] = "text";
row[2]["value"] = avatar->getFullname();
row[2]["value"] = std::to_string(complexity_short);
row[2]["font"]["name"] = "SANSSERIF";
row[2]["width"] = "50";
row[3]["column"] = "name";
row[3]["type"] = "text";
row[3]["value"] = avatar->getFullname();
row[3]["font"]["name"] = "SANSSERIF";
LLScrollListItem* av_item = mNearbyList->addElement(item);
if(av_item)

View File

@ -1128,6 +1128,7 @@ public:
virtual S32 frustumCheck(const LLViewerOctreeGroup* group)
{
FSZone;
S32 res = AABBInFrustumNoFarClipGroupBounds(group);
if (res != 0)
{
@ -1138,6 +1139,7 @@ public:
virtual S32 frustumCheckObjects(const LLViewerOctreeGroup* group)
{
FSZone;
S32 res = AABBInFrustumNoFarClipObjectBounds(group);
if (res != 0)
{
@ -1148,6 +1150,7 @@ public:
virtual void processGroup(LLViewerOctreeGroup* base_group)
{
FSZone;
LLSpatialGroup* group = (LLSpatialGroup*)base_group;
if (group->needsUpdate() ||
group->getVisible(LLViewerCamera::sCurCameraID) < LLDrawable::getCurrentFrame() - 1)

View File

@ -1193,7 +1193,7 @@ void display(BOOL rebuild, F32 zoom_factor, int subfield, BOOL for_snapshot)
void render_hud_attachments()
{
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_HUDS);
FSPerfStats::RecordSceneTime T (LLUUID{}, FSPerfStats::StatType_t::RENDER_HUDS);
gGL.matrixMode(LLRender::MM_PROJECTION);
gGL.pushMatrix();
gGL.matrixMode(LLRender::MM_MODELVIEW);
@ -1401,7 +1401,7 @@ bool setup_hud_matrices(const LLRect& screen_region)
void render_ui(F32 zoom_factor, int subfield)
{
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_UI);
FSPerfStats::RecordSceneTime T (const LLUUID{}, FSPerfStats::StatType_t::RENDER_UI);
LL_RECORD_BLOCK_TIME(FTM_RENDER_UI);
LLGLState::checkStates();
@ -1487,7 +1487,7 @@ static LLTrace::BlockTimerStatHandle FTM_SWAP("Swap");
void swap()
{
FSPerfStats::RecordSceneTime T (FSPerfStats::SceneStatType::RENDER_SWAP);
FSPerfStats::RecordSceneTime T (const LLUUID{}, FSPerfStats::StatType_t::RENDER_SWAP);
LL_RECORD_BLOCK_TIME(FTM_SWAP);
if (gDisplaySwapBuffers)

View File

@ -233,20 +233,12 @@ U32 LLViewerJointMesh::drawShape( F32 pixelArea, BOOL first_pass, BOOL is_dummy)
}
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
auto vobj = mFace->getViewerObject();
if(vobj && !vobj->asAvatar() && vobj->getAvatar()->isSelf())
if( vobj && vobj->isAttachment() )
{
LLViewerObject * vtop = vobj;
LLViewerObject * par = (LLViewerObject *) vobj->getParent();
while (par && !(par->asAvatar()))
{
vtop = par;
par = (LLViewerObject *)vtop->getParent();
}
vobj = vtop;
T = trackMyAttachment(vobj);
}
FSPerfStats::RecordAttachmentTime<U32> T(vobj?vobj->getAttachmentItemID().getCRC32():0, FSPerfStats::ObjStatType::RENDER_GEOMETRY);
U32 triangle_count = 0;

View File

@ -1361,6 +1361,7 @@ bool LLViewerOctreeCull::earlyFail(LLViewerOctreeGroup* group)
//virtual
void LLViewerOctreeCull::traverse(const OctreeNode* n)
{
FSZone;
LLViewerOctreeGroup* group = (LLViewerOctreeGroup*) n->getListener(0);
if (earlyFail(group))
@ -1371,14 +1372,17 @@ void LLViewerOctreeCull::traverse(const OctreeNode* n)
if (mRes == 2 ||
(mRes && group->hasState(LLViewerOctreeGroup::SKIP_FRUSTUM_CHECK)))
{ //fully in, just add everything
FSZoneN("AllInside");
OctreeTraveler::traverse(n);
}
else
{
FSZoneN("Check inside?")
mRes = frustumCheck(group);
if (mRes)
{ //at least partially in, run on down
FSZoneN("PartiallyIn");
OctreeTraveler::traverse(n);
}

View File

@ -9157,38 +9157,38 @@ bool LLVOAvatar::isTooSlow(bool combined) const
}
// Either we're not stale or we've updated.
U64 render_time;
U64 render_geom_time;
U64 render_time_raw;
U64 render_geom_time_raw;
if(!mARTCapped)
{
// no cap, so we use the live values
render_time = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::get(this,FSPerfStats::ObjStatType::RENDER_COMBINED);
render_geom_time = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::get(this,FSPerfStats::ObjStatType::RENDER_GEOMETRY);
render_time_raw = FSPerfStats::StatsRecorder::get(FSPerfStats::ObjType_t::OT_AVATAR, this->getID(), FSPerfStats::StatType_t::RENDER_COMBINED);
render_geom_time_raw = FSPerfStats::StatsRecorder::get(FSPerfStats::ObjType_t::OT_AVATAR, this->getID(), FSPerfStats::StatType_t::RENDER_GEOMETRY);
}
else
{
// use the cached values.
render_time = mRenderTime;
render_geom_time = mGeomTime;
render_time_raw = mRenderTime;
render_geom_time_raw = mGeomTime;
}
if( (LLVOAvatar::sRenderTimeCap_ns > 0) && (render_time >= LLVOAvatar::sRenderTimeCap_ns) )
if( (LLVOAvatar::sRenderTimeCap_ns > 0) && (FSPerfStats::raw_to_ns(render_time_raw) >= LLVOAvatar::sRenderTimeCap_ns) )
{
if(!mARTCapped)
{
// if we weren't capped, we are now
abuse_constness->mRenderTime = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::get(this,FSPerfStats::ObjStatType::RENDER_COMBINED);
abuse_constness->mGeomTime = FSPerfStats::RecordObjectTime<const LLVOAvatar*>::get(this,FSPerfStats::ObjStatType::RENDER_GEOMETRY);
abuse_constness->mRenderTime = render_time_raw;
abuse_constness->mGeomTime = render_geom_time_raw;
abuse_constness->mARTStale = false;
abuse_constness->mARTCapped = true;
abuse_constness->mLastARTUpdateFrame = LLFrameTimer::getFrameCount();
LL_INFOS() << this->getFullname() << " ("<< (combined?"combined":"geometry") << ") mLastART too high = " << render_time << " vs ("<< LLVOAvatar::sRenderTimeCap_ns << " set @ " << mLastARTUpdateFrame << LL_ENDL;
LL_INFOS() << this->getFullname() << " ("<< (combined?"combined":"geometry") << ") mLastART too high = " << FSPerfStats::raw_to_ns(render_time_raw) << " vs ("<< LLVOAvatar::sRenderTimeCap_ns << " set @ " << mLastARTUpdateFrame << LL_ENDL;
}
// return true only if that is the case in the context of the combined/geom_only flag.
return combined ? true : (render_geom_time >= LLVOAvatar::sRenderTimeCap_ns);
return combined ? true : (render_geom_time_raw >= LLVOAvatar::sRenderTimeCap_ns);
}
LL_INFOS() << this->getFullname() << " ("<< (combined?"combined":"geometry") << ") good render time = " << render_time << " vs ("<< LLVOAvatar::sRenderTimeCap_ns << " set @ " << mLastARTUpdateFrame << LL_ENDL;
LL_INFOS() << this->getFullname() << " ("<< (combined?"combined":"geometry") << ") good render time = " << FSPerfStats::raw_to_ns(render_time_raw) << " vs ("<< LLVOAvatar::sRenderTimeCap_ns << " set @ " << mLastARTUpdateFrame << LL_ENDL;
abuse_constness->mARTCapped = false;
return false;
}

View File

@ -5546,7 +5546,7 @@ void LLVolumeGeometryManager::registerFace(LLSpatialGroup* group, LLFace* facep,
}
}
if (type == LLRenderPass::PASS_ALPHA)
// if (type == LLRenderPass::PASS_ALPHA) // <FS:Beq> allow tracking through pipeline
{ //for alpha sorting
facep->setDrawInfo(draw_info);
}
@ -5784,6 +5784,14 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
continue;
}
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
// <FS:Beq> Capture render times
if(vobj->isAttachment())
{
T= trackMyAttachment(vobj);
}
// </FS:Beq>
//<FS:Beq> Stop doing stupid stuff we don't need to.
// Moving this inside a debug enabled check
// std::string vobj_name = llformat("Vol%p", vobj);
@ -6382,20 +6390,14 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
if (drawablep && !drawablep->isDead() && drawablep->isState(LLDrawable::REBUILD_ALL) && !drawablep->isState(LLDrawable::RIGGED) )
{
FSZoneN("Rebuild all non-Rigged")
LLVOVolume* vobj = drawablep->getVOVolume();
LLVOVolume* rootAtt{};
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
if(vobj->isAttachment())
{
auto par = (LLVOVolume*)vobj->getParent();
rootAtt = vobj;
while( par->isAttachment() )
{
rootAtt = par;
par = (LLVOVolume*)par->getParent();
}
LL_INFOS() << "recording time for ATT@" << rootAtt << " " << (rootAtt?rootAtt->getAttachmentItemName():"null") << LL_ENDL;
T = trackMyAttachment(vobj);
}
FSPerfStats::RecordAttachmentTime<U32> T(rootAtt?rootAtt->getAttachmentItemID().getCRC32():0, FSPerfStats::ObjStatType::RENDER_GEOMETRY);
//<FS:Beq> avoid unfortunate sleep during trylock by static check
//if(debugLoggingEnabled("AnimatedObjectsLinkset"))
static auto debug_logging_on = debugLoggingEnabled("AnimatedObjectsLinkset");
@ -6808,10 +6810,18 @@ U32 LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace
U32 indices_index = 0;
U16 index_offset = 0;
std::unique_ptr<FSPerfStats::RecordAttachmentTime> T{};
LLViewerObject * lastVObj{nullptr};
while (face_iter < i)
{
//update face indices for new buffer
facep = *face_iter;
LLViewerObject* vobj = facep->getViewerObject();
if(vobj && vobj != lastVObj && vobj->isAttachment())
{
T = trackMyAttachment(vobj);
lastVObj = vobj;
}
if (buffer.isNull())
{
// Bulk allocation failed
@ -7027,8 +7037,12 @@ U32 LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace
else if (is_alpha)
{
// can we safely treat this as an alpha mask?
if (facep->getFaceColor().mV[3] <= 0.f)
// <FS:Beq> Nothing actually sets facecolor use the TE alpha instead.
// if (facep->getFaceColor().mV[3] <= 0.f)
if (te->getAlpha() <=0.f || facep->getFaceColor().mV[3] <= 0.f)
// </FS:Beq>
{ //100% transparent, don't render unless we're highlighting transparent
FSZoneN("facep->alpha -> invisible");
registerFace(group, facep, LLRenderPass::PASS_ALPHA_INVISIBLE);
}
else if (facep->canRenderAsMask())

View File

@ -3549,6 +3549,8 @@ void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)
//LLVertexBuffer::unbind();
grabReferences(result);
{
FSZoneN("checkOcclusionAndRebuildMesh");
for (LLCullResult::sg_iterator iter = sCull->beginDrawableGroups(); iter != sCull->endDrawableGroups(); ++iter)
{
LLSpatialGroup* group = *iter;
@ -3572,9 +3574,11 @@ void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)
}
}
}
}
if (LLViewerCamera::sCurCameraID == LLViewerCamera::CAMERA_WORLD)
{
FSZoneN("WorldCamera");
LLSpatialGroup* last_group = NULL;
BOOL fov_changed = LLViewerCamera::getInstance()->isDefaultFOVChanged();
for (LLCullResult::bridge_iterator i = sCull->beginVisibleBridge(); i != sCull->endVisibleBridge(); ++i)
@ -3608,7 +3612,8 @@ void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)
last_group->mLastUpdateDistance = last_group->mDistance;
}
}
{
FSZoneN("StateSort: visible groups");
for (LLCullResult::sg_iterator iter = sCull->beginVisibleGroups(); iter != sCull->endVisibleGroups(); ++iter)
{
LLSpatialGroup* group = *iter;
@ -3627,7 +3632,7 @@ void LLPipeline::stateSort(LLCamera& camera, LLCullResult &result)
group->rebuildMesh();
}
}
}
}}
{
LL_RECORD_BLOCK_TIME(FTM_STATESORT_DRAWABLE);
@ -3983,6 +3988,8 @@ void LLPipeline::postSort(LLCamera& camera)
LL_PUSH_CALLSTACKS();
//rebuild drawable geometry
{
FSZoneN("PostSort: rebuildGeom")
for (LLCullResult::sg_iterator i = sCull->beginDrawableGroups(); i != sCull->endDrawableGroups(); ++i)
{
LLSpatialGroup* group = *i;
@ -4001,6 +4008,8 @@ void LLPipeline::postSort(LLCamera& camera)
//build render map
{
FSZoneN("build render map");
for (LLCullResult::sg_iterator i = sCull->beginVisibleGroups(); i != sCull->endVisibleGroups(); ++i)
{
LLSpatialGroup* group = *i;
@ -4046,6 +4055,7 @@ void LLPipeline::postSort(LLCamera& camera)
if (hasRenderType(LLPipeline::RENDER_TYPE_PASS_ALPHA))
{
FSZone("Collect Alpha groups");
LLSpatialGroup::draw_map_t::iterator alpha = group->mDrawMap.find(LLRenderPass::PASS_ALPHA);
if (alpha != group->mDrawMap.end())
@ -4071,6 +4081,7 @@ void LLPipeline::postSort(LLCamera& camera)
}
}
}
}
//flush particle VB
if (LLVOPartGroup::sVB)
@ -4096,12 +4107,14 @@ void LLPipeline::postSort(LLCamera& camera)
glBeginQueryARB(GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN, mMeshDirtyQueryObject);
}*/
{
FSZoneN("rebuild delayed upd groups") }
//pack vertex buffers for groups that chose to delay their updates
for (LLSpatialGroup::sg_vector_t::iterator iter = mMeshDirtyGroup.begin(); iter != mMeshDirtyGroup.end(); ++iter)
{
(*iter)->rebuildMesh();
}
}
/*if (use_transform_feedback)
{
@ -4110,12 +4123,17 @@ void LLPipeline::postSort(LLCamera& camera)
mMeshDirtyGroup.clear();
{
FSZoneN("sort alpha groups")
if (!sShadowRender)
{
std::sort(sCull->beginAlphaGroups(), sCull->endAlphaGroups(), LLSpatialGroup::CompareDepthGreater());
}
}
LL_PUSH_CALLSTACKS();
{
FSZoneN("beacon rendering flags");
// only render if the flag is set. The flag is only set if we are in edit mode or the toggle is set in the menus
// Ansariel: Make beacons also show when beacons floater is closed.
if (/*LLFloaterReg::instanceVisible("beacons") &&*/ !sShadowRender)
@ -4169,6 +4187,7 @@ void LLPipeline::postSort(LLCamera& camera)
forAllVisibleDrawables(renderSoundHighlights);
}
}
}
LL_PUSH_CALLSTACKS();
// If managing your telehub, draw beacons at telehub and currently selected spawnpoint.
if (LLFloaterTelehub::renderBeacons())
@ -4178,6 +4197,7 @@ void LLPipeline::postSort(LLCamera& camera)
if (!sShadowRender)
{
FSZoneN("Render face highlights");
mSelectedFaces.clear();
LLPipeline::setRenderHighlightTextureChannel(gFloaterTools->getPanelFace()->getTextureChannelToEdit());

View File

@ -91,8 +91,12 @@
width="540">
<name_list.columns
label=""
name="complex_visual"
name="art_visual"
width="90" />
<name_list.columns
label=""
name="art_value"
width="80" />
<name_list.columns
label=""
name="complex_value"

View File

@ -84,10 +84,9 @@
<name_list.columns
label=""
name="complex_value"
width="40" />
width="80" />
<name_list.columns
label=""
name="name"/>
</name_list>
</panel>

View File

@ -148,12 +148,16 @@
width="540">
<name_list.columns
label=""
name="complex_visual"
name="art_visual"
width="90" />
<name_list.columns
label=""
name="art_value"
width="80" />
<name_list.columns
label=""
name="complex_value"
width="50" />
width="40" />
<name_list.columns
label=""
name="name"/>