Fix Tracy memory profiling overloads for aligned allocations

Fix disabling renderdoc support
Improve ll_aligned_alloc functions on Darwin for 32- and 64-byte alignment by utilizing posix_memalign
master
Rye 2025-02-02 02:43:46 -05:00
parent 51ed6b5424
commit 6fcd349f37
20 changed files with 168 additions and 126 deletions

View File

@ -2433,11 +2433,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
<string>226225ec049826c35adc5e897e0398ed64d4bedb</string>
<string>0c3d01b7e9e39c23f0f40c56a1a04d1fba08ead0</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-darwin64-10376230034.tar.zst</string>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-darwin64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>darwin64</string>
@ -2447,11 +2447,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
<string>8c5429d1a1486f40cf7e5e88a232222d1fa4f78e</string>
<string>b46cef5646a8d0471ab6256fe5119220fa238772</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-windows64-10376230034.tar.zst</string>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-windows64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>windows64</string>
@ -2461,11 +2461,11 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>archive</key>
<map>
<key>hash</key>
<string>ed0664a009aba1dcf1246d845839f524e857162e</string>
<string>beab04c9ea6036b1851a485b65c66cf6a38f0be4</string>
<key>hash_algorithm</key>
<string>sha1</string>
<key>url</key>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.0%2Br1/tracy-v0.11.0.10376230034-linux64-10376230034.tar.zst</string>
<string>https://github.com/secondlife/3p-tracy/releases/download/v0.11.1-r1/tracy-v0.11.1.11706699176-linux64-11706699176.tar.zst</string>
</map>
<key>name</key>
<string>linux64</string>
@ -2478,7 +2478,7 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
<key>copyright</key>
<string>Copyright (c) 2017-2024, Bartosz Taudul (wolf@nereid.pl)</string>
<key>version</key>
<string>v0.11.0.10376230034</string>
<string>v0.11.1.11706699176</string>
<key>name</key>
<string>tracy</string>
<key>canonical_repo</key>

View File

@ -55,6 +55,7 @@ set(cmake_SOURCE_FILES
TemplateCheck.cmake
TinyEXR.cmake
TinyGLTF.cmake
Tracy.cmake
Tut.cmake
UI.cmake
UnixInstall.cmake

View File

@ -15,6 +15,7 @@ endif()
if (USE_TRACY)
option(USE_TRACY_ON_DEMAND "Use on-demand Tracy profiling." ON)
option(USE_TRACY_LOCAL_ONLY "Disallow remote Tracy profiling." OFF)
option(USE_TRACY_GPU "Use Tracy GPU profiling" OFF)
use_system_binary(tracy)
use_prebuilt_binary(tracy)
@ -31,9 +32,8 @@ if (USE_TRACY)
target_compile_definitions(ll::tracy INTERFACE -DTRACY_NO_BROADCAST=1 -DTRACY_ONLY_LOCALHOST=1)
endif ()
# GHA runners don't always provide invariant TSC support, but always build with LL_TESTS enabled
if (DARWIN AND LL_TESTS)
target_compile_definitions(ll::tracy INTERFACE -DTRACY_TIMER_FALLBACK=1)
if (USE_TRACY_GPU AND NOT DARWIN) # Tracy OpenGL mode is incompatible with macOS/iOS
target_compile_definitions(ll::tracy INTERFACE -DLL_PROFILER_ENABLE_TRACY_OPENGL=1)
endif ()
# See: indra/llcommon/llprofiler.h

View File

@ -28,12 +28,6 @@
#define LL_LINDEN_COMMON_H
#include "llprofiler.h"
#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL) // hooks for memory profiling
void *tracy_aligned_malloc(size_t size, size_t alignment);
void tracy_aligned_free(void *memblock);
#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y))
#define _aligned_free(X) tracy_aligned_free((X))
#endif
// *NOTE: Please keep includes here to a minimum!
//

View File

@ -33,23 +33,12 @@
#include "lltracethreadrecorder.h"
#include "llcleanup.h"
thread_local bool gProfilerEnabled = false;
#if (TRACY_ENABLE)
#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY && TRACY_ENABLE
// Override new/delete for tracy memory profiling
void* ll_tracy_new(size_t size)
{
void* ptr;
if (gProfilerEnabled)
{
//LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
ptr = (malloc)(size);
}
else
{
ptr = (malloc)(size);
}
void* ptr = (malloc)(size);
if (!ptr)
{
throw std::bad_alloc();
@ -58,6 +47,29 @@ void* ll_tracy_new(size_t size)
return ptr;
}
// Allocate `size` bytes with the requested `alignment` through
// ll_aligned_malloc_fallback() and report the block to the profiler.
// Throws std::bad_alloc when the allocator returns a null pointer, so the
// returned pointer is never null.
void* ll_tracy_aligned_new(size_t size, size_t alignment)
{
void* ptr = ll_aligned_malloc_fallback(size, alignment);
if (!ptr)
{
throw std::bad_alloc();
}
// Only successful allocations are reported to the profiler.
LL_PROFILE_ALLOC(ptr, size);
return ptr;
}
// Report the release of `ptr` to the profiler, then hand it back to the heap.
void ll_tracy_delete(void* ptr)
{
// The profiler is notified before the block is actually released.
LL_PROFILE_FREE(ptr);
// Parentheses around `free` suppress expansion of any function-like macro of that name.
(free)(ptr);
}
// Report the release of `ptr` to the profiler, then release it with
// ll_aligned_free_fallback(), the counterpart of the ll_aligned_malloc_fallback()
// call made in ll_tracy_aligned_new().
void ll_tracy_aligned_delete(void* ptr)
{
// The profiler is notified before the block is actually released.
LL_PROFILE_FREE(ptr);
ll_aligned_free_fallback(ptr);
}
void* operator new(size_t size)
{
return ll_tracy_new(size);
@ -68,18 +80,14 @@ void* operator new[](std::size_t count)
return ll_tracy_new(count);
}
void ll_tracy_delete(void* ptr)
void* operator new(size_t size, std::align_val_t align)
{
LL_PROFILE_FREE(ptr);
if (gProfilerEnabled)
{
//LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
(free)(ptr);
}
else
{
(free)(ptr);
}
return ll_tracy_aligned_new(size, (size_t)align);
}
// Aligned array form of operator new: forwards to ll_tracy_aligned_new(),
// passing the std::align_val_t alignment converted to size_t.
void* operator new[](std::size_t count, std::align_val_t align)
{
return ll_tracy_aligned_new(count, (size_t)align);
}
void operator delete(void *ptr) noexcept
@ -92,27 +100,17 @@ void operator delete[](void* ptr) noexcept
ll_tracy_delete(ptr);
}
// C-style malloc/free can't be so easily overridden, so we define tracy versions and use
// a pre-processor #define in linden_common.h to redirect to them. The parens around the native
// functions below prevents recursive substitution by the preprocessor.
//
// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at
// you, Havok), so we'll only capture the aligned version.
void *tracy_aligned_malloc(size_t size, size_t alignment)
void operator delete(void *ptr, std::align_val_t align) noexcept
{
auto ptr = ll_aligned_malloc_fallback(size, alignment);
if (ptr) LL_PROFILE_ALLOC(ptr, size);
return ptr;
ll_tracy_aligned_delete(ptr);
}
void tracy_aligned_free(void *memblock)
void operator delete[](void* ptr, std::align_val_t align) noexcept
{
LL_PROFILE_FREE(memblock);
ll_aligned_free_fallback(memblock);
ll_tracy_aligned_delete(ptr);
}
#endif
#endif // TRACY_ENABLE && !LL_PROFILER_ENABLE_TRACY_OPENGL
//static
bool LLCommon::sAprInitialized = false;

View File

@ -231,8 +231,6 @@ inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed wi
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
#if defined(LL_WINDOWS)
void* ret = _aligned_malloc(size, 32);
#elif defined(LL_DARWIN)
void* ret = ll_aligned_malloc_fallback( size, 32 );
#else
void *ret;
if (0 != posix_memalign(&ret, 32, size))
@ -248,8 +246,31 @@ inline void ll_aligned_free_32(void *p)
LL_PROFILE_FREE(p);
#if defined(LL_WINDOWS)
_aligned_free(p);
#elif defined(LL_DARWIN)
ll_aligned_free_fallback( p );
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
}
// Allocate `size` bytes aligned to a 64-byte boundary.
// Uses _aligned_malloc() when LL_WINDOWS is defined and posix_memalign() otherwise;
// on the posix_memalign() path a failed allocation returns nullptr.
inline void* ll_aligned_malloc_64(size_t size) // returned hunk MUST be freed with ll_aligned_free_64().
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
#if defined(LL_WINDOWS)
void* ret = _aligned_malloc(size, 64);
#else
void *ret;
if (0 != posix_memalign(&ret, 64, size))
return nullptr; // failed allocations are not reported to the profiler
#endif
// Report the new block to the profiler before handing it to the caller.
LL_PROFILE_ALLOC(ret, size);
return ret;
}
inline void ll_aligned_free_64(void *p)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
LL_PROFILE_FREE(p);
#if defined(LL_WINDOWS)
_aligned_free(p);
#else
free(p); // posix_memalign() is compatible with heap deallocator
#endif
@ -261,19 +282,23 @@ LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
void* ret;
if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
if constexpr (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
{
ret = malloc(size);
LL_PROFILE_ALLOC(ret, size);
}
else if (ALIGNMENT == 16)
else if constexpr (ALIGNMENT == 16)
{
ret = ll_aligned_malloc_16(size);
}
else if (ALIGNMENT == 32)
else if constexpr (ALIGNMENT == 32)
{
ret = ll_aligned_malloc_32(size);
}
else if constexpr (ALIGNMENT == 64)
{
ret = ll_aligned_malloc_64(size);
}
else
{
ret = ll_aligned_malloc_fallback(size, ALIGNMENT);
@ -285,16 +310,20 @@ template<size_t ALIGNMENT>
LL_FORCE_INLINE void ll_aligned_free(void* ptr)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
if constexpr (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
{
LL_PROFILE_FREE(ptr);
free(ptr);
}
else if (ALIGNMENT == 16)
else if constexpr (ALIGNMENT == 16)
{
ll_aligned_free_16(ptr);
}
else if (ALIGNMENT == 32)
else if constexpr (ALIGNMENT == 32)
{
return ll_aligned_free_32(ptr);
}
else if constexpr (ALIGNMENT == 64)
{
return ll_aligned_free_32(ptr);
}

View File

@ -74,23 +74,18 @@
#define LL_PROFILER_CONFIGURATION LL_PROFILER_CONFIG_FAST_TIMER
#endif
extern thread_local bool gProfilerEnabled;
#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE)
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
#include "tracy/Tracy.hpp"
// Enable OpenGL profiling
#define LL_PROFILER_ENABLE_TRACY_OPENGL 0
// Enable RenderDoc labeling
#define LL_PROFILER_ENABLE_RENDER_DOC 0
//#define LL_PROFILER_ENABLE_RENDER_DOC 0
#endif
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY
#define LL_PROFILER_FRAME_END FrameMark
#define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true;
#define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name );
#define LL_RECORD_BLOCK_TIME(name) ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true );
#define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, name, true );
#define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
@ -133,7 +128,7 @@ extern thread_local bool gProfilerEnabled;
#endif
#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
#define LL_PROFILER_FRAME_END FrameMark
#define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name ); gProfilerEnabled = true;
#define LL_PROFILER_SET_THREAD_NAME( name ) tracy::SetThreadName( name );
#define LL_RECORD_BLOCK_TIME(name) ZoneNamedN(___tracy_scoped_zone, #name, true); const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
#define LL_PROFILE_ZONE_NAMED(name) ZoneNamedN( ___tracy_scoped_zone, #name, true );
#define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
@ -158,23 +153,20 @@ extern thread_local bool gProfilerEnabled;
#endif // LL_PROFILER
#if LL_PROFILER_ENABLE_TRACY_OPENGL
#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name)
#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color)
#define LL_PROFILE_GPU_ZONE(name) TracyGpuZone(name)
#define LL_PROFILE_GPU_ZONEC(name,color) TracyGpuZoneC(name,color)
#define LL_PROFILER_GPU_COLLECT TracyGpuCollect
#define LL_PROFILER_GPU_CONTEXT TracyGpuContext
// disable memory tracking (incompatible with GPU tracing
#define LL_PROFILE_ALLOC(ptr, size) (void)(ptr); (void)(size);
#define LL_PROFILE_FREE(ptr) (void)(ptr);
#define LL_PROFILER_GPU_CONTEXT_NAMED TracyGpuContextName
#else
#define LL_PROFILE_GPU_ZONE(name) (void)name;
#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color;
#define LL_PROFILE_GPU_ZONE(name) (void)name;
#define LL_PROFILE_GPU_ZONEC(name,color) (void)name;(void)color;
#define LL_PROFILER_GPU_COLLECT
#define LL_PROFILER_GPU_CONTEXT
#define LL_PROFILER_GPU_CONTEXT_NAMED(name) (void)name;
#endif // LL_PROFILER_ENABLE_TRACY_OPENGL
#define LL_LABEL_OBJECT_GL(type, name, length, label)
#if !LL_DARWIN && LL_PROFILER_CONFIGURATION > 1
#if LL_PROFILER_CONFIGURATION >= LL_PROFILER_CONFIG_TRACY
#define LL_PROFILE_ALLOC(ptr, size) TracyAlloc(ptr, size)
#define LL_PROFILE_FREE(ptr) TracyFree(ptr)
#else
@ -182,8 +174,6 @@ extern thread_local bool gProfilerEnabled;
#define LL_PROFILE_FREE(ptr) (void)(ptr);
#endif
#endif
#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_LABEL_OBJECT_GL(type, name, length, label) glObjectLabel(type, name, length, label)
#else

View File

@ -543,7 +543,7 @@ bool LLGLSLShader::createShader()
}
}
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
setLabel(mName.c_str());
#endif
@ -2061,7 +2061,7 @@ LLUUID LLGLSLShader::hash()
return hash_obj.digest();
}
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
void LLGLSLShader::setLabel(const char* label) {
LL_LABEL_OBJECT_GL(GL_PROGRAM, mProgramObject, strlen(label), label);
}

View File

@ -360,7 +360,7 @@ public:
// hacky flag used for optimization in LLDrawPoolAlpha
bool mCanBindFast = false;
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
void setLabel(const char* label);
#endif
@ -380,7 +380,7 @@ extern LLGLSLShader gSolidColorProgram;
//Alpha mask shader (declared here so llappearance can access properly)
extern LLGLSLShader gAlphaMaskProgram;
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_SET_SHADER_LABEL(shader) shader.setLabel(#shader)
#else
#define LL_SET_SHADER_LABEL(shader, label)

View File

@ -885,7 +885,7 @@ bool LLVertexBuffer::validateRange(U32 start, U32 end, U32 count, U32 indices_of
return true;
}
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
void LLVertexBuffer::setLabel(const char* label) {
LL_LABEL_OBJECT_GL(GL_BUFFER, mGLBuffer, strlen(label), label);
}

View File

@ -279,7 +279,7 @@ public:
//for debugging, validate data in given range is valid
bool validateRange(U32 start, U32 end, U32 count, U32 offset) const;
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
void setLabel(const char* label);
#endif
@ -340,7 +340,7 @@ public:
static U32 sVertexCount;
};
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
#define LL_LABEL_VERTEX_BUFFER(buf, name) buf->setLabel(name)
#else
#define LL_LABEL_VERTEX_BUFFER(buf, name)

View File

@ -1666,6 +1666,11 @@ const S32 max_format = (S32)num_formats - 1;
return false;
}
// Setup Tracy gpu context
{
LL_PROFILER_GPU_CONTEXT;
}
// Disable vertical sync for swap
toggleVSync(enable_vsync);
@ -1697,8 +1702,6 @@ const S32 max_format = (S32)num_formats - 1;
swapBuffers();
}
LL_PROFILER_GPU_CONTEXT;
return true;
}

View File

@ -1324,6 +1324,7 @@ bool LLAppViewer::frame()
bool LLAppViewer::doFrame()
{
LL_RECORD_BLOCK_TIME(FTM_FRAME);
LL_PROFILE_GPU_ZONE("Frame");
{
// and now adjust the visuals from previous frame.
if(LLPerfStats::tunables.userAutoTuneEnabled && LLPerfStats::tunables.tuningFlag != LLPerfStats::Tunables::Nothing)
@ -1413,24 +1414,26 @@ bool LLAppViewer::doFrame()
if (!LLApp::isExiting())
{
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard");
pingMainloopTimeout("Main:JoystickKeyboard");
// Scan keyboard for movement keys. Command keys and typing
// are handled by windows callbacks. Don't do this until we're
// done initializing. JC
if (gViewerWindow
&& (gHeadlessClient || gViewerWindow->getWindow()->getVisible())
&& gViewerWindow->getActive()
&& !gViewerWindow->getWindow()->getMinimized()
&& LLStartUp::getStartupState() == STATE_STARTED
&& (gHeadlessClient || !gViewerWindow->getShowProgress())
&& !gFocusMgr.focusLocked())
{
LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_IDLE);
joystick->scanJoystick();
gKeyboard->scanKeyboard();
gViewerInput.scanMouse();
LL_PROFILE_ZONE_NAMED_CATEGORY_APP("df JoystickKeyboard");
pingMainloopTimeout("Main:JoystickKeyboard");
// Scan keyboard for movement keys. Command keys and typing
// are handled by windows callbacks. Don't do this until we're
// done initializing. JC
if (gViewerWindow
&& (gHeadlessClient || gViewerWindow->getWindow()->getVisible())
&& gViewerWindow->getActive()
&& !gViewerWindow->getWindow()->getMinimized()
&& LLStartUp::getStartupState() == STATE_STARTED
&& (gHeadlessClient || !gViewerWindow->getShowProgress())
&& !gFocusMgr.focusLocked())
{
LLPerfStats::RecordSceneTime T(LLPerfStats::StatType_t::RENDER_IDLE);
joystick->scanJoystick();
gKeyboard->scanKeyboard();
gViewerInput.scanMouse();
}
}
// Update state based on messages, user input, object idle.

View File

@ -73,6 +73,11 @@ static void exceptionTerminateHandler()
int main( int argc, char **argv )
{
// Call Tracy first thing to have it allocate memory
// https://github.com/wolfpld/tracy/issues/196
LL_PROFILER_FRAME_END;
LL_PROFILER_SET_THREAD_NAME("App");
gArgC = argc;
gArgV = argv;

View File

@ -231,6 +231,11 @@ void infos(const std::string& message)
int main( int argc, char **argv )
{
// Call Tracy first thing to have it allocate memory
// https://github.com/wolfpld/tracy/issues/196
LL_PROFILER_FRAME_END;
LL_PROFILER_SET_THREAD_NAME("App");
// Store off the command line args for use later.
gArgC = argc;
gArgV = argv;

View File

@ -204,7 +204,7 @@ public:
NUM_RENDER_TYPES,
};
#ifdef LL_PROFILER_ENABLE_RENDER_DOC
#if LL_PROFILER_ENABLE_RENDER_DOC
static inline const char* lookupPassName(U32 pass)
{
switch (pass)
@ -340,7 +340,7 @@ public:
}
}
#else
static inline const char* lookupPass(U32 pass) { return ""; }
static inline const char* lookupPassName(U32 pass) { return ""; }
#endif
LLRenderPass(const U32 type);

View File

@ -81,6 +81,7 @@ void LLHeroProbeManager::update()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("hero manager update");
llassert(!gCubeSnapshot); // assert a snapshot is not in progress
if (LLAppViewer::instance()->logoutRequestSent())
{
@ -282,6 +283,9 @@ void LLHeroProbeManager::renderProbes()
// In effect this simulates single-bounce lighting.
void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool is_dynamic, F32 near_clip)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("hero probe update");
// hacky hot-swap of camera specific render targets
gPipeline.mRT = &gPipeline.mHeroProbeRT;
@ -352,7 +356,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool
for (int i = 0; i < mMipChain.size(); ++i)
{
LL_PROFILE_GPU_ZONE("probe mip");
LL_PROFILE_GPU_ZONE("hero probe mip");
mMipChain[i].bindTarget();
if (i == 0)
{
@ -379,7 +383,7 @@ void LLHeroProbeManager::updateProbeFace(LLReflectionMap* probe, U32 face, bool
if (mip >= 0)
{
LL_PROFILE_GPU_ZONE("probe mip copy");
LL_PROFILE_GPU_ZONE("hero probe mip copy");
mTexture->bind(0);
glCopyTexSubImage3D(GL_TEXTURE_CUBE_MAP_ARRAY, mip, 0, 0, sourceIdx * 6 + face, 0, 0, res, res);
@ -427,7 +431,7 @@ void LLHeroProbeManager::generateRadiance(LLReflectionMap* probe)
for (int i = 0; i < mMipChain.size() / 4; ++i)
{
LL_PROFILE_GPU_ZONE("probe radiance gen");
LL_PROFILE_GPU_ZONE("hero probe radiance gen");
static LLStaticHashedString sMipLevel("mipLevel");
static LLStaticHashedString sRoughness("roughness");
static LLStaticHashedString sWidth("u_width");
@ -474,6 +478,7 @@ void LLHeroProbeManager::updateUniforms()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("hpmu - uniforms")
LLMatrix4a modelview;
modelview.loadu(gGLModelView);

View File

@ -210,6 +210,7 @@ void LLReflectionMapManager::update()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("reflection manager update");
llassert(!gCubeSnapshot); // assert a snapshot is not in progress
if (LLAppViewer::instance()->logoutRequestSent())
{
@ -696,6 +697,8 @@ void LLReflectionMapManager::doProbeUpdate()
// In effect this simulates single-bounce lighting.
void LLReflectionMapManager::updateProbeFace(LLReflectionMap* probe, U32 face)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("probe update");
// hacky hot-swap of camera specific render targets
gPipeline.mRT = &gPipeline.mAuxillaryRT;
@ -1011,6 +1014,7 @@ void LLReflectionMapManager::updateUniforms()
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY;
LL_PROFILE_GPU_ZONE("rmmu - uniforms")
// structure for packing uniform buffer object
// see class3/deferred/reflectionProbeF.glsl

View File

@ -407,6 +407,7 @@ static void update_tp_display(bool minimized)
void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Render");
LL_PROFILE_GPU_ZONE("Render");
LLPerfStats::RecordSceneTime T (LLPerfStats::StatType_t::RENDER_DISPLAY); // render time capture - This is the main stat for overall rendering.
@ -709,6 +710,7 @@ void display(bool rebuild, F32 zoom_factor, int subfield, bool for_snapshot)
if (gPipeline.RenderMirrors && !gSnapshot)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY("Update hero probes");
LL_PROFILE_GPU_ZONE("hero manager")
gPipeline.mHeroProbeManager.update();
gPipeline.mHeroProbeManager.renderProbes();
}

View File

@ -7182,11 +7182,11 @@ extern LLPointer<LLImageGL> gEXRImage;
void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)
{
LL_PROFILE_GPU_ZONE("tonemap");
dst->bindTarget();
// gamma correct lighting
{
LL_PROFILE_GPU_ZONE("tonemap");
static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false);
LLGLDepthTest depth(GL_FALSE, GL_FALSE);
@ -7235,11 +7235,11 @@ void LLPipeline::tonemap(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::gammaCorrect(LLRenderTarget* src, LLRenderTarget* dst)
{
LL_PROFILE_GPU_ZONE("gamma correct");
dst->bindTarget();
// gamma correct lighting
{
LL_PROFILE_GPU_ZONE("gamma correct");
LLGLDepthTest depth(GL_FALSE, GL_FALSE);
static LLCachedControl<bool> buildNoPost(gSavedSettings, "RenderDisablePostProcessing", false);
@ -7290,9 +7290,9 @@ void LLPipeline::copyScreenSpaceReflections(LLRenderTarget* src, LLRenderTarget*
void LLPipeline::generateGlow(LLRenderTarget* src)
{
LL_PROFILE_GPU_ZONE("glow generate");
if (sRenderGlow)
{
LL_PROFILE_GPU_ZONE("glow");
mGlow[2].bindTarget();
mGlow[2].clear();
@ -7401,6 +7401,7 @@ void LLPipeline::generateGlow(LLRenderTarget* src)
void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)
{
static LLCachedControl<F32> cas_sharpness(gSavedSettings, "RenderCASSharpness", 0.4f);
LL_PROFILE_GPU_ZONE("cas");
if (cas_sharpness == 0.0f || !gCASProgram.isComplete())
{
gPipeline.copyRenderTarget(src, dst);
@ -7445,6 +7446,7 @@ void LLPipeline::applyCAS(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::applyFXAA(LLRenderTarget* src, LLRenderTarget* dst)
{
LL_PROFILE_GPU_ZONE("FXAA");
{
llassert(!gCubeSnapshot);
bool multisample = RenderFSAAType == 1 && gFXAAProgram[0].isComplete() && mFXAAMap.isComplete();
@ -7536,7 +7538,7 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)
// Present everything.
if (multisample)
{
LL_PROFILE_GPU_ZONE("aa");
LL_PROFILE_GPU_ZONE("SMAA Edge");
static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);
U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U);
@ -7648,13 +7650,13 @@ void LLPipeline::generateSMAABuffers(LLRenderTarget* src)
void LLPipeline::applySMAA(LLRenderTarget* src, LLRenderTarget* dst)
{
LL_PROFILE_GPU_ZONE("SMAA");
llassert(!gCubeSnapshot);
bool multisample = RenderFSAAType == 2 && gSMAAEdgeDetectProgram[0].isComplete() && mFXAAMap.isComplete() && mSMAABlendBuffer.isComplete();
// Present everything.
if (multisample)
{
LL_PROFILE_GPU_ZONE("aa");
static LLCachedControl<U32> aa_quality(gSavedSettings, "RenderFSAASamples", 0U);
U32 fsaa_quality = std::clamp(aa_quality(), 0U, 3U);
@ -7732,8 +7734,9 @@ void LLPipeline::copyRenderTarget(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)
{
// Go ahead and do our glow combine here in our destination. We blit this later into the front buffer.
LL_PROFILE_GPU_ZONE("glow combine");
// Go ahead and do our glow combine here in our destination. We blit this later into the front buffer.
dst->bindTarget();
{
@ -7752,6 +7755,7 @@ void LLPipeline::combineGlow(LLRenderTarget* src, LLRenderTarget* dst)
void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)
{
LL_PROFILE_GPU_ZONE("dof");
{
bool dof_enabled =
(RenderDepthOfFieldInEditMode || !LLToolMgr::getInstance()->inBuildMode()) &&
@ -7762,7 +7766,6 @@ void LLPipeline::renderDoF(LLRenderTarget* src, LLRenderTarget* dst)
if (dof_enabled)
{
LL_PROFILE_GPU_ZONE("dof");
LLGLDisable blend(GL_BLEND);
// depth of field focal plane calculations