SL-18154 WIP -- CPU sampling (AMD uProf) profile-guided optimizations to reduce CPU usage of background threads.

master
Dave Parks 2022-11-30 13:25:00 -06:00
parent adbd264d35
commit 87bb72a47a
5 changed files with 44 additions and 3 deletions

View File

@ -477,9 +477,14 @@ void LLQueuedThread::processRequest(LLQueuedThread::QueuedRequest* req)
mRequestQueue.post([=]
{
LL_PROFILE_ZONE_NAMED("processRequest - retry");
while (LL::WorkQueue::TimePoint::clock::now() < retry_time)
if (LL::WorkQueue::TimePoint::clock::now() < retry_time)
{
std::this_thread::yield(); //note: don't use LLThread::yield here to avoid
auto sleep_time = std::chrono::duration_cast<std::chrono::milliseconds>(retry_time - LL::WorkQueue::TimePoint::clock::now());
if (sleep_time.count() > 0)
{
ms_sleep(sleep_time.count());
}
}
processRequest(req);
});

View File

@ -42,6 +42,7 @@
#ifdef LL_WINDOWS
const DWORD MS_VC_EXCEPTION=0x406D1388;
#pragma pack(push,8)
@ -133,6 +134,15 @@ void LLThread::threadRun()
{
#ifdef LL_WINDOWS
set_thread_name(-1, mName.c_str());
#if 0 // probably a bad idea (see usage of SetThreadIdealProcessor in LLWindowWin32)
HANDLE hThread = GetCurrentThread();
if (hThread)
{
SetThreadAffinityMask(hThread, (DWORD_PTR) 0xFFFFFFFFFFFFFFFE);
}
#endif
#endif
LL_PROFILER_SET_THREAD_NAME( mName.c_str() );

View File

@ -92,6 +92,7 @@ U32 micro_sleep(U64 us, U32 max_yields)
U32 micro_sleep(U64 us, U32 max_yields)
{
LL_PROFILE_ZONE_SCOPED
#if 0
LARGE_INTEGER ft;
ft.QuadPart = -static_cast<S64>(us * 10); // '-' using relative time
@ -99,6 +100,9 @@ U32 micro_sleep(U64 us, U32 max_yields)
SetWaitableTimer(timer, &ft, 0, NULL, NULL, 0);
WaitForSingleObject(timer, INFINITE);
CloseHandle(timer);
#else
Sleep(us / 1000);
#endif
return 0;
}

View File

@ -467,7 +467,6 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
// process deprioritization during profiles
// force high thread priority
HANDLE hProcess = GetCurrentProcess();
HANDLE hThread = GetCurrentThread();
if (hProcess)
{
@ -484,6 +483,20 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
}
}
}
#endif
#if 0 // this is also probably a bad idea, but keep it in your back pocket for getting main thread off of background thread cores (see also LLThread::threadRun)
HANDLE hThread = GetCurrentThread();
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
U32 core_count = sysInfo.dwNumberOfProcessors;
if (max_cores != 0)
{
core_count = llmin(core_count, max_cores);
}
if (hThread)
{
@ -499,6 +512,9 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
{
LL_INFOS() << "Failed to set thread priority: " << std::hex << GetLastError() << LL_ENDL;
}
// tell main thread to prefer core 0
SetThreadIdealProcessor(hThread, 0);
}
}
#endif

View File

@ -2181,6 +2181,12 @@ bool LLAppViewer::initThreads()
// get the number of concurrent threads that can run
S32 cores = std::thread::hardware_concurrency();
U32 max_cores = gSavedSettings.getU32("EmulateCoreCount");
if (max_cores != 0)
{
cores = llmin(cores, (S32) max_cores);
}
// The only configurable thread count right now is ImageDecode
// The viewer typically starts around 8 threads not including image decode,
// so try to leave at least one core free