Merge remote-tracking branch 'origin/DRTVWR-546' into DRTVWR-559

2022-04-27 10:27:48 -07:00 · 2022-04-27 10:27:48 -07:00 · a3ffa9f006
parent 030d61ac58 bafa869c21
commit a3ffa9f006
416 changed files with 16376 additions and 19214 deletions
--- a/.gitignore
+++ b/.gitignore
@ -24,6 +24,7 @@ build-vc120-32/
 build-vc120-64/
 build-vc150-32/
 build-vc150-64/
+build-vc160-64/
 indra/CMakeFiles
 indra/build-vc[0-9]*
 indra/lib/mono/1.0/*.dll
--- a/autobuild.xml
+++ b/autobuild.xml
@ -1088,9 +1088,9 @@
            <key>archive</key>
            <map>
              <key>hash</key>
-              <string>650e836255b6c2ecb93d3f1f7220051c</string>
+              <string>dce3f3c01fddb400cb143c3283fe9259</string>
              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/ct2/55011/511905/glh_linear-0.0.0-common-538981.tar.bz2</string>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/82754/775367/glh_linear-0.0.0-common-560278.tar.bz2</string>
            </map>
            <key>name</key>
            <string>common</string>
@ -1099,100 +1099,6 @@
        <key>version</key>
        <string>0.0.0</string>
      </map>
-      <key>glod</key>
-      <map>
-        <key>copyright</key>
-        <string>Copyright 2003 Jonathan Cohen, Nat Duca, David Luebke, Brenden Schubert - Johns Hopkins University and University of Virginia</string>
-        <key>license</key>
-        <string>GLOD Open-Source License   Version 1.0</string>
-        <key>license_file</key>
-        <string>LICENSES/GLOD.txt</string>
-        <key>name</key>
-        <string>glod</string>
-        <key>platforms</key>
-        <map>
-          <key>darwin</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>71e678d70e276fc42a56926fc28a7abd</string>
-              <key>hash_algorithm</key>
-              <string>md5</string>
-              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/glod_3p-update-glod/rev/296895/arch/Darwin/installer/glod-1.0pre4.296895-darwin-296895.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>darwin</string>
-          </map>
-          <key>darwin64</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>a9eaa005ff9d387f946283fbcb69b3c8</string>
-              <key>url</key>
-              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/76353/727324/glod-1.0pre3.555522-darwin64-555522.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>darwin64</string>
-          </map>
-          <key>linux</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>58113bcbbacbaeb2d278f745867ae6f0</string>
-              <key>hash_algorithm</key>
-              <string>md5</string>
-              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/hg/repo/p64_3p-glod/rev/314201/arch/Linux/installer/glod-1.0pre4.314201-linux-314201.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>linux</string>
-          </map>
-          <key>linux64</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>9aef5cd576ace19568da01d9bc3db29c</string>
-              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/ct2/1625/3628/glod-1.0pre3.501614-linux64-501614.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>linux64</string>
-          </map>
-          <key>windows</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>e36c95b0d0fbaa3ff3392facaf5de447</string>
-              <key>hash_algorithm</key>
-              <string>md5</string>
-              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/ct2/55008/511893/glod-1.0pre3.538980-windows-538980.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>windows</string>
-          </map>
-          <key>windows64</key>
-          <map>
-            <key>archive</key>
-            <map>
-              <key>hash</key>
-              <string>6302ee1903ab419e76565d9eb6acd274</string>
-              <key>url</key>
-              <string>http://automated-builds-secondlife-com.s3.amazonaws.com/ct2/55004/511885/glod-1.0pre3.538980-windows64-538980.tar.bz2</string>
-            </map>
-            <key>name</key>
-            <string>windows64</string>
-          </map>
-        </map>
-        <key>version</key>
-        <string>1.0pre3.555522</string>
-      </map>
      <key>googlemock</key>
      <map>
        <key>copyright</key>
@ -2408,6 +2314,62 @@
        <key>version</key>
        <string>7.11.1.297294</string>
      </map>
+      <key>meshoptimizer</key>
+      <map>
+        <key>canonical_repo</key>
+        <string>https://bitbucket.org/lindenlab/3p-meshoptimizer</string>
+        <key>copyright</key>
+        <string>Copyright (c) 2016-2021 Arseny Kapoulkine</string>
+        <key>description</key>
+        <string>Meshoptimizer. Mesh optimization library.</string>
+        <key>license</key>
+        <string>meshoptimizer</string>
+        <key>license_file</key>
+        <string>LICENSES/meshoptimizer.txt</string>
+        <key>name</key>
+        <string>meshoptimizer</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin64</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>30bc37db57bbd87c4b5f62634964242a</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/84218/784918/meshoptimizer-0.16.561408-darwin64-561408.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin64</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>ca3684bcf0447746cd2844e94f6d1fc7</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/84219/784924/meshoptimizer-0.16.561408-windows-561408.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+          <key>windows64</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>aef28c089d20f69d13c9c3e113fb3895</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/84220/784931/meshoptimizer-0.16.561408-windows64-561408.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows64</string>
+          </map>
+        </map>
+        <key>version</key>
+        <string>0.16.561408</string>
+      </map>
      <key>nghttp2</key>
      <map>
        <key>copyright</key>
@ -3133,6 +3095,70 @@ Copyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors</string>
        <key>version</key>
        <string>0.132.2</string>
      </map>
+      <key>tracy</key>
+      <map>
+        <key>canonical_repo</key>
+        <string>https://bitbucket.org/lindenlab/3p-tracy</string>
+        <key>copyright</key>
+        <string>Copyright (c) 2017-2021, Bartosz Taudul (wolf@nereid.pl)</string>
+        <key>description</key>
+        <string>Tracy Profiler Library</string>
+        <key>license</key>
+        <string>bsd</string>
+        <key>license_file</key>
+        <string>LICENSES/tracy_license.txt</string>
+        <key>name</key>
+        <string>tracy</string>
+        <key>platforms</key>
+        <map>
+          <key>darwin64</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>da7317e4a81609f624f84780f28b07de</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/86972/801630/tracy-v0.7.8.563351-darwin64-563351.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>darwin64</string>
+          </map>
+          <key>windows</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>47c696cd2966c5cc3c8ba6115dd1f886</string>
+              <key>hash_algorithm</key>
+              <string>md5</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/86973/801641/tracy-v0.7.8.563351-windows-563351.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows</string>
+          </map>
+          <key>windows64</key>
+          <map>
+            <key>archive</key>
+            <map>
+              <key>hash</key>
+              <string>b649ee6591e67d2341e886b3fc3484a7</string>
+              <key>hash_algorithm</key>
+              <string>md5</string>
+              <key>url</key>
+              <string>https://automated-builds-secondlife-com.s3.amazonaws.com/ct2/86974/801642/tracy-v0.7.8.563351-windows64-563351.tar.bz2</string>
+            </map>
+            <key>name</key>
+            <string>windows64</string>
+          </map>
+        </map>
+        <key>source</key>
+        <string>https://bitbucket.org/lindenlab/3p-tracy</string>
+        <key>source_type</key>
+        <string>git</string>
+        <key>version</key>
+        <string>v0.7.8.563351</string>
+      </map>
      <key>tut</key>
      <map>
        <key>copyright</key>
--- a/build.sh
+++ b/build.sh
@ -155,7 +155,11 @@ pre_build()
    fi
    set -x

-    "$autobuild" configure --quiet -c $variant -- \
+    # honor autobuild_configure_parameters same as sling-buildscripts
+    eval_autobuild_configure_parameters=$(eval $(echo echo $autobuild_configure_parameters))
+
+    "$autobuild" configure --quiet -c $variant \
+     ${eval_autobuild_configure_parameters:---} \
     -DPACKAGE:BOOL=ON \
     -DHAVOK:BOOL="$HAVOK" \
     -DRELEASE_CRASH_REPORTING:BOOL="$RELEASE_CRASH_REPORTING" \
@ -205,7 +209,11 @@ build()
  if $build_viewer
  then
    begin_section "autobuild $variant"
-    "$autobuild" build --no-configure -c $variant || fatal "failed building $variant"
+    # honor autobuild_build_parameters same as sling-buildscripts
+    eval_autobuild_build_parameters=$(eval $(echo echo $autobuild_build_parameters))
+    "$autobuild" build --no-configure -c $variant \
+         $eval_autobuild_build_parameters \
+    || fatal "failed building $variant"
    echo true >"$build_dir"/build_ok
    end_section "autobuild $variant"
    
--- a/doc/contributions.txt
+++ b/doc/contributions.txt
@ -278,6 +278,7 @@ Beq Janus
 	SL-14766
 	SL-14927
 	SL-11300
+	SL-15709
 	SL-16021
 Beth Walcher
 Bezilon Kasei
@ -1370,7 +1371,7 @@ Sovereign Engineer
    MAINT-7343
    SL-11079
    OPEN-343
-    SL-11625
+	SL-11625
    BUG-229030
 	SL-14705
 	SL-14706
@ -1378,6 +1379,7 @@ Sovereign Engineer
 	SL-14731
 	SL-14732
 	SL-15096
+	SL-16127
 SpacedOut Frye
 	VWR-34
 	VWR-45
--- a/indra/CMakeLists.txt
+++ b/indra/CMakeLists.txt
@ -38,6 +38,7 @@ add_subdirectory(${LIBS_OPEN_PREFIX}llkdu)
 add_subdirectory(${LIBS_OPEN_PREFIX}llimagej2coj)
 add_subdirectory(${LIBS_OPEN_PREFIX}llinventory)
 add_subdirectory(${LIBS_OPEN_PREFIX}llmath)
+add_subdirectory(${LIBS_OPEN_PREFIX}llmeshoptimizer)
 add_subdirectory(${LIBS_OPEN_PREFIX}llmessage)
 add_subdirectory(${LIBS_OPEN_PREFIX}llprimitive)
 add_subdirectory(${LIBS_OPEN_PREFIX}llrender)
--- a/indra/cmake/00-Common.cmake
+++ b/indra/cmake/00-Common.cmake
@ -42,8 +42,8 @@ if(NON_RELEASE_CRASH_REPORTING)
  set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DLL_SEND_CRASH_REPORTS=1")
 endif()  

-# Don't bother with a MinSizeRel build.
-set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Release;Debug" CACHE STRING
+# Don't bother with MinSizeRel or Debug builds.
+set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Release" CACHE STRING
    "Supported build types." FORCE)


@ -66,16 +66,21 @@ if (WINDOWS)
  # CP changed to only append the flag for 32bit builds - on 64bit builds,
  # locally at least, the build output is spammed with 1000s of 'D9002'
  # warnings about this switch being ignored.
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP")  
  if( ADDRESS_SIZE EQUAL 32 )
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /p:PreferredToolArchitecture=x64")  
  endif()
+  # Preserve first-pass-through versions (ie no FORCE overwrite). Prevents recursive addition of /Zo (04/2021)
+  set(OG_CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} CACHE STRING "OG_CXX_FLAGS_RELEASE")
+  set(OG_CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} CACHE STRING "OG_CXX_FLAGS_RELWITHDEBINFO")
+
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO 
-      "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Zo"
+      "${OG_CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Zo"
      CACHE STRING "C++ compiler release-with-debug options" FORCE)
  set(CMAKE_CXX_FLAGS_RELEASE
-      "${CMAKE_CXX_FLAGS_RELEASE} ${LL_CXX_FLAGS} /Zo"
+      "${OG_CMAKE_CXX_FLAGS_RELEASE} ${LL_CXX_FLAGS} /Zo"
      CACHE STRING "C++ compiler release options" FORCE)
+  
  # zlib has assembly-language object files incompatible with SAFESEH
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /LARGEADDRESSAWARE /SAFESEH:NO /NODEFAULTLIB:LIBCMT /IGNORE:4099")

--- a/indra/cmake/CMakeLists.txt
+++ b/indra/cmake/CMakeLists.txt
@ -40,7 +40,6 @@ set(cmake_SOURCE_FILES
    FreeType.cmake
    GLEXT.cmake
    GLH.cmake
-    GLOD.cmake
 ##  GStreamer010Plugin.cmake
    GoogleMock.cmake
    Havok.cmake
@ -59,6 +58,7 @@ set(cmake_SOURCE_FILES
    LLKDU.cmake
    LLLogin.cmake
    LLMath.cmake
+    LLMeshOptimizer.cmake
    LLMessage.cmake
    LLPhysicsExtensions.cmake
    LLPlugin.cmake
@ -72,6 +72,7 @@ set(cmake_SOURCE_FILES
    LLXML.cmake
    Linking.cmake
    MediaPluginBase.cmake
+    MESHOPTIMIZER.cmake
    NDOF.cmake
    OPENAL.cmake
    OpenGL.cmake
--- a/indra/cmake/Copy3rdPartyLibs.cmake
+++ b/indra/cmake/Copy3rdPartyLibs.cmake
@ -57,7 +57,6 @@ if(WINDOWS)
        libaprutil-1.dll
        libapriconv-1.dll
        nghttp2.dll
-        glod.dll
        libhunspell.dll
        uriparser.dll
        )
@ -104,6 +103,8 @@ if(WINDOWS)
        set(MSVC_VER 120)
    elseif (MSVC_VERSION GREATER_EQUAL 1910 AND MSVC_VERSION LESS 1920) # Visual Studio 2017
        set(MSVC_VER 140)
+    elseif (MSVC_VERSION GREATER_EQUAL 1920 AND MSVC_VERSION LESS 1930) # Visual Studio 2019
+        set(MSVC_VER 140)
    else (MSVC80)
        MESSAGE(WARNING "New MSVC_VERSION ${MSVC_VERSION} of MSVC: adapt Copy3rdPartyLibs.cmake")
    endif (MSVC80)
@ -166,7 +167,6 @@ elseif(DARWIN)
        libaprutil-1.0.dylib
        libaprutil-1.dylib
        ${EXPAT_COPY}
-        libGLOD.dylib
        libhunspell-1.3.0.dylib
        libndofdev.dylib
        libnghttp2.dylib
@ -215,7 +215,6 @@ elseif(LINUX)
        ${EXPAT_COPY}
        libfreetype.so.6.6.2
        libfreetype.so.6
-        libGLOD.so
        libgmodule-2.0.so
        libgobject-2.0.so
        libhunspell-1.3.so.0.0.0
--- a/indra/cmake/GLOD.cmake
+++ b/indra/cmake/GLOD.cmake
@ -1,11 +0,0 @@
-# -*- cmake -*-
-include(Prebuilt)
-
-if (NOT USESYSTEMLIBS)
-  use_prebuilt_binary(glod)
-endif (NOT USESYSTEMLIBS)
-
-set(GLODLIB ON CACHE BOOL "Using GLOD library")
-
-set(GLOD_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include)
-set(GLOD_LIBRARIES GLOD)
--- a/indra/cmake/LLCommon.cmake
+++ b/indra/cmake/LLCommon.cmake
@ -3,12 +3,14 @@
 include(APR)
 include(Boost)
 include(EXPAT)
+include(Tracy)
 include(ZLIB)

 set(LLCOMMON_INCLUDE_DIRS
    ${LIBS_OPEN_DIR}/llcommon
    ${APRUTIL_INCLUDE_DIR}
    ${APR_INCLUDE_DIR}
+    ${TRACY_INCLUDE_DIR}
    )
 set(LLCOMMON_SYSTEM_INCLUDE_DIRS
    ${Boost_INCLUDE_DIRS}
@ -30,7 +32,8 @@ else (LINUX)
        ${BOOST_FIBER_LIBRARY} 
        ${BOOST_CONTEXT_LIBRARY} 
        ${BOOST_THREAD_LIBRARY} 
-        ${BOOST_SYSTEM_LIBRARY} )
+        ${BOOST_SYSTEM_LIBRARY}
+        )
 endif (LINUX)

 set(LLCOMMON_LINK_SHARED OFF CACHE BOOL "Build the llcommon target as a static library.")
--- a/indra/cmake/LLMeshOptimizer.cmake
+++ b/indra/cmake/LLMeshOptimizer.cmake
@ -0,0 +1,7 @@
+# -*- cmake -*-
+
+set(LLMESHOPTIMIZER_INCLUDE_DIRS
+    ${LIBS_OPEN_DIR}/llmeshoptimizer
+    )
+
+set(LLMESHOPTIMIZER_LIBRARIES llmeshoptimizer)
--- a/indra/cmake/MESHOPTIMIZER.cmake
+++ b/indra/cmake/MESHOPTIMIZER.cmake
@ -0,0 +1,16 @@
+# -*- cmake -*-
+
+include(Linking)
+include(Prebuilt)
+
+use_prebuilt_binary(meshoptimizer)
+
+if (WINDOWS)
+  set(MESHOPTIMIZER_LIBRARIES meshoptimizer.lib)
+elseif (LINUX)
+  set(MESHOPTIMIZER_LIBRARIES meshoptimizer.o)
+elseif (DARWIN)
+  set(MESHOPTIMIZER_LIBRARIES libmeshoptimizer.a)
+endif (WINDOWS)
+
+set(MESHOPTIMIZER_INCLUDE_DIRS ${LIBS_PREBUILT_DIR}/include/meshoptimizer)
--- a/indra/cmake/Tracy.cmake
+++ b/indra/cmake/Tracy.cmake
@ -0,0 +1,29 @@
+# -*- cmake -*-
+include(Prebuilt)
+
+set(USE_TRACY OFF CACHE BOOL "Use Tracy profiler.")
+
+if (USE_TRACY)
+  set(TRACY_INCLUDE_DIR ${LIBS_PREBUILT_DIR}/include/tracy) 
+
+# See: indra/llcommon/llprofiler.h
+  add_definitions(-DLL_PROFILER_CONFIGURATION=3)
+  use_prebuilt_binary(tracy)
+
+  if (WINDOWS)
+    MESSAGE(STATUS "Including Tracy for Windows: '${TRACY_INCLUDE_DIR}'")
+  endif (WINDOWS)
+
+  if (DARWIN)
+    MESSAGE(STATUS "Including Tracy for Darwin: '${TRACY_INCLUDE_DIR}'")
+  endif (DARWIN)
+
+  if (LINUX)
+    MESSAGE(STATUS "Including Tracy for Linux: '${TRACY_INCLUDE_DIR}'")
+  endif (LINUX)
+else (USE_TRACY)
+  # Tracy.cmake should not set LLCOMMON_INCLUDE_DIRS, let LLCommon.cmake do that
+  set(TRACY_INCLUDE_DIR "")
+  set(TRACY_LIBRARY "")
+endif (USE_TRACY)
+
--- a/indra/edit-me-to-trigger-new-build.txt
+++ b/indra/edit-me-to-trigger-new-build.txt
@ -1,3 +1,4 @@
 euclid 5/29/2020
 euclid 7/23/2020
 euclid 4/29/2021
+euclid 10/5/2021 DRTVWR-546
--- a/indra/llappearance/llavatarappearance.cpp
+++ b/indra/llappearance/llavatarappearance.cpp
@ -1590,7 +1590,7 @@ BOOL LLAvatarAppearance::allocateCollisionVolumes( U32 num )
        delete_and_clear_array(mCollisionVolumes);
        mNumCollisionVolumes = 0;

-        mCollisionVolumes = new(std::nothrow) LLAvatarJointCollisionVolume[num];
+        mCollisionVolumes = new LLAvatarJointCollisionVolume[num];
        if (!mCollisionVolumes)
        {
            LL_WARNS() << "Failed to allocate collision volumes" << LL_ENDL;
--- a/indra/llappearance/lldriverparam.h
+++ b/indra/llappearance/lldriverparam.h
@ -77,73 +77,63 @@ protected:

 //-----------------------------------------------------------------------------

-LL_ALIGN_PREFIX(16)
-class LLDriverParam : public LLViewerVisualParam
+class alignas(16) LLDriverParam : public LLViewerVisualParam
 {
+    LL_ALIGN_NEW
 private:
-	// Hide the default constructor.  Force construction with LLAvatarAppearance.
-	LLDriverParam() {}
+    // Hide the default constructor.  Force construction with LLAvatarAppearance.
+    LLDriverParam() {}
 public:
-	LLDriverParam(LLAvatarAppearance *appearance, LLWearable* wearable = NULL);
-	~LLDriverParam();
+    LLDriverParam(LLAvatarAppearance* appearance, LLWearable* wearable = NULL);
+    ~LLDriverParam();

-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
+    // Special: These functions are overridden by child classes
+    LLDriverParamInfo* getInfo() const { return (LLDriverParamInfo*)mInfo; }
+    //   This sets mInfo and calls initialization functions
+    BOOL					setInfo(LLDriverParamInfo* info);

-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
+    LLAvatarAppearance* getAvatarAppearance() { return mAvatarAppearance; }
+    const LLAvatarAppearance* getAvatarAppearance() const { return mAvatarAppearance; }

-	// Special: These functions are overridden by child classes
-	LLDriverParamInfo*		getInfo() const { return (LLDriverParamInfo*)mInfo; }
-	//   This sets mInfo and calls initialization functions
-	BOOL					setInfo(LLDriverParamInfo *info);
+    void					updateCrossDrivenParams(LLWearableType::EType driven_type);

-	LLAvatarAppearance* getAvatarAppearance() { return mAvatarAppearance; }
-	const LLAvatarAppearance* getAvatarAppearance() const { return mAvatarAppearance; }
+    /*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable) const;

-	void					updateCrossDrivenParams(LLWearableType::EType driven_type);
+    // LLVisualParam Virtual functions
+    /*virtual*/ void				apply(ESex sex) {} // apply is called separately for each driven param.
+    /*virtual*/ void				setWeight(F32 weight);
+    /*virtual*/ void				setAnimationTarget(F32 target_value);
+    /*virtual*/ void				stopAnimating();
+    /*virtual*/ BOOL				linkDrivenParams(visual_param_mapper mapper, BOOL only_cross_params);
+    /*virtual*/ void				resetDrivenParams();

-	/*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable) const;
+    // LLViewerVisualParam Virtual functions
+    /*virtual*/ F32					getTotalDistortion();
+    /*virtual*/ const LLVector4a& getAvgDistortion();
+    /*virtual*/ F32					getMaxDistortion();
+    /*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh* poly_mesh);
+    /*virtual*/ const LLVector4a* getFirstDistortion(U32* index, LLPolyMesh** poly_mesh);
+    /*virtual*/ const LLVector4a* getNextDistortion(U32* index, LLPolyMesh** poly_mesh);

-	// LLVisualParam Virtual functions
-	/*virtual*/ void				apply( ESex sex ) {} // apply is called separately for each driven param.
-	/*virtual*/ void				setWeight(F32 weight);
-	/*virtual*/ void				setAnimationTarget( F32 target_value);
-	/*virtual*/ void				stopAnimating();
-	/*virtual*/ BOOL				linkDrivenParams(visual_param_mapper mapper, BOOL only_cross_params);
-	/*virtual*/ void				resetDrivenParams();
-	
-	// LLViewerVisualParam Virtual functions
-	/*virtual*/ F32					getTotalDistortion();
-	/*virtual*/ const LLVector4a&	getAvgDistortion();
-	/*virtual*/ F32					getMaxDistortion();
-	/*virtual*/ LLVector4a			getVertexDistortion(S32 index, LLPolyMesh *poly_mesh);
-	/*virtual*/ const LLVector4a*	getFirstDistortion(U32 *index, LLPolyMesh **poly_mesh);
-	/*virtual*/ const LLVector4a*	getNextDistortion(U32 *index, LLPolyMesh **poly_mesh);
+    S32								getDrivenParamsCount() const;
+    const LLViewerVisualParam* getDrivenParam(S32 index) const;

-	S32								getDrivenParamsCount() const;
-	const LLViewerVisualParam*		getDrivenParam(S32 index) const;
-
-	typedef std::vector<LLDrivenEntry> entry_list_t;
-    entry_list_t&                   getDrivenList() { return mDriven; }
+    typedef std::vector<LLDrivenEntry> entry_list_t;
+    entry_list_t& getDrivenList() { return mDriven; }
    void                            setDrivenList(entry_list_t& driven_list) { mDriven = driven_list; }

 protected:
-	LLDriverParam(const LLDriverParam& pOther);
-	F32 getDrivenWeight(const LLDrivenEntry* driven, F32 input_weight);
-	void setDrivenWeight(LLDrivenEntry *driven, F32 driven_weight);
+    LLDriverParam(const LLDriverParam& pOther);
+    F32 getDrivenWeight(const LLDrivenEntry* driven, F32 input_weight);
+    void setDrivenWeight(LLDrivenEntry* driven, F32 driven_weight);


-	LL_ALIGN_16(LLVector4a	mDefaultVec); // temp holder
-	entry_list_t mDriven;
-	LLViewerVisualParam* mCurrentDistortionParam;
-	// Backlink only; don't make this an LLPointer.
-	LLAvatarAppearance* mAvatarAppearance;
-	LLWearable* mWearablep;
-} LL_ALIGN_POSTFIX(16);
+    LL_ALIGN_16(LLVector4a	mDefaultVec); // temp holder
+    entry_list_t mDriven;
+    LLViewerVisualParam* mCurrentDistortionParam;
+    // Backlink only; don't make this an LLPointer.
+    LLAvatarAppearance* mAvatarAppearance;
+    LLWearable* mWearablep;
+};

 #endif  // LL_LLDRIVERPARAM_H
--- a/indra/llappearance/llpolymorph.cpp
+++ b/indra/llappearance/llpolymorph.cpp
@ -539,8 +539,6 @@ F32	LLPolyMorphTarget::getMaxDistortion()
 //-----------------------------------------------------------------------------
 // apply()
 //-----------------------------------------------------------------------------
-static LLTrace::BlockTimerStatHandle FTM_APPLY_MORPH_TARGET("Apply Morph");
-
 void LLPolyMorphTarget::apply( ESex avatar_sex )
 {
 	if (!mMorphData || mNumMorphMasksPending > 0)
@ -548,7 +546,7 @@ void LLPolyMorphTarget::apply( ESex avatar_sex )
 		return;
 	}

-	LL_RECORD_BLOCK_TIME(FTM_APPLY_MORPH_TARGET);
+    LL_PROFILE_ZONE_SCOPED;

 	mLastSex = avatar_sex;

--- a/indra/llappearance/llpolymorph.h
+++ b/indra/llappearance/llpolymorph.h
@ -41,24 +41,14 @@ class LLWearable;
 //-----------------------------------------------------------------------------
 // LLPolyMorphData()
 //-----------------------------------------------------------------------------
-LL_ALIGN_PREFIX(16)
-class LLPolyMorphData
+class alignas(16) LLPolyMorphData
 {
+    LL_ALIGN_NEW
 public:
 	LLPolyMorphData(const std::string& morph_name);
 	~LLPolyMorphData();
 	LLPolyMorphData(const LLPolyMorphData &rhs);

-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
 	BOOL			loadBinary(LLFILE* fp, LLPolyMeshSharedData *mesh);
 	const std::string& getName() { return mName; }

@ -76,7 +66,7 @@ public:

 	F32					mTotalDistortion;	// vertex distortion summed over entire morph
 	F32					mMaxDistortion;		// maximum single vertex distortion in a given morph
-	LL_ALIGN_16(LLVector4a			mAvgDistortion);		// average vertex distortion, to infer directionality of the morph
+	LLVector4a			mAvgDistortion;		// average vertex distortion, to infer directionality of the morph
 	LLPolyMeshSharedData*	mMesh;

 private:
@ -154,8 +144,9 @@ protected:
 // These morph targets must be topologically consistent with a given Polymesh
 // (share face sets)
 //-----------------------------------------------------------------------------
-class LLPolyMorphTarget : public LLViewerVisualParam
+class alignas(16) LLPolyMorphTarget : public LLViewerVisualParam
 {
+    LL_ALIGN_NEW
 public:
 	LLPolyMorphTarget(LLPolyMesh *poly_mesh);
 	~LLPolyMorphTarget();
@ -184,16 +175,6 @@ public:

    void    applyVolumeChanges(F32 delta_weight); // SL-315 - for resetSkeleton()

-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
 protected:
 	LLPolyMorphTarget(const LLPolyMorphTarget& pOther);

--- a/indra/llappearance/llpolyskeletaldistortion.cpp
+++ b/indra/llappearance/llpolyskeletaldistortion.cpp
@ -188,11 +188,9 @@ BOOL LLPolySkeletalDistortion::setInfo(LLPolySkeletalDistortionInfo *info)
 //-----------------------------------------------------------------------------
 // apply()
 //-----------------------------------------------------------------------------
-static LLTrace::BlockTimerStatHandle FTM_POLYSKELETAL_DISTORTION_APPLY("Skeletal Distortion");
-
 void LLPolySkeletalDistortion::apply( ESex avatar_sex )
 {
-    LL_RECORD_BLOCK_TIME(FTM_POLYSKELETAL_DISTORTION_APPLY);
+    LL_PROFILE_ZONE_SCOPED;

    F32 effective_weight = ( getSex() & avatar_sex ) ? mCurWeight : getDefaultWeight();

--- a/indra/llappearance/llpolyskeletaldistortion.h
+++ b/indra/llappearance/llpolyskeletaldistortion.h
@ -62,9 +62,9 @@ struct LLPolySkeletalBoneInfo
 	BOOL mHasPositionDeformation;
 };

-LL_ALIGN_PREFIX(16)
-class LLPolySkeletalDistortionInfo : public LLViewerVisualParamInfo
+class alignas(16) LLPolySkeletalDistortionInfo : public LLViewerVisualParamInfo
 {
+    LL_ALIGN_NEW
 	friend class LLPolySkeletalDistortion;
 public:
 	
@ -73,19 +73,6 @@ public:
 	
 	/*virtual*/ BOOL parseXml(LLXmlTreeNode* node);

-
-
-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
-
 protected:
 	typedef std::vector<LLPolySkeletalBoneInfo> bone_info_list_t;
 	bone_info_list_t mBoneInfoList;
@ -95,19 +82,10 @@ protected:
 // LLPolySkeletalDeformation
 // A set of joint scale data for deforming the avatar mesh
 //-----------------------------------------------------------------------------
-class LLPolySkeletalDistortion : public LLViewerVisualParam
+class alignas(16) LLPolySkeletalDistortion : public LLViewerVisualParam
 {
+    LL_ALIGN_NEW
 public:
-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
 	LLPolySkeletalDistortion(LLAvatarAppearance *avatarp);
 	~LLPolySkeletalDistortion();

--- a/indra/llappearance/lltexlayer.cpp
+++ b/indra/llappearance/lltexlayer.cpp
@ -142,17 +142,8 @@ BOOL LLTexLayerSetBuffer::renderTexLayerSet(LLRenderTarget* bound_target)

 	BOOL success = TRUE;
 	
-	bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
-	if (use_shaders)
-	{
-		gAlphaMaskProgram.bind();
-		gAlphaMaskProgram.setMinimumAlpha(0.004f);
-	}
-	else
-	{
-		gGL.setAlphaRejectSettings(LLRender::CF_GREATER, 0.00f);
-	}
+	gAlphaMaskProgram.bind();
+	gAlphaMaskProgram.setMinimumAlpha(0.004f);

 	LLVertexBuffer::unbind();

@ -164,10 +155,7 @@ BOOL LLTexLayerSetBuffer::renderTexLayerSet(LLRenderTarget* bound_target)

 	midRenderTexLayerSet(success);

-	if (use_shaders)
-	{
-		gAlphaMaskProgram.unbind();
-	}
+	gAlphaMaskProgram.unbind();

 	LLVertexBuffer::unbind();
 	
@ -390,8 +378,6 @@ BOOL LLTexLayerSet::render( S32 x, S32 y, S32 width, S32 height, LLRenderTarget*
 		}
 	}

-	bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
 	LLGLSUIDefault gls_ui;
 	LLGLDepthTest gls_depth(GL_FALSE, GL_FALSE);
 	gGL.setColorMask(true, true);
@ -400,20 +386,14 @@ BOOL LLTexLayerSet::render( S32 x, S32 y, S32 width, S32 height, LLRenderTarget*
 	{
 		gGL.flush();
 		LLGLDisable no_alpha(GL_ALPHA_TEST);
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.0f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.0f);
 		gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 		gGL.color4f( 0.f, 0.f, 0.f, 1.f );

 		gl_rect_2d_simple( width, height );

 		gGL.flush();
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.004f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.004f);
 	}

 	if (mIsVisible)
@ -440,10 +420,7 @@ BOOL LLTexLayerSet::render( S32 x, S32 y, S32 width, S32 height, LLRenderTarget*

 		gGL.setSceneBlendType(LLRender::BT_REPLACE);
 		LLGLDisable no_alpha(GL_ALPHA_TEST);
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.f);

 		gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 		gGL.color4f( 0.f, 0.f, 0.f, 0.f );
@ -452,10 +429,7 @@ BOOL LLTexLayerSet::render( S32 x, S32 y, S32 width, S32 height, LLRenderTarget*
 		gGL.setSceneBlendType(LLRender::BT_ALPHA);

 		gGL.flush();
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.004f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.004f);
 	}

 	return success;
@ -520,10 +494,9 @@ const LLTexLayerSetBuffer* LLTexLayerSet::getComposite() const
 	return mComposite;
 }

-static LLTrace::BlockTimerStatHandle FTM_GATHER_MORPH_MASK_ALPHA("gatherMorphMaskAlpha");
 void LLTexLayerSet::gatherMorphMaskAlpha(U8 *data, S32 origin_x, S32 origin_y, S32 width, S32 height, LLRenderTarget* bound_target)
 {
-	LL_RECORD_BLOCK_TIME(FTM_GATHER_MORPH_MASK_ALPHA);
+    LL_PROFILE_ZONE_SCOPED;
 	memset(data, 255, width * height);

 	for( layer_list_t::iterator iter = mLayerList.begin(); iter != mLayerList.end(); iter++ )
@ -536,14 +509,11 @@ void LLTexLayerSet::gatherMorphMaskAlpha(U8 *data, S32 origin_x, S32 origin_y, S
 	renderAlphaMaskTextures(origin_x, origin_y, width, height, bound_target, true);
 }

-static LLTrace::BlockTimerStatHandle FTM_RENDER_ALPHA_MASK_TEXTURES("renderAlphaMaskTextures");
 void LLTexLayerSet::renderAlphaMaskTextures(S32 x, S32 y, S32 width, S32 height, LLRenderTarget* bound_target, bool forceClear)
 {
-	LL_RECORD_BLOCK_TIME(FTM_RENDER_ALPHA_MASK_TEXTURES);
+    LL_PROFILE_ZONE_SCOPED;
 	const LLTexLayerSetInfo *info = getInfo();
 	
-	bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
 	gGL.setColorMask(false, true);
 	gGL.setSceneBlendType(LLRender::BT_REPLACE);
 	
@ -557,7 +527,6 @@ void LLTexLayerSet::renderAlphaMaskTextures(S32 x, S32 y, S32 width, S32 height,
 			{
 				LLGLSUIDefault gls_ui;
 				gGL.getTexUnit(0)->bind(tex);
-				gGL.getTexUnit(0)->setTextureBlendType( LLTexUnit::TB_REPLACE );
 				gl_rect_2d_simple_tex( width, height );
 			}
 		}
@ -568,20 +537,14 @@ void LLTexLayerSet::renderAlphaMaskTextures(S32 x, S32 y, S32 width, S32 height,
 		// Set the alpha channel to one (clean up after previous blending)
 		gGL.flush();
 		LLGLDisable no_alpha(GL_ALPHA_TEST);
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.f);
 		gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 		gGL.color4f( 0.f, 0.f, 0.f, 1.f );
 		
 		gl_rect_2d_simple( width, height );
 		
 		gGL.flush();
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.004f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.004f);
 	}
 	
 	// (Optional) Mask out part of the baked texture with alpha masks
@ -589,7 +552,6 @@ void LLTexLayerSet::renderAlphaMaskTextures(S32 x, S32 y, S32 width, S32 height,
 	if (mMaskLayerList.size() > 0)
 	{
 		gGL.setSceneBlendType(LLRender::BT_MULT_ALPHA);
-		gGL.getTexUnit(0)->setTextureBlendType( LLTexUnit::TB_REPLACE );
 		for (layer_list_t::iterator iter = mMaskLayerList.begin(); iter != mMaskLayerList.end(); iter++)
 		{
 			LLTexLayerInterface* layer = *iter;
@ -602,7 +564,6 @@ void LLTexLayerSet::renderAlphaMaskTextures(S32 x, S32 y, S32 width, S32 height,
 	
 	gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 	
-	gGL.getTexUnit(0)->setTextureBlendType(LLTexUnit::TB_MULT);
 	gGL.setColorMask(true, true);
 	gGL.setSceneBlendType(LLRender::BT_ALPHA);
 }
@ -1128,13 +1089,6 @@ BOOL LLTexLayer::render(S32 x, S32 y, S32 width, S32 height, LLRenderTarget* bou
 	// *TODO: Is this correct?
 	//gPipeline.disableLights();
 	stop_glerror();
-	if (!LLGLSLShader::sNoFixedFunction)
-	{
-		glDisable(GL_LIGHTING);
-	}
-	stop_glerror();
-
-	bool use_shaders = LLGLSLShader::sNoFixedFunction;

 	LLColor4 net_color;
 	BOOL color_specified = findNetColor(&net_color);
@ -1221,10 +1175,7 @@ BOOL LLTexLayer::render(S32 x, S32 y, S32 width, S32 height, LLRenderTarget* bou
 					LLGLDisable alpha_test(no_alpha_test ? GL_ALPHA_TEST : 0);
 					if (no_alpha_test)
 					{
-						if (use_shaders)
-						{
-							gAlphaMaskProgram.setMinimumAlpha(0.f);
-						}
+						gAlphaMaskProgram.setMinimumAlpha(0.f);
 					}
 					
 					LLTexUnit::eTextureAddressMode old_mode = tex->getAddressMode();
@ -1238,10 +1189,7 @@ BOOL LLTexLayer::render(S32 x, S32 y, S32 width, S32 height, LLRenderTarget* bou
 					gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 					if (no_alpha_test)
 					{
-						if (use_shaders)
-						{
-							gAlphaMaskProgram.setMinimumAlpha(0.004f);
-						}
+						gAlphaMaskProgram.setMinimumAlpha(0.004f);
 					}
 				}
 			}
@ -1275,18 +1223,12 @@ BOOL LLTexLayer::render(S32 x, S32 y, S32 width, S32 height, LLRenderTarget* bou
 		color_specified )
 	{
 		LLGLDisable no_alpha(GL_ALPHA_TEST);
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.000f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.000f);

 		gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
 		gGL.color4fv( net_color.mV );
 		gl_rect_2d_simple( width, height );
-		if (use_shaders)
-		{
-			gAlphaMaskProgram.setMinimumAlpha(0.004f);
-		}
+		gAlphaMaskProgram.setMinimumAlpha(0.004f);
 	}

 	if( alpha_mask_specified || getInfo()->mWriteAllChannels )
@ -1374,25 +1316,17 @@ BOOL LLTexLayer::blendAlphaTexture(S32 x, S32 y, S32 width, S32 height)

 	gGL.flush();
 	
-	bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
 	if( !getInfo()->mStaticImageFileName.empty() )
 	{
 		LLGLTexture* tex = LLTexLayerStaticImageList::getInstance()->getTexture( getInfo()->mStaticImageFileName, getInfo()->mStaticImageIsMask );
 		if( tex )
 		{
 			LLGLSNoAlphaTest gls_no_alpha_test;
-			if (use_shaders)
-			{
-				gAlphaMaskProgram.setMinimumAlpha(0.f);
-			}
+			gAlphaMaskProgram.setMinimumAlpha(0.f);
 			gGL.getTexUnit(0)->bind(tex, TRUE);
 			gl_rect_2d_simple_tex( width, height );
 			gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
-			if (use_shaders)
-			{
-				gAlphaMaskProgram.setMinimumAlpha(0.004f);
-			}
+			gAlphaMaskProgram.setMinimumAlpha(0.004f);
 		}
 		else
 		{
@ -1407,18 +1341,11 @@ BOOL LLTexLayer::blendAlphaTexture(S32 x, S32 y, S32 width, S32 height)
 			if (tex)
 			{
 				LLGLSNoAlphaTest gls_no_alpha_test;
-				if (use_shaders)
-				{
-					gAlphaMaskProgram.setMinimumAlpha(0.f);
-				}
+				gAlphaMaskProgram.setMinimumAlpha(0.f);
 				gGL.getTexUnit(0)->bind(tex);
 				gl_rect_2d_simple_tex( width, height );
 				gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
-				success = TRUE;
-				if (use_shaders)
-				{
-					gAlphaMaskProgram.setMinimumAlpha(0.004f);
-				}
+				gAlphaMaskProgram.setMinimumAlpha(0.004f);
 			}
 		}
 	}
@ -1431,7 +1358,6 @@ BOOL LLTexLayer::blendAlphaTexture(S32 x, S32 y, S32 width, S32 height)
 	addAlphaMask(data, originX, originY, width, height, bound_target);
 }

-static LLTrace::BlockTimerStatHandle FTM_RENDER_MORPH_MASKS("renderMorphMasks");
 void LLTexLayer::renderMorphMasks(S32 x, S32 y, S32 width, S32 height, const LLColor4 &layer_color, LLRenderTarget* bound_target, bool force_render)
 {
 	if (!force_render && !hasMorph())
@ -1439,18 +1365,12 @@ void LLTexLayer::renderMorphMasks(S32 x, S32 y, S32 width, S32 height, const LLC
 		LL_DEBUGS() << "skipping renderMorphMasks for " << getUUID() << LL_ENDL;
 		return;
 	}
-	LL_RECORD_BLOCK_TIME(FTM_RENDER_MORPH_MASKS);
+    LL_PROFILE_ZONE_SCOPED;
 	BOOL success = TRUE;

 	llassert( !mParamAlphaList.empty() );

-	bool use_shaders = LLGLSLShader::sNoFixedFunction;
-
-	if (use_shaders)
-	{
-		gAlphaMaskProgram.setMinimumAlpha(0.f);
-	}
-
+	gAlphaMaskProgram.setMinimumAlpha(0.f);
 	gGL.setColorMask(false, true);

 	LLTexLayerParamAlpha* first_param = *mParamAlphaList.begin();
@ -1535,10 +1455,7 @@ void LLTexLayer::renderMorphMasks(S32 x, S32 y, S32 width, S32 height, const LLC
 		gl_rect_2d_simple( width, height );
 	}

-	if (use_shaders)
-	{
-		gAlphaMaskProgram.setMinimumAlpha(0.004f);
-	}
+	gAlphaMaskProgram.setMinimumAlpha(0.004f);

 	LLGLSUIDefault gls_ui;

@ -1637,10 +1554,9 @@ void LLTexLayer::renderMorphMasks(S32 x, S32 y, S32 width, S32 height, const LLC
 	}
 }

-static LLTrace::BlockTimerStatHandle FTM_ADD_ALPHA_MASK("addAlphaMask");
 void LLTexLayer::addAlphaMask(U8 *data, S32 originX, S32 originY, S32 width, S32 height, LLRenderTarget* bound_target)
 {
-	LL_RECORD_BLOCK_TIME(FTM_ADD_ALPHA_MASK);
+    LL_PROFILE_ZONE_SCOPED;
 	S32 size = width * height;
 	const U8* alphaData = getAlphaData();
 	if (!alphaData && hasAlphaParams())
@ -1981,10 +1897,9 @@ void LLTexLayerStaticImageList::deleteCachedImages()

 // Returns an LLImageTGA that contains the encoded data from a tga file named file_name.
 // Caches the result to speed identical subsequent requests.
-static LLTrace::BlockTimerStatHandle FTM_LOAD_STATIC_TGA("getImageTGA");
 LLImageTGA* LLTexLayerStaticImageList::getImageTGA(const std::string& file_name)
 {
-	LL_RECORD_BLOCK_TIME(FTM_LOAD_STATIC_TGA);
+    LL_PROFILE_ZONE_SCOPED;
 	const char *namekey = mImageNames.addString(file_name);
 	image_tga_map_t::const_iterator iter = mStaticImageListTGA.find(namekey);
 	if( iter != mStaticImageListTGA.end() )
@ -2011,10 +1926,9 @@ LLImageTGA* LLTexLayerStaticImageList::getImageTGA(const std::string& file_name)

 // Returns a GL Image (without a backing ImageRaw) that contains the decoded data from a tga file named file_name.
 // Caches the result to speed identical subsequent requests.
-static LLTrace::BlockTimerStatHandle FTM_LOAD_STATIC_TEXTURE("getTexture");
 LLGLTexture* LLTexLayerStaticImageList::getTexture(const std::string& file_name, BOOL is_mask)
 {
-	LL_RECORD_BLOCK_TIME(FTM_LOAD_STATIC_TEXTURE);
+    LL_PROFILE_ZONE_SCOPED;
 	LLPointer<LLGLTexture> tex;
 	const char *namekey = mImageNames.addString(file_name);

@ -2061,10 +1975,9 @@ LLGLTexture* LLTexLayerStaticImageList::getTexture(const std::string& file_name,

 // Reads a .tga file, decodes it, and puts the decoded data in image_raw.
 // Returns TRUE if successful.
-static LLTrace::BlockTimerStatHandle FTM_LOAD_IMAGE_RAW("loadImageRaw");
 BOOL LLTexLayerStaticImageList::loadImageRaw(const std::string& file_name, LLImageRaw* image_raw)
 {
-	LL_RECORD_BLOCK_TIME(FTM_LOAD_IMAGE_RAW);
+    LL_PROFILE_ZONE_SCOPED;
 	BOOL success = FALSE;
 	std::string path;
 	path = gDirUtilp->getExpandedFilename(LL_PATH_CHARACTER,file_name);
--- a/indra/llappearance/lltexlayerparams.cpp
+++ b/indra/llappearance/lltexlayerparams.cpp
@ -261,10 +261,9 @@ BOOL LLTexLayerParamAlpha::getSkip() const
 }


-static LLTrace::BlockTimerStatHandle FTM_TEX_LAYER_PARAM_ALPHA("alpha render");
 BOOL LLTexLayerParamAlpha::render(S32 x, S32 y, S32 width, S32 height)
 {
-	LL_RECORD_BLOCK_TIME(FTM_TEX_LAYER_PARAM_ALPHA);
+    LL_PROFILE_ZONE_SCOPED;
 	BOOL success = TRUE;

 	if (!mTexLayer)
--- a/indra/llappearance/lltexlayerparams.h
+++ b/indra/llappearance/lltexlayerparams.h
@ -63,23 +63,14 @@ protected:
 // 
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 LL_ALIGN_PREFIX(16)
-class LLTexLayerParamAlpha : public LLTexLayerParam
+class alignas(16) LLTexLayerParamAlpha : public LLTexLayerParam
 {
+    LL_ALIGN_NEW
 public:
 	LLTexLayerParamAlpha( LLTexLayerInterface* layer );
 	LLTexLayerParamAlpha( LLAvatarAppearance* appearance );
 	/*virtual*/ ~LLTexLayerParamAlpha();

-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
 	/*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable = NULL) const;

 	// LLVisualParam Virtual functions
@ -146,9 +137,9 @@ private:
 //
 //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-LL_ALIGN_PREFIX(16)
-class LLTexLayerParamColor : public LLTexLayerParam
+class alignas(16) LLTexLayerParamColor : public LLTexLayerParam
 {
+    LL_ALIGN_NEW
 public:
 	enum EColorOperation
 	{
@ -161,16 +152,6 @@ public:
 	LLTexLayerParamColor( LLTexLayerInterface* layer );
 	LLTexLayerParamColor( LLAvatarAppearance* appearance );

-	void* operator new(size_t size)
-	{
-		return ll_aligned_malloc_16(size);
-	}
-
-	void operator delete(void* ptr)
-	{
-		ll_aligned_free_16(ptr);
-	}
-
 	/* virtual */ ~LLTexLayerParamColor();

 	/*virtual*/ LLViewerVisualParam* cloneParam(LLWearable* wearable = NULL) const;
@ -198,8 +179,8 @@ protected:

 	virtual void onGlobalColorChanged() {}
 private:
-	LL_ALIGN_16(LLVector4a				mAvgDistortionVec);
-} LL_ALIGN_POSTFIX(16);
+	LLVector4a				mAvgDistortionVec;
+};

 class LLTexLayerParamColorInfo : public LLViewerVisualParamInfo
 {
--- a/indra/llcharacter/llcharacter.cpp
+++ b/indra/llcharacter/llcharacter.cpp
@ -188,20 +188,15 @@ void LLCharacter::requestStopMotion( LLMotion* motion)
 //-----------------------------------------------------------------------------
 // updateMotions()
 //-----------------------------------------------------------------------------
-static LLTrace::BlockTimerStatHandle FTM_UPDATE_ANIMATION("Update Animation");
-static LLTrace::BlockTimerStatHandle FTM_UPDATE_HIDDEN_ANIMATION("Update Hidden Anim");
-static LLTrace::BlockTimerStatHandle FTM_UPDATE_MOTIONS("Update Motions");
-
 void LLCharacter::updateMotions(e_update_t update_type)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	if (update_type == HIDDEN_UPDATE)
 	{
-		LL_RECORD_BLOCK_TIME(FTM_UPDATE_HIDDEN_ANIMATION);
 		mMotionController.updateMotionsMinimal();
 	}
 	else
 	{
-		LL_RECORD_BLOCK_TIME(FTM_UPDATE_ANIMATION);
 		// unpause if the number of outstanding pause requests has dropped to the initial one
 		if (mMotionController.isPaused() && mPauseRequest->getNumRefs() == 1)
 		{
@ -209,7 +204,6 @@ void LLCharacter::updateMotions(e_update_t update_type)
 		}
 		bool force_update = (update_type == FORCE_UPDATE);
 		{
-			LL_RECORD_BLOCK_TIME(FTM_UPDATE_MOTIONS);
 			mMotionController.updateMotions(force_update);
 		}
 	}
--- a/indra/llcharacter/lleditingmotion.cpp
+++ b/indra/llcharacter/lleditingmotion.cpp
@ -163,6 +163,7 @@ BOOL LLEditingMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLEditingMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	LLVector3 focus_pt;
 	LLVector3* pointAtPt = (LLVector3*)mCharacter->getAnimationData("PointAtPoint");

--- a/indra/llcharacter/lleditingmotion.h
+++ b/indra/llcharacter/lleditingmotion.h
@ -42,9 +42,11 @@
 //-----------------------------------------------------------------------------
 // class LLEditingMotion
 //-----------------------------------------------------------------------------
+LL_ALIGN_PREFIX(16)
 class LLEditingMotion :
 	public LLMotion
 {
+    LL_ALIGN_NEW
 public:
 	// Constructor
 	LLEditingMotion(const LLUUID &id);
@ -108,6 +110,13 @@ public:
 	//-------------------------------------------------------------------------
 	// joint states to be animated
 	//-------------------------------------------------------------------------
+    LL_ALIGN_16(LLJoint				mParentJoint);
+    LL_ALIGN_16(LLJoint				mShoulderJoint);
+    LL_ALIGN_16(LLJoint				mElbowJoint);
+    LL_ALIGN_16(LLJoint				mWristJoint);
+    LL_ALIGN_16(LLJoint				mTarget);
+    LLJointSolverRP3	mIKSolver;
+
 	LLCharacter			*mCharacter;
 	LLVector3			mWristOffset;

@ -117,17 +126,10 @@ public:
 	LLPointer<LLJointState> mWristState;
 	LLPointer<LLJointState> mTorsoState;

-	LLJoint				mParentJoint;
-	LLJoint				mShoulderJoint;
-	LLJoint				mElbowJoint;
-	LLJoint				mWristJoint;
-	LLJoint				mTarget;
-	LLJointSolverRP3	mIKSolver;
-
 	static S32			sHandPose;
 	static S32			sHandPosePriority;
 	LLVector3			mLastSelectPt;
-};
+} LL_ALIGN_POSTFIX(16);

 #endif // LL_LLKEYFRAMEMOTION_H

--- a/indra/llcharacter/llhandmotion.cpp
+++ b/indra/llcharacter/llhandmotion.cpp
@ -121,6 +121,7 @@ BOOL LLHandMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLHandMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	eHandPose *requestedHandPose;

 	F32 timeDelta = time - mLastTime;
--- a/indra/llcharacter/llheadrotmotion.cpp
+++ b/indra/llcharacter/llheadrotmotion.cpp
@ -175,6 +175,7 @@ BOOL LLHeadRotMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLHeadRotMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	LLQuaternion	targetHeadRotWorld;
 	LLQuaternion	currentRootRotWorld = mRootJoint->getWorldRotation();
 	LLQuaternion	currentInvRootRotWorld = ~currentRootRotWorld;
@ -458,6 +459,7 @@ void LLEyeMotion::adjustEyeTarget(LLVector3* targetPos, LLJointState& left_eye_s
 //-----------------------------------------------------------------------------
 BOOL LLEyeMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	//calculate jitter
 	if (mEyeJitterTimer.getElapsedTimeF32() > mEyeJitterTime)
 	{
--- a/indra/llcharacter/lljoint.cpp
+++ b/indra/llcharacter/lljoint.cpp
@ -922,6 +922,13 @@ const LLMatrix4 &LLJoint::getWorldMatrix()
 	return mXform.getWorldMatrix();
 }

+const LLMatrix4a& LLJoint::getWorldMatrix4a()
+{
+    updateWorldMatrixParent();
+
+    return mWorldMatrix;
+}
+

 //--------------------------------------------------------------------
 // setWorldMatrix()
@ -1003,6 +1010,7 @@ void LLJoint::updateWorldMatrix()
 	{
 		sNumUpdates++;
 		mXform.updateMatrix(FALSE);
+        mWorldMatrix.loadu(mXform.getWorldMatrix());
 		mDirtyFlags = 0x0;
 	}
 }
--- a/indra/llcharacter/lljoint.h
+++ b/indra/llcharacter/lljoint.h
@ -38,6 +38,7 @@
 #include "m4math.h"
 #include "llquaternion.h"
 #include "xform.h"
+#include "llmatrix4a.h"

 const S32 LL_CHARACTER_MAX_JOINTS_PER_MESH = 15;
 // Need to set this to count of animate-able joints,
@ -85,8 +86,10 @@ inline bool operator!=(const LLVector3OverrideMap& a, const LLVector3OverrideMap
 //-----------------------------------------------------------------------------
 // class LLJoint
 //-----------------------------------------------------------------------------
+LL_ALIGN_PREFIX(16)
 class LLJoint
 {
+    LL_ALIGN_NEW
 public:
 	// priority levels, from highest to lowest
 	enum JointPriority
@ -114,16 +117,17 @@ public:
        SUPPORT_EXTENDED
    };
 protected:
-	std::string	mName;
+    // explicit transformation members
+    LL_ALIGN_16(LLMatrix4a          mWorldMatrix);
+    LLXformMatrix       mXform;
+	
+    std::string	mName;

 	SupportCategory mSupport;

 	// parent joint
 	LLJoint	*mParent;

-	// explicit transformation members
-	LLXformMatrix		mXform;
-
    LLVector3       mDefaultPosition;
    LLVector3       mDefaultScale;
    
@ -259,6 +263,8 @@ public:
 	const LLMatrix4 &getWorldMatrix();
 	void setWorldMatrix( const LLMatrix4& mat );

+    const LLMatrix4a& getWorldMatrix4a();
+
 	void updateWorldMatrixChildren();
 	void updateWorldMatrixParent();

@ -296,6 +302,6 @@ public:
    // These are used in checks of whether a pos/scale override is considered significant.
    bool aboveJointPosThreshold(const LLVector3& pos) const;
    bool aboveJointScaleThreshold(const LLVector3& scale) const;
-};
+} LL_ALIGN_POSTFIX(16);
 #endif // LL_LLJOINT_H

--- a/indra/llcharacter/lljointsolverrp3.cpp
+++ b/indra/llcharacter/lljointsolverrp3.cpp
@ -1,6 +1,6 @@
 /** 
 * @file lljointsolverrp3.cpp
- * @brief Implementation of LLJointSolverRP3 class.
+ * @brief Implementation of Joint Solver in 3D Real Projective space (RP3). See: https://en.wikipedia.org/wiki/Real_projective_space
 *
 * $LicenseInfo:firstyear=2001&license=viewerlgpl$
 * Second Life Viewer Source Code
@ -35,6 +35,11 @@

 #define F_EPSILON 0.00001f

+#if LL_RELEASE
+    #define DEBUG_JOINT_SOLVER 0
+#else
+    #define DEBUG_JOINT_SOLVER 1
+#endif

 //-----------------------------------------------------------------------------
 // Constructor
@ -150,6 +155,7 @@ void LLJointSolverRP3::solve()
 	LLVector3 cPos = mJointC->getWorldPosition();
 	LLVector3 gPos = mJointGoal->getWorldPosition();

+#if DEBUG_JOINT_SOLVER
 	LL_DEBUGS("JointSolver") << "LLJointSolverRP3::solve()" << LL_NEWLINE
 							<< "bPosLocal = " << mJointB->getPosition() << LL_NEWLINE
 							<< "cPosLocal = " << mJointC->getPosition() << LL_NEWLINE
@ -159,6 +165,7 @@ void LLJointSolverRP3::solve()
 							<< "bPos : " << bPos << LL_NEWLINE
 							<< "cPos : " << cPos << LL_NEWLINE
 							<< "gPos : " << gPos << LL_ENDL;
+#endif

 	//-------------------------------------------------------------------------
 	// get the poleVector in world space
@ -194,6 +201,7 @@ void LLJointSolverRP3::solve()
 	//-------------------------------------------------------------------------
 	LLVector3 abacCompOrthoVec = abVec - acVec * ((abVec * acVec)/(acVec * acVec));

+#if DEBUG_JOINT_SOLVER
 	LL_DEBUGS("JointSolver") << "abVec : " << abVec << LL_NEWLINE
 		<< "bcVec : " << bcVec << LL_NEWLINE
 		<< "acVec : " << acVec << LL_NEWLINE
@ -202,6 +210,7 @@ void LLJointSolverRP3::solve()
 		<< "bcLen : " << bcLen << LL_NEWLINE
 		<< "agLen : " << agLen << LL_NEWLINE
 		<< "abacCompOrthoVec : " << abacCompOrthoVec << LL_ENDL;
+#endif

 	//-------------------------------------------------------------------------
 	// compute the normal of the original ABC plane (and store for later)
@ -269,6 +278,7 @@ void LLJointSolverRP3::solve()

 	LLQuaternion bRot(theta - abbcAng, abbcOrthoVec);

+#if DEBUG_JOINT_SOLVER
 	LL_DEBUGS("JointSolver") << "abbcAng      : " << abbcAng << LL_NEWLINE
 							<< "abbcOrthoVec : " << abbcOrthoVec << LL_NEWLINE
 							<< "agLenSq      : " << agLenSq << LL_NEWLINE
@ -280,6 +290,7 @@ void LLJointSolverRP3::solve()
 								<< abbcAng*180.0f/F_PI << " " 
 								<< (theta - abbcAng)*180.0f/F_PI 
 	<< LL_ENDL;
+#endif

 	//-------------------------------------------------------------------------
 	// compute rotation that rotates new A->C to A->G
@ -293,9 +304,11 @@ void LLJointSolverRP3::solve()
 	LLQuaternion cgRot;
 	cgRot.shortestArc( acVec, agVec );

+#if DEBUG_JOINT_SOLVER
 	LL_DEBUGS("JointSolver") << "bcVec : " << bcVec << LL_NEWLINE
 							<< "acVec : " << acVec << LL_NEWLINE
 							<< "cgRot : " << cgRot << LL_ENDL;
+#endif

 	// update A->B and B->C with rotation from C to G
 	abVec = abVec * cgRot;
@ -358,11 +371,13 @@ void LLJointSolverRP3::solve()
 	//-------------------------------------------------------------------------
 	LLQuaternion twistRot( mTwist, agVec );

+#if DEBUG_JOINT_SOLVER
 	LL_DEBUGS("JointSolver") << "abcNorm = " << abcNorm << LL_NEWLINE
 							<< "apgNorm = " << apgNorm << LL_NEWLINE
 							<< "pRot = " << pRot << LL_NEWLINE
 							<< "twist    : " << mTwist*180.0/F_PI << LL_NEWLINE
 							<< "twistRot : " << twistRot << LL_ENDL;
+#endif

 	//-------------------------------------------------------------------------
 	// compute rotation of A
--- a/indra/llcharacter/llkeyframefallmotion.cpp
+++ b/indra/llcharacter/llkeyframefallmotion.cpp
@ -121,6 +121,7 @@ BOOL LLKeyframeFallMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLKeyframeFallMotion::onUpdate(F32 activeTime, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	BOOL result = LLKeyframeMotion::onUpdate(activeTime, joint_mask);
 	F32  slerp_amt = clamp_rescale(activeTime / getDuration(), 0.5f, 0.75f, 0.f, 1.f);

--- a/indra/llcharacter/llkeyframemotion.cpp
+++ b/indra/llcharacter/llkeyframemotion.cpp
@ -677,6 +677,7 @@ BOOL LLKeyframeMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLKeyframeMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	// llassert(time >= 0.f);		// This will fire
 	time = llmax(0.f, time);

--- a/indra/llcharacter/llkeyframemotionparam.cpp
+++ b/indra/llcharacter/llkeyframemotionparam.cpp
@ -158,6 +158,7 @@ BOOL LLKeyframeMotionParam::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLKeyframeMotionParam::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	F32 weightFactor = 1.f / (F32)mParameterizedMotions.size();

 	// zero out all pose weights
--- a/indra/llcharacter/llkeyframestandmotion.h
+++ b/indra/llcharacter/llkeyframestandmotion.h
@ -37,9 +37,11 @@
 //-----------------------------------------------------------------------------
 // class LLKeyframeStandMotion
 //-----------------------------------------------------------------------------
+LL_ALIGN_PREFIX(16)
 class LLKeyframeStandMotion :
 	public LLKeyframeMotion
 {
+    LL_ALIGN_NEW
 public:
 	// Constructor
 	LLKeyframeStandMotion(const LLUUID &id);
@ -69,6 +71,18 @@ public:
 	//-------------------------------------------------------------------------
 	// Member Data
 	//-------------------------------------------------------------------------
+    LLJoint				mPelvisJoint;
+
+    LLJoint				mHipLeftJoint;
+    LLJoint				mKneeLeftJoint;
+    LLJoint				mAnkleLeftJoint;
+    LLJoint				mTargetLeft;
+
+    LLJoint				mHipRightJoint;
+    LLJoint				mKneeRightJoint;
+    LLJoint				mAnkleRightJoint;
+    LLJoint				mTargetRight;
+
 	LLCharacter	*mCharacter;

 	BOOL				mFlipFeet;
@ -83,18 +97,6 @@ public:
 	LLPointer<LLJointState>	mKneeRightState;
 	LLPointer<LLJointState>	mAnkleRightState;

-	LLJoint				mPelvisJoint;
-
-	LLJoint				mHipLeftJoint;
-	LLJoint				mKneeLeftJoint;
-	LLJoint				mAnkleLeftJoint;
-	LLJoint				mTargetLeft;
-
-	LLJoint				mHipRightJoint;
-	LLJoint				mKneeRightJoint;
-	LLJoint				mAnkleRightJoint;
-	LLJoint				mTargetRight;
-
 	LLJointSolverRP3	mIKLeft;
 	LLJointSolverRP3	mIKRight;

@ -110,7 +112,7 @@ public:
 	BOOL				mTrackAnkles;

 	S32					mFrameNum;
-};
+} LL_ALIGN_POSTFIX(16);

 #endif // LL_LLKEYFRAMESTANDMOTION_H

--- a/indra/llcharacter/llkeyframewalkmotion.cpp
+++ b/indra/llcharacter/llkeyframewalkmotion.cpp
@ -105,6 +105,7 @@ void LLKeyframeWalkMotion::onDeactivate()
 //-----------------------------------------------------------------------------
 BOOL LLKeyframeWalkMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	// compute time since last update
 	F32 deltaTime = time - mRealTimeLast;

@ -198,6 +199,7 @@ BOOL LLWalkAdjustMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLWalkAdjustMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	// delta_time is guaranteed to be non zero
 	F32 delta_time = llclamp(time - mLastTime, TIME_EPSILON, MAX_TIME_DELTA);
 	mLastTime = time;
@ -373,6 +375,7 @@ BOOL LLFlyAdjustMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLFlyAdjustMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	LLVector3 ang_vel = mCharacter->getCharacterAngularVelocity() * mCharacter->getTimeDilation();
 	F32 speed = mCharacter->getCharacterVelocity().magVec();

--- a/indra/llcharacter/llmotioncontroller.cpp
+++ b/indra/llcharacter/llmotioncontroller.cpp
@ -503,6 +503,7 @@ void LLMotionController::resetJointSignatures()
 //-----------------------------------------------------------------------------
 void LLMotionController::updateIdleMotion(LLMotion* motionp)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	if (motionp->isStopped() && mAnimTime > motionp->getStopTime() + motionp->getEaseOutDuration())
 	{
 		deactivateMotionInstance(motionp);
@ -541,6 +542,7 @@ void LLMotionController::updateIdleMotion(LLMotion* motionp)
 //-----------------------------------------------------------------------------
 void LLMotionController::updateIdleActiveMotions()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	for (motion_list_t::iterator iter = mActiveMotions.begin();
 		 iter != mActiveMotions.end(); )
 	{
@ -553,10 +555,9 @@ void LLMotionController::updateIdleActiveMotions()
 //-----------------------------------------------------------------------------
 // updateMotionsByType()
 //-----------------------------------------------------------------------------
-static LLTrace::BlockTimerStatHandle FTM_MOTION_ON_UPDATE("Motion onUpdate");
-
 void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_type)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	BOOL update_result = TRUE;
 	U8 last_joint_signature[LL_CHARACTER_MAX_ANIMATED_JOINTS];

@ -712,7 +713,6 @@ void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_ty

 			// perform motion update
 			{
-				LL_RECORD_BLOCK_TIME(FTM_MOTION_ON_UPDATE);
 				update_result = motionp->onUpdate(mAnimTime - motionp->mActivationTimestamp, last_joint_signature);
 			}
 		}
@ -768,6 +768,7 @@ void LLMotionController::updateMotionsByType(LLMotion::LLMotionBlendType anim_ty
 //-----------------------------------------------------------------------------
 void LLMotionController::updateLoadingMotions()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	// query pending motions for completion
 	for (motion_set_t::iterator iter = mLoadingMotions.begin();
 		 iter != mLoadingMotions.end(); )
@ -815,6 +816,7 @@ void LLMotionController::updateLoadingMotions()
 //-----------------------------------------------------------------------------
 void LLMotionController::updateMotions(bool force_update)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
    // SL-763: "Distant animated objects run at super fast speed"
    // The use_quantum optimization or possibly the associated code in setTimeStamp()
    // does not work as implemented.
@ -907,6 +909,7 @@ void LLMotionController::updateMotions(bool force_update)
 //-----------------------------------------------------------------------------
 void LLMotionController::updateMotionsMinimal()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	// Always update mPrevTimerElapsed
 	mPrevTimerElapsed = mTimer.getElapsedTimeF32();

@ -924,6 +927,7 @@ void LLMotionController::updateMotionsMinimal()
 //-----------------------------------------------------------------------------
 BOOL LLMotionController::activateMotionInstance(LLMotion *motion, F32 time)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
 	// It's not clear why the getWeight() line seems to be crashing this, but
 	// hopefully this fixes it.
 	if (motion == NULL || motion->getPose() == NULL)
--- a/indra/llcharacter/llpose.h
+++ b/indra/llcharacter/llpose.h
@ -80,8 +80,10 @@ public:

 const S32 JSB_NUM_JOINT_STATES = 6;

+LL_ALIGN_PREFIX(16)
 class LLJointStateBlender
 {
+    LL_ALIGN_NEW
 protected:
 	LLPointer<LLJointState>	mJointStates[JSB_NUM_JOINT_STATES];
 	S32				mPriorities[JSB_NUM_JOINT_STATES];
@ -96,8 +98,8 @@ public:
 	void resetCachedJoint();

 public:
-	LLJoint mJointCache;
-};
+	LL_ALIGN_16(LLJoint mJointCache);
+} LL_ALIGN_POSTFIX(16);

 class LLMotion;

--- a/indra/llcharacter/lltargetingmotion.cpp
+++ b/indra/llcharacter/lltargetingmotion.cpp
@ -103,6 +103,7 @@ BOOL LLTargetingMotion::onActivate()
 //-----------------------------------------------------------------------------
 BOOL LLTargetingMotion::onUpdate(F32 time, U8* joint_mask)
 {
+    LL_PROFILE_ZONE_SCOPED;
 	F32 slerp_amt = LLSmoothInterpolation::getInterpolant(TORSO_TARGET_HALF_LIFE);

 	LLVector3 target;
--- a/indra/llcommon/CMakeLists.txt
+++ b/indra/llcommon/CMakeLists.txt
@ -12,6 +12,7 @@ include(JsonCpp)
 include(Copy3rdPartyLibs)
 include(ZLIB)
 include(URIPARSER)
+include(Tracy)

 include_directories(
    ${EXPAT_INCLUDE_DIRS}
@ -19,6 +20,7 @@ include_directories(
    ${JSONCPP_INCLUDE_DIR}
    ${ZLIB_INCLUDE_DIRS}
    ${URIPARSER_INCLUDE_DIRS}
+    ${TRACY_INCLUDE_DIR}
    )

 # add_executable(lltreeiterators lltreeiterators.cpp)
@ -117,14 +119,16 @@ set(llcommon_SOURCE_FILES
    lluriparser.cpp
    lluuid.cpp
    llworkerthread.cpp
-    timing.cpp
    u64.cpp
+    threadpool.cpp
+    workqueue.cpp
    StackWalker.cpp
    )
    
 set(llcommon_HEADER_FILES
    CMakeLists.txt

+    chrono.h
    ctype_workaround.h
    fix_macros.h
    indra_constants.h
@ -197,6 +201,8 @@ set(llcommon_HEADER_FILES
    llmortician.h
    llnametable.h
    llpointer.h
+    llprofiler.h
+    llprofilercategories.h
    llpounceable.h
    llpredicate.h
    llpreprocessor.h
@ -251,8 +257,12 @@ set(llcommon_HEADER_FILES
    lockstatic.h
    stdtypes.h
    stringize.h
+    threadpool.h
+    threadsafeschedule.h
    timer.h
+    tuple.h
    u64.h
+    workqueue.h
    StackWalker.h
    )

@ -299,6 +309,7 @@ target_link_libraries(
    ${BOOST_SYSTEM_LIBRARY}
    ${GOOGLE_PERFTOOLS_LIBRARIES}
    ${URIPARSER_LIBRARIES}
+    ${TRACY_LIBRARY}
    )

 if (DARWIN)
@ -355,6 +366,9 @@ if (LL_TESTS)
  LL_ADD_INTEGRATION_TEST(lluri "" "${test_libs}")
  LL_ADD_INTEGRATION_TEST(llunits "" "${test_libs}")
  LL_ADD_INTEGRATION_TEST(stringize "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(threadsafeschedule "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(tuple "" "${test_libs}")
+  LL_ADD_INTEGRATION_TEST(workqueue "" "${test_libs}")

 ## llexception_test.cpp isn't a regression test, and doesn't need to be run
 ## every build. It's to help a developer make implementation choices about
--- a/indra/llcommon/chrono.h
+++ b/indra/llcommon/chrono.h
@ -0,0 +1,65 @@
+/**
+ * @file   chrono.h
+ * @author Nat Goodspeed
+ * @date   2021-10-05
+ * @brief  supplement <chrono> with utility functions
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_CHRONO_H)
+#define LL_CHRONO_H
+
+#include <chrono>
+#include <type_traits>              // std::enable_if
+
+namespace LL
+{
+
+// time_point_cast() is derived from https://stackoverflow.com/a/35293183
+// without the iteration: we think errors in the ~1 microsecond range are
+// probably acceptable.
+
+// This variant is for the optimal case when the source and dest use the same
+// clock: that case is handled by std::chrono.
+template <typename DestTimePoint, typename SrcTimePoint,
+          typename std::enable_if<std::is_same<typename DestTimePoint::clock,
+                                               typename SrcTimePoint::clock>::value,
+                                  bool>::type = true>
+DestTimePoint time_point_cast(const SrcTimePoint& time)
+{
+    return std::chrono::time_point_cast<typename DestTimePoint::duration>(time);
+}
+
+// This variant is for when the source and dest use different clocks -- see
+// the linked StackOverflow answer, also Howard Hinnant's, for more context.
+template <typename DestTimePoint, typename SrcTimePoint,
+          typename std::enable_if<! std::is_same<typename DestTimePoint::clock,
+                                                 typename SrcTimePoint::clock>::value,
+                                  bool>::type = true>
+DestTimePoint time_point_cast(const SrcTimePoint& time)
+{
+    // The basic idea is that we must adjust the passed time_point by the
+    // difference between the clocks' epochs. But since time_point doesn't
+    // expose its epoch, we fall back on what each of them thinks is now().
+    // However, since we necessarily make sequential calls to those now()
+    // functions, the answers differ not only by the cycles spent executing
+    // those calls, but by potential OS interruptions between them. Try to
+    // reduce that error by capturing the source clock time both before and
+    // after the dest clock, and splitting the difference. Of course an
+    // interruption between two of these now() calls without a comparable
+    // interruption between the other two will skew the result, but better is
+    // more expensive.
+    const auto src_before = typename SrcTimePoint::clock::now();
+    const auto dest_now   = typename DestTimePoint::clock::now();
+    const auto src_after  = typename SrcTimePoint::clock::now();
+    const auto src_diff   = src_after - src_before;
+    const auto src_now    = src_before + src_diff / 2;
+    return dest_now + (time - src_now);
+}
+
+} // namespace LL
+
+#endif /* ! defined(LL_CHRONO_H) */
--- a/indra/llcommon/linden_common.h
+++ b/indra/llcommon/linden_common.h
@ -27,6 +27,14 @@
 #ifndef LL_LINDEN_COMMON_H
 #define LL_LINDEN_COMMON_H

+#include "llprofiler.h"
+#if TRACY_ENABLE && !defined(LL_PROFILER_ENABLE_TRACY_OPENGL)  // hooks for memory profiling
+void *tracy_aligned_malloc(size_t size, size_t alignment);
+void  tracy_aligned_free(void *memblock);
+#define _aligned_malloc(X, Y) tracy_aligned_malloc((X), (Y))
+#define _aligned_free(X)      tracy_aligned_free((X))
+#endif
+
 // *NOTE:  Please keep includes here to a minimum!
 //
 // Files included here are included in every library .cpp file and
--- a/indra/llcommon/llcommon.cpp
+++ b/indra/llcommon/llcommon.cpp
@ -33,6 +33,66 @@
 #include "lltracethreadrecorder.h"
 #include "llcleanup.h"

+thread_local bool gProfilerEnabled = false;
+
+#if (TRACY_ENABLE)
+// Override new/delete for tracy memory profiling
+void *operator new(size_t size)
+{
+    void* ptr;
+    if (gProfilerEnabled)
+    {
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+        ptr = (malloc)(size);
+    }
+    else
+    {
+        ptr = (malloc)(size);
+    }
+    if (!ptr)
+    {
+        throw std::bad_alloc();
+    }
+    TracyAlloc(ptr, size);
+    return ptr;
+}
+
+void operator delete(void *ptr) noexcept
+{
+    TracyFree(ptr);
+    if (gProfilerEnabled)
+    {
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+        (free)(ptr);
+    }
+    else
+    {
+        (free)(ptr);
+    }
+}
+
+// C-style malloc/free can't be so easily overridden, so we define tracy versions and use
+// a pre-processor #define in linden_common.h to redirect to them. The parens around the native
+// functions below prevents recursive substitution by the preprocessor.
+//
+// Unaligned mallocs are rare in LL code but hooking them causes problems in 3p lib code (looking at
+// you, Havok), so we'll only capture the aligned version.
+
+void *tracy_aligned_malloc(size_t size, size_t alignment)
+{
+    auto ptr = ll_aligned_malloc_fallback(size, alignment);
+    if (ptr) TracyAlloc(ptr, size);
+    return ptr;
+}
+
+void tracy_aligned_free(void *memblock)
+{
+    TracyFree(memblock);
+    ll_aligned_free_fallback(memblock);
+}
+
+#endif
+
 //static
 BOOL LLCommon::sAprInitialized = FALSE;

--- a/indra/llcommon/llcond.h
+++ b/indra/llcommon/llcond.h
@ -53,6 +53,8 @@ private:
    LLCoros::Mutex mMutex;
    // Use LLCoros::ConditionVariable for the same reason.
    LLCoros::ConditionVariable mCond;
+    using LockType = LLCoros::LockType;
+    using cv_status = LLCoros::cv_status;

 public:
    /// LLCond can be explicitly initialized with a specific value for mData if
@ -65,10 +67,29 @@ public:
    LLCond(const LLCond&) = delete;
    LLCond& operator=(const LLCond&) = delete;

-    /// get() returns a const reference to the stored DATA. The only way to
-    /// get a non-const reference -- to modify the stored DATA -- is via
-    /// update_one() or update_all().
-    const value_type& get() const { return mData; }
+    /**
+     * get() returns the stored DATA by value -- so to use get(), DATA must
+     * be copyable. The only way to get a non-const reference -- to modify
+     * the stored DATA -- is via update_one() or update_all().
+     */
+    value_type get()
+    {
+        LockType lk(mMutex);
+        return mData;
+    }
+
+    /**
+     * get(functor) returns whatever the functor returns. It allows us to peek
+     * at the stored DATA without copying the whole thing. The functor must
+     * accept a const reference to DATA. If you want to modify DATA, call
+     * update_one() or update_all() instead.
+     */
+    template <typename FUNC>
+    auto get(FUNC&& func)
+    {
+        LockType lk(mMutex);
+        return std::forward<FUNC>(func)(const_data());
+    }

    /**
     * Pass update_one() an invocable accepting non-const (DATA&). The
@ -80,11 +101,11 @@ public:
     * update_one() when DATA is a struct or class.
     */
    template <typename MODIFY>
-    void update_one(MODIFY modify)
+    void update_one(MODIFY&& modify)
    {
        { // scope of lock can/should end before notify_one()
-            LLCoros::LockType lk(mMutex);
-            modify(mData);
+            LockType lk(mMutex);
+            std::forward<MODIFY>(modify)(mData);
        }
        mCond.notify_one();
    }
@ -99,11 +120,11 @@ public:
     * update_all() when DATA is a struct or class.
     */
    template <typename MODIFY>
-    void update_all(MODIFY modify)
+    void update_all(MODIFY&& modify)
    {
        { // scope of lock can/should end before notify_all()
-            LLCoros::LockType lk(mMutex);
-            modify(mData);
+            LockType lk(mMutex);
+            std::forward<MODIFY>(modify)(mData);
        }
        mCond.notify_all();
    }
@ -116,9 +137,9 @@ public:
     * wait() on the condition_variable.
     */
    template <typename Pred>
-    void wait(Pred pred)
+    void wait(Pred&& pred)
    {
-        LLCoros::LockType lk(mMutex);
+        LockType lk(mMutex);
        // We must iterate explicitly since the predicate accepted by
        // condition_variable::wait() requires a different signature:
        // condition_variable::wait() calls its predicate with no arguments.
@ -127,7 +148,7 @@ public:
        // But what if they instead pass a predicate accepting non-const
        // (DATA&)? Such a predicate could modify mData, which would be Bad.
        // Forbid that.
-        while (! pred(const_cast<const value_type&>(mData)))
+        while (! std::forward<Pred>(pred)(const_data()))
        {
            mCond.wait(lk);
        }
@ -144,7 +165,7 @@ public:
     * returning true.
     */
    template <typename Rep, typename Period, typename Pred>
-    bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred pred)
+    bool wait_for(const std::chrono::duration<Rep, Period>& timeout_duration, Pred&& pred)
    {
        // Instead of replicating wait_until() logic, convert duration to
        // time_point and just call wait_until().
@ -153,7 +174,8 @@ public:
        // wrong! We'd keep pushing the timeout time farther and farther into
        // the future. This way, we establish a definite timeout time and
        // stick to it.
-        return wait_until(std::chrono::steady_clock::now() + timeout_duration, pred);
+        return wait_until(std::chrono::steady_clock::now() + timeout_duration,
+                          std::forward<Pred>(pred));
    }

    /**
@ -163,9 +185,9 @@ public:
     * generic wait_for() method.
     */
    template <typename Pred>
-    bool wait_for(F32Milliseconds timeout_duration, Pred pred)
+    bool wait_for(F32Milliseconds timeout_duration, Pred&& pred)
    {
-        return wait_for(convert(timeout_duration), pred);
+        return wait_for(convert(timeout_duration), std::forward<Pred>(pred));
    }

 protected:
@ -183,6 +205,10 @@ protected:
    }

 private:
+    // It's important to pass a const ref to certain user-specified functors
+    // that aren't supposed to be able to modify mData.
+    const value_type& const_data() const { return mData; }
+
    /**
     * Pass wait_until() a chrono::time_point, indicating the time at which we
     * should stop waiting, and a predicate accepting (const DATA&), returning
@ -203,21 +229,21 @@ private:
     * honoring a fixed timeout.
     */
    template <typename Clock, typename Duration, typename Pred>
-    bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred pred)
+    bool wait_until(const std::chrono::time_point<Clock, Duration>& timeout_time, Pred&& pred)
    {
-        LLCoros::LockType lk(mMutex);
+        LockType lk(mMutex);
        // We advise the caller to pass a predicate accepting (const DATA&).
        // But what if they instead pass a predicate accepting non-const
        // (DATA&)? Such a predicate could modify mData, which would be Bad.
        // Forbid that.
-        while (! pred(const_cast<const value_type&>(mData)))
+        while (! std::forward<Pred>(pred)(const_data()))
        {
-            if (LLCoros::cv_status::timeout == mCond.wait_until(lk, timeout_time))
+            if (cv_status::timeout == mCond.wait_until(lk, timeout_time))
            {
                // It's possible that wait_until() timed out AND the predicate
                // became true more or less simultaneously. Even though
                // wait_until() timed out, check the predicate one more time.
-                return pred(const_cast<const value_type&>(mData));
+                return std::forward<Pred>(pred)(const_data());
            }
        }
        return true;
--- a/indra/llcommon/lldate.cpp
+++ b/indra/llcommon/lldate.cpp
@ -86,11 +86,9 @@ std::string LLDate::asRFC1123() const
 	return toHTTPDateString (std::string ("%A, %d %b %Y %H:%M:%S GMT"));
 }

-LLTrace::BlockTimerStatHandle FT_DATE_FORMAT("Date Format");
-
 std::string LLDate::toHTTPDateString (std::string fmt) const
 {
-	LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT);
+    LL_PROFILE_ZONE_SCOPED;
 	
 	time_t locSeconds = (time_t) mSecondsSinceEpoch;
 	struct tm * gmt = gmtime (&locSeconds);
@ -99,7 +97,7 @@ std::string LLDate::toHTTPDateString (std::string fmt) const

 std::string LLDate::toHTTPDateString (tm * gmt, std::string fmt)
 {
-	LL_RECORD_BLOCK_TIME(FT_DATE_FORMAT);
+    LL_PROFILE_ZONE_SCOPED;

 	// avoid calling setlocale() unnecessarily - it's expensive.
 	static std::string prev_locale = "";
--- a/indra/llcommon/llerror.cpp
+++ b/indra/llcommon/llerror.cpp
@ -109,6 +109,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 									const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 			int syslogPriority = LOG_CRIT;
 			switch (level) {
 				case LLError::LEVEL_DEBUG:	syslogPriority = LOG_DEBUG;	break;
@ -166,6 +167,7 @@ namespace {
        virtual void recordMessage(LLError::ELevel level,
                                    const std::string& message) override
        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
            if (LLError::getAlwaysFlush())
            {
                mFile << message << std::endl;
@ -194,7 +196,7 @@ namespace {
        {
            return LLError::getEnabledLogTypesMask() & 0x04;
        }
-
+        
        LL_FORCE_INLINE std::string createBoldANSI()
        {
            std::string ansi_code;
@ -220,10 +222,10 @@ namespace {
        LL_FORCE_INLINE std::string createANSI(const std::string& color)
        {
            std::string ansi_code;
-            ansi_code += '\033';
-            ansi_code += "[";
+            ansi_code  += '\033';
+            ansi_code  += "[";
            ansi_code += "38;5;";
-            ansi_code += color;
+            ansi_code  += color;
            ansi_code += "m";

            return ansi_code;
@ -232,6 +234,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 					   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
            // The default colors for error, warn and debug are now a bit more pastel
            // and easier to read on the default (black) terminal background but you 
            // now have the option to set the color of each via an environment variables:
@ -261,6 +264,7 @@ namespace {
 			}
            else
            {
+                LL_PROFILE_ZONE_NAMED("fprintf");
                 fprintf(stderr, "%s\n", message.c_str());
            }
 		}
@ -270,6 +274,7 @@ namespace {

        LL_FORCE_INLINE void writeANSI(const std::string& ansi_code, const std::string& message)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
            static std::string s_ansi_bold = createBoldANSI();  // bold text
            static std::string s_ansi_reset = createResetANSI();  // reset
 			// ANSI color code escape sequence, message, and reset in one fprintf call
@ -306,6 +311,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 								   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 			mBuffer->addLine(message);
 		}
 	
@ -332,6 +338,7 @@ namespace {
 		virtual void recordMessage(LLError::ELevel level,
 								   const std::string& message) override
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 			debugger_print(message);
 		}
 	};
@ -1210,6 +1217,7 @@ namespace

 	void writeToRecorders(const LLError::CallSite& site, const std::string& message)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 		LLError::ELevel level = site.mLevel;
 		SettingsConfigPtr s = Globals::getInstance()->getSettingsConfig();

@ -1344,6 +1352,7 @@ namespace LLError

 	bool Log::shouldLog(CallSite& site)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 		LLMutexTrylock lock(getMutex<LOG_MUTEX>(), 5);
 		if (!lock.isLocked())
 		{
@ -1388,6 +1397,7 @@ namespace LLError

 	void Log::flush(const std::ostringstream& out, const CallSite& site)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
 		LLMutexTrylock lock(getMutex<LOG_MUTEX>(),5);
 		if (!lock.isLocked())
 		{
--- a/indra/llcommon/llerror.h
+++ b/indra/llcommon/llerror.h
@ -35,7 +35,9 @@

 #include "stdtypes.h"

+#include "llprofiler.h"
 #include "llpreprocessor.h"
+
 #include <boost/static_assert.hpp>

 const int LL_ERR_NOERR = 0;
@ -348,7 +350,8 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG;
 // if (condition) LL_INFOS() << "True" << LL_ENDL; else LL_INFOS()() << "False" << LL_ENDL;

 #define lllog(level, once, ...)                                         \
-	do {                                                                \
+    do {                                                                \
+        LL_PROFILE_ZONE_NAMED("lllog");                                 \
 		const char* tags[] = {"", ##__VA_ARGS__};                       \
 		static LLError::CallSite _site(lllog_site_args_(level, once, tags)); \
 		lllog_test_()
--- a/indra/llcommon/llerrorcontrol.h
+++ b/indra/llcommon/llerrorcontrol.h
@ -190,6 +190,7 @@ namespace LLError
        {}
        void recordMessage(LLError::ELevel level, const std::string& message) override
        {
+            LL_PROFILE_ZONE_SCOPED
            mCallable(level, message);
        }
    private:
--- a/indra/llcommon/lleventfilter.h
+++ b/indra/llcommon/lleventfilter.h
@ -429,6 +429,8 @@ public:
    // path, then stores it to mTarget.
    virtual bool post(const LLSD& event)
    {
+        LL_PROFILE_ZONE_SCOPED
+
        // Extract the element specified by 'mPath' from 'event'. To perform a
        // generic type-appropriate store through mTarget, construct an
        // LLSDParam<T> and store that, thus engaging LLSDParam's custom
--- a/indra/llcommon/llexception.cpp
+++ b/indra/llcommon/llexception.cpp
@ -97,6 +97,11 @@ static const U32 STATUS_MSC_EXCEPTION = 0xE06D7363; // compiler specific

 U32 msc_exception_filter(U32 code, struct _EXCEPTION_POINTERS *exception_infop)
 {
+    const auto stack = to_string(boost::stacktrace::stacktrace());
+    LL_WARNS() << "SEH Exception handled (that probably shouldn't be): Code " << code 
+        << "\n Stack trace: \n" 
+        << stack << LL_ENDL;
+
    if (code == STATUS_MSC_EXCEPTION)
    {
        // C++ exception, go on
--- a/indra/llcommon/llfasttimer.cpp
+++ b/indra/llcommon/llfasttimer.cpp
@ -191,29 +191,30 @@ TimeBlockTreeNode& BlockTimerStatHandle::getTreeNode() const
 }


+
 void BlockTimer::bootstrapTimerTree()
 {
-	for (auto& base : BlockTimerStatHandle::instance_snapshot())
-	{
-		// because of indirect derivation from LLInstanceTracker, have to downcast
-		BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base);
-		if (&timer == &BlockTimer::getRootTimeBlock()) continue;
+    for (auto& base : BlockTimerStatHandle::instance_snapshot())
+    {
+        // because of indirect derivation from LLInstanceTracker, have to downcast
+        BlockTimerStatHandle& timer = static_cast<BlockTimerStatHandle&>(base);
+        if (&timer == &BlockTimer::getRootTimeBlock()) continue;

-		// bootstrap tree construction by attaching to last timer to be on stack
-		// when this timer was called
-		if (timer.getParent() == &BlockTimer::getRootTimeBlock())
-		{
-			TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();
+        // bootstrap tree construction by attaching to last timer to be on stack
+        // when this timer was called
+        if (timer.getParent() == &BlockTimer::getRootTimeBlock())
+        {
+            TimeBlockAccumulator& accumulator = timer.getCurrentAccumulator();

-			if (accumulator.mLastCaller)
-			{
-				timer.setParent(accumulator.mLastCaller);
-				accumulator.mParent = accumulator.mLastCaller;
-			}
-			// no need to push up tree on first use, flag can be set spuriously
-			accumulator.mMoveUpTree = false;
-		}
-	}
+            if (accumulator.mLastCaller)
+            {
+                timer.setParent(accumulator.mLastCaller);
+                accumulator.mParent = accumulator.mLastCaller;
+            }
+            // no need to push up tree on first use, flag can be set spuriously
+            accumulator.mMoveUpTree = false;
+        }
+    }
 }

 // bump timers up tree if they have been flagged as being in the wrong place
@ -221,6 +222,7 @@ void BlockTimer::bootstrapTimerTree()
 // this preserves partial order derived from current frame's observations
 void BlockTimer::incrementalUpdateTimerTree()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	for(block_timer_tree_df_post_iterator_t it = begin_block_timer_tree_df_post(BlockTimer::getRootTimeBlock());
 		it != end_block_timer_tree_df_post();
 		++it)
@ -260,7 +262,8 @@ void BlockTimer::incrementalUpdateTimerTree()


 void BlockTimer::updateTimes()
-	{
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	// walk up stack of active timers and accumulate current time while leaving timing structures active
 	BlockTimerStackRecord* stack_record	= LLThreadLocalSingletonPointer<BlockTimerStackRecord>::getInstance();
 	if (!stack_record) return;
@ -271,7 +274,7 @@ void BlockTimer::updateTimes()

 	while(cur_timer 
 		&& cur_timer->mParentTimerData.mActiveTimer != cur_timer) // root defined by parent pointing to self
-		{
+	{
 		U64 cumulative_time_delta = cur_time - cur_timer->mStartTime;
 		cur_timer->mStartTime = cur_time;

--- a/indra/llcommon/llfasttimer.h
+++ b/indra/llcommon/llfasttimer.h
@ -38,7 +38,10 @@
 #define LL_FAST_TIMER_ON 1
 #define LL_FASTTIMER_USE_RDTSC 1

+// NOTE: Also see llprofiler.h
+#if !defined(LL_PROFILER_CONFIGURATION)
 #define LL_RECORD_BLOCK_TIME(timer_stat) const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(timer_stat)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+#endif // LL_PROFILER_CONFIGURATION

 namespace LLTrace
 {
--- a/indra/llcommon/llframetimer.cpp
+++ b/indra/llcommon/llframetimer.cpp
@ -29,6 +29,11 @@

 #include "llframetimer.h"

+// We don't bother building a stand alone lib; we just need to include the one source file for Tracy support
+#if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+	#include "TracyClient.cpp"
+#endif // LL_PROFILER_CONFIGURATION
+
 // Static members
 //LLTimer	LLFrameTimer::sInternalTimer;
 U64 LLFrameTimer::sStartTotalTime = totalTime();
--- a/indra/llcommon/llinstancetracker.h
+++ b/indra/llcommon/llinstancetracker.h
@ -83,13 +83,34 @@ class LLInstanceTracker
    typedef llthread::LockStatic<StaticData> LockStatic;

 public:
+    using ptr_t  = std::shared_ptr<T>;
+    using weak_t = std::weak_ptr<T>;
+
+    /**
+     * Storing a dumb T* somewhere external is a bad idea, since
+     * LLInstanceTracker subclasses are explicitly destroyed rather than
+     * managed by smart pointers. It's legal to declare stack instances of an
+     * LLInstanceTracker subclass. But it's reasonable to store a
+     * std::weak_ptr<T>, which will become invalid when the T instance is
+     * destroyed.
+     */
+    weak_t getWeak()
+    {
+        return mSelf;
+    }
+
+    static S32 instanceCount() 
+    { 
+        return LockStatic()->mMap.size(); 
+    }
+    
    // snapshot of std::pair<const KEY, std::shared_ptr<T>> pairs
    class snapshot
    {
        // It's very important that what we store in this snapshot are
        // weak_ptrs, NOT shared_ptrs. That's how we discover whether any
        // instance has been deleted during the lifespan of a snapshot.
-        typedef std::vector<std::pair<const KEY, std::weak_ptr<T>>> VectorType;
+        typedef std::vector<std::pair<const KEY, weak_t>> VectorType;
        // Dereferencing our iterator produces a std::shared_ptr for each
        // instance that still exists. Since we store weak_ptrs, that involves
        // two chained transformations:
@ -98,7 +119,7 @@ public:
        // It is very important that we filter lazily, that is, during
        // traversal. Any one of our stored weak_ptrs might expire during
        // traversal.
-        typedef std::pair<const KEY, std::shared_ptr<T>> strong_pair;
+        typedef std::pair<const KEY, ptr_t> strong_pair;
        // Note for future reference: nat has not yet had any luck (up to
        // Boost 1.67) trying to use boost::transform_iterator with a hand-
        // coded functor, only with actual functions. In my experience, an
@ -202,17 +223,12 @@ public:
        iterator end()   { return iterator(snapshot::end(),   key_getter); }
    };

-    static T* getInstance(const KEY& k)
+    static ptr_t getInstance(const KEY& k)
    {
        LockStatic lock;
        const InstanceMap& map(lock->mMap);
        typename InstanceMap::const_iterator found = map.find(k);
-        return (found == map.end()) ? NULL : found->second.get();
-    }
-
-    static S32 instanceCount() 
-    { 
-        return LockStatic()->mMap.size(); 
+        return (found == map.end()) ? NULL : found->second;
    }

 protected:
@ -222,7 +238,9 @@ protected:
        // shared_ptr, so give it a no-op deleter. We store shared_ptrs in our
        // InstanceMap specifically so snapshot can store weak_ptrs so we can
        // detect deletions during traversals.
-        std::shared_ptr<T> ptr(static_cast<T*>(this), [](T*){});
+        ptr_t ptr(static_cast<T*>(this), [](T*){});
+        // save corresponding weak_ptr for future reference
+        mSelf = ptr;
        LockStatic lock;
        add_(lock, key, ptr);
    }
@ -257,7 +275,7 @@ private:
    static std::string report(const char* key) { return report(std::string(key)); }

    // caller must instantiate LockStatic
-    void add_(LockStatic& lock, const KEY& key, const std::shared_ptr<T>& ptr) 
+    void add_(LockStatic& lock, const KEY& key, const ptr_t& ptr) 
    { 
        mInstanceKey = key; 
        InstanceMap& map = lock->mMap;
@ -281,7 +299,7 @@ private:
            break;
        }
    }
-    std::shared_ptr<T> remove_(LockStatic& lock)
+    ptr_t remove_(LockStatic& lock)
    {
        InstanceMap& map = lock->mMap;
        typename InstanceMap::iterator iter = map.find(mInstanceKey);
@ -295,6 +313,9 @@ private:
    }

 private:
+    // Storing a weak_ptr to self is a bit like deriving from
+    // std::enable_shared_from_this(), except more explicit.
+    weak_t mSelf;
    KEY mInstanceKey;
 };

@ -326,6 +347,9 @@ class LLInstanceTracker<T, void, KEY_COLLISION_BEHAVIOR>
    typedef llthread::LockStatic<StaticData> LockStatic;

 public:
+    using ptr_t  = std::shared_ptr<T>;
+    using weak_t = std::weak_ptr<T>;
+
    /**
     * Storing a dumb T* somewhere external is a bad idea, since
     * LLInstanceTracker subclasses are explicitly destroyed rather than
@ -334,12 +358,15 @@ public:
     * std::weak_ptr<T>, which will become invalid when the T instance is
     * destroyed.
     */
-    std::weak_ptr<T> getWeak()
+    weak_t getWeak()
    {
        return mSelf;
    }
    
-    static S32 instanceCount() { return LockStatic()->mSet.size(); }
+    static S32 instanceCount()
+    {
+        return LockStatic()->mSet.size();
+    }

    // snapshot of std::shared_ptr<T> pointers
    class snapshot
@ -347,7 +374,7 @@ public:
        // It's very important that what we store in this snapshot are
        // weak_ptrs, NOT shared_ptrs. That's how we discover whether any
        // instance has been deleted during the lifespan of a snapshot.
-        typedef std::vector<std::weak_ptr<T>> VectorType;
+        typedef std::vector<weak_t> VectorType;
        // Dereferencing our iterator produces a std::shared_ptr for each
        // instance that still exists. Since we store weak_ptrs, that involves
        // two chained transformations:
@ -453,7 +480,7 @@ protected:
 private:
    // Storing a weak_ptr to self is a bit like deriving from
    // std::enable_shared_from_this(), except more explicit.
-    std::weak_ptr<T> mSelf;
+    weak_t mSelf;
 };

 #endif
--- a/indra/llcommon/llleaplistener.cpp
+++ b/indra/llcommon/llleaplistener.cpp
@ -220,7 +220,7 @@ void LLLeapListener::getAPI(const LLSD& request) const
 {
    Response reply(LLSD(), request);

-    LLEventAPI* found = LLEventAPI::getInstance(request["api"]);
+    auto found = LLEventAPI::getInstance(request["api"]);
    if (found)
    {
        reply["name"] = found->getName();
--- a/indra/llcommon/llmemory.cpp
+++ b/indra/llcommon/llmemory.cpp
@ -82,6 +82,7 @@ void LLMemory::initMaxHeapSizeGB(F32Gigabytes max_heap_size)
 //static 
 void LLMemory::updateMemoryInfo() 
 {
+	LL_PROFILE_ZONE_SCOPED
 #if LL_WINDOWS
 	PROCESS_MEMORY_COUNTERS counters;

@ -145,6 +146,7 @@ void* LLMemory::tryToAlloc(void* address, U32 size)
 //static 
 void LLMemory::logMemoryInfo(BOOL update)
 {
+	LL_PROFILE_ZONE_SCOPED
 	if(update)
 	{
 		updateMemoryInfo() ;
--- a/indra/llcommon/llmemory.h
+++ b/indra/llcommon/llmemory.h
@ -101,6 +101,29 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)

 #define LL_ALIGN_16(var) LL_ALIGN_PREFIX(16) var LL_ALIGN_POSTFIX(16)

+#define LL_ALIGN_NEW                        \
+public:                                     \
+    void* operator new(size_t size)         \
+    {                                       \
+        return ll_aligned_malloc_16(size);  \
+    }                                       \
+                                            \
+    void operator delete(void* ptr)         \
+    {                                       \
+        ll_aligned_free_16(ptr);            \
+    }                                       \
+                                            \
+    void* operator new[](size_t size)       \
+    {                                       \
+        return ll_aligned_malloc_16(size);  \
+    }                                       \
+                                            \
+    void operator delete[](void* ptr)       \
+    {                                       \
+        ll_aligned_free_16(ptr);            \
+    }
+
+
 //------------------------------------------------------------------------------------------------
 //------------------------------------------------------------------------------------------------
 	// for enable buffer overrun detection predefine LL_DEBUG_BUFFER_OVERRUN in current library
@ -113,8 +136,9 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)
 #else
 	inline void* ll_aligned_malloc_fallback( size_t size, int align )
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
 	#if defined(LL_WINDOWS)
-		return _aligned_malloc(size, align);
+        void* ret = _aligned_malloc(size, align);
 	#else
        char* aligned = NULL;
 		void* mem = malloc( size + (align - 1) + sizeof(void*) );
@ -125,12 +149,16 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)

            ((void**)aligned)[-1] = mem;
        }
-		return aligned;
+		void* ret = aligned;
 	#endif
+        LL_PROFILE_ALLOC(ret, size);
+        return ret;
 	}

 	inline void ll_aligned_free_fallback( void* ptr )
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+        LL_PROFILE_FREE(ptr);
 	#if defined(LL_WINDOWS)
 		_aligned_free(ptr);
 	#else
@ -146,21 +174,24 @@ template <typename T> T* LL_NEXT_ALIGNED_ADDRESS_64(T* address)

 inline void* ll_aligned_malloc_16(size_t size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
 #if defined(LL_WINDOWS)
-	return _aligned_malloc(size, 16);
+	void* ret = _aligned_malloc(size, 16);
 #elif defined(LL_DARWIN)
-	return malloc(size); // default osx malloc is 16 byte aligned.
+	void* ret = malloc(size); // default osx malloc is 16 byte aligned.
 #else
-	void *rtn;
-	if (LL_LIKELY(0 == posix_memalign(&rtn, 16, size)))
-		return rtn;
-	else // bad alignment requested, or out of memory
-		return NULL;
+	void *ret;
+    if (0 != posix_memalign(&ret, 16, size))
+        return nullptr;
 #endif
+    LL_PROFILE_ALLOC(ret, size);
+    return ret;
 }

 inline void ll_aligned_free_16(void *p)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+    LL_PROFILE_FREE(p);
 #if defined(LL_WINDOWS)
 	_aligned_free(p);
 #elif defined(LL_DARWIN)
@ -172,10 +203,12 @@ inline void ll_aligned_free_16(void *p)

 inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // returned hunk MUST be freed with ll_aligned_free_16().
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+    LL_PROFILE_FREE(ptr);
 #if defined(LL_WINDOWS)
-	return _aligned_realloc(ptr, size, 16);
+	void* ret = _aligned_realloc(ptr, size, 16);
 #elif defined(LL_DARWIN)
-	return realloc(ptr,size); // default osx malloc is 16 byte aligned.
+	void* ret = realloc(ptr,size); // default osx malloc is 16 byte aligned.
 #else
 	//FIXME: memcpy is SLOW
 	void* ret = ll_aligned_malloc_16(size);
@ -188,27 +221,31 @@ inline void* ll_aligned_realloc_16(void* ptr, size_t size, size_t old_size) // r
 		}
 		ll_aligned_free_16(ptr);
 	}
-	return ret;
 #endif
+    LL_PROFILE_ALLOC(ptr, size);
+    return ret;
 }

 inline void* ll_aligned_malloc_32(size_t size) // returned hunk MUST be freed with ll_aligned_free_32().
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
 #if defined(LL_WINDOWS)
-	return _aligned_malloc(size, 32);
+	void* ret = _aligned_malloc(size, 32);
 #elif defined(LL_DARWIN)
-	return ll_aligned_malloc_fallback( size, 32 );
+	void* ret = ll_aligned_malloc_fallback( size, 32 );
 #else
-	void *rtn;
-	if (LL_LIKELY(0 == posix_memalign(&rtn, 32, size)))
-		return rtn;
-	else // bad alignment requested, or out of memory
-		return NULL;
+	void *ret;
+    if (0 != posix_memalign(&ret, 32, size))
+        return nullptr;
 #endif
+    LL_PROFILE_ALLOC(ret, size);
+    return ret;
 }

 inline void ll_aligned_free_32(void *p)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+    LL_PROFILE_FREE(p);
 #if defined(LL_WINDOWS)
 	_aligned_free(p);
 #elif defined(LL_DARWIN)
@ -222,29 +259,35 @@ inline void ll_aligned_free_32(void *p)
 template<size_t ALIGNMENT>
 LL_FORCE_INLINE void* ll_aligned_malloc(size_t size)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
+    void* ret;
 	if (LL_DEFAULT_HEAP_ALIGN % ALIGNMENT == 0)
 	{
-		return malloc(size);
+		ret = malloc(size);
+        LL_PROFILE_ALLOC(ret, size);
 	}
 	else if (ALIGNMENT == 16)
 	{
-		return ll_aligned_malloc_16(size);
+		ret = ll_aligned_malloc_16(size);
 	}
 	else if (ALIGNMENT == 32)
 	{
-		return ll_aligned_malloc_32(size);
+		ret = ll_aligned_malloc_32(size);
 	}
 	else
 	{
-		return ll_aligned_malloc_fallback(size, ALIGNMENT);
+		ret = ll_aligned_malloc_fallback(size, ALIGNMENT);
 	}
+    return ret;
 }

 template<size_t ALIGNMENT>
 LL_FORCE_INLINE void ll_aligned_free(void* ptr)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
 	if (ALIGNMENT == LL_DEFAULT_HEAP_ALIGN)
 	{
+        LL_PROFILE_FREE(ptr);
 		free(ptr);
 	}
 	else if (ALIGNMENT == 16)
@ -266,6 +309,7 @@ LL_FORCE_INLINE void ll_aligned_free(void* ptr)
 //
 inline void ll_memcpy_nonaliased_aligned_16(char* __restrict dst, const char* __restrict src, size_t bytes)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY;
 	assert(src != NULL);
 	assert(dst != NULL);
 	assert(bytes > 0);
--- a/indra/llcommon/llmutex.cpp
+++ b/indra/llcommon/llmutex.cpp
@ -44,6 +44,7 @@ LLMutex::~LLMutex()

 void LLMutex::lock()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if(isSelfLocked())
 	{ //redundant lock
 		mCount++;
@ -65,6 +66,7 @@ void LLMutex::lock()

 void LLMutex::unlock()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if (mCount > 0)
 	{ //not the root unlock
 		mCount--;
@ -85,6 +87,7 @@ void LLMutex::unlock()

 bool LLMutex::isLocked()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if (!mMutex.try_lock())
 	{
 		return true;
@ -108,6 +111,7 @@ LLThread::id_t LLMutex::lockingThread() const

 bool LLMutex::trylock()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if(isSelfLocked())
 	{ //redundant lock
 		mCount++;
@ -146,17 +150,20 @@ LLCondition::~LLCondition()

 void LLCondition::wait()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	std::unique_lock< std::mutex > lock(mMutex);
 	mCond.wait(lock);
 }

 void LLCondition::signal()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	mCond.notify_one();
 }

 void LLCondition::broadcast()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	mCond.notify_all();
 }

@ -166,6 +173,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex)
    : mMutex(mutex),
    mLocked(false)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    if (mMutex)
        mLocked = mMutex->trylock();
 }
@ -174,6 +182,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)
    : mMutex(mutex),
    mLocked(false)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    if (!mMutex)
        return;

@ -188,6 +197,7 @@ LLMutexTrylock::LLMutexTrylock(LLMutex* mutex, U32 aTries, U32 delay_ms)

 LLMutexTrylock::~LLMutexTrylock()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    if (mMutex && mLocked)
        mMutex->unlock();
 }
@ -199,6 +209,7 @@ LLMutexTrylock::~LLMutexTrylock()
 //
 LLScopedLock::LLScopedLock(std::mutex* mutex) : mMutex(mutex)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if(mutex)
 	{
 		mutex->lock();
@ -217,6 +228,7 @@ LLScopedLock::~LLScopedLock()

 void LLScopedLock::unlock()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
 	if(mLocked)
 	{
 		mMutex->unlock();
--- a/indra/llcommon/llpreprocessor.h
+++ b/indra/llcommon/llpreprocessor.h
@ -171,7 +171,9 @@
 #define LL_DLLIMPORT
 #endif // LL_WINDOWS

-#if ! defined(LL_WINDOWS)
+#if __clang__ || ! defined(LL_WINDOWS)
+// Only on Windows, and only with the Microsoft compiler (vs. clang) is
+// wchar_t potentially not a distinct type.
 #define LL_WCHAR_T_NATIVE 1
 #else  // LL_WINDOWS
 // https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros
--- a/indra/llcommon/llprocessor.cpp
+++ b/indra/llcommon/llprocessor.cpp
@ -44,20 +44,6 @@

 #include "llsd.h"

-#if LL_MSVC && _M_X64
-#      define LL_X86_64 1
-#      define LL_X86 1
-#elif LL_MSVC && _M_IX86
-#      define LL_X86 1
-#elif LL_GNUC && ( defined(__amd64__) || defined(__x86_64__) )
-#      define LL_X86_64 1
-#      define LL_X86 1
-#elif LL_GNUC && ( defined(__i386__) )
-#      define LL_X86 1
-#elif LL_GNUC && ( defined(__powerpc__) || defined(__ppc__) )
-#      define LL_PPC 1
-#endif
-
 class LLProcessorInfoImpl; // foward declaration for the mImpl;

 namespace 
--- a/indra/llcommon/llprocessor.h
+++ b/indra/llcommon/llprocessor.h
@ -29,6 +29,20 @@
 #define LLPROCESSOR_H
 #include "llunits.h"

+#if LL_MSVC && _M_X64
+#      define LL_X86_64 1
+#      define LL_X86 1
+#elif LL_MSVC && _M_IX86
+#      define LL_X86 1
+#elif LL_GNUC && ( defined(__amd64__) || defined(__x86_64__) )
+#      define LL_X86_64 1
+#      define LL_X86 1
+#elif LL_GNUC && ( defined(__i386__) )
+#      define LL_X86 1
+#elif LL_GNUC && ( defined(__powerpc__) || defined(__ppc__) )
+#      define LL_PPC 1
+#endif
+
 class LLProcessorInfoImpl;

 class LL_COMMON_API LLProcessorInfo
--- a/indra/llcommon/llprofiler.h
+++ b/indra/llcommon/llprofiler.h
@ -0,0 +1,151 @@
+/**
+ * @file llprofiler.h
+ * @brief Wrapper for Tracy and/or other profilers
+ *
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2021, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_PROFILER_H
+#define LL_PROFILER_H
+
+// If you use the default macros LL_PROFILE_ZONE_SCOPED and LL_PROFILE_ZONE_NAMED to profile code ...
+//
+//     void foo()
+//     {
+//         LL_PROFILE_ZONE_SCOPED;
+//         :
+//
+//         {
+//             LL_PROFILE_ZONE_NAMED("widget bar");
+//             :
+//         }
+//         {
+//             LL_PROFILE_ZONE_NAMED("widget qux");
+//             :
+//         }
+//     }
+//
+// ... please be aware that ALL these will show up in a Tracy capture which can quickly exhaust memory.
+// Instead, use LL_PROFILE_ZONE_SCOPED_CATEGORY_* and LL_PROFILE_ZONE_NAMED_CATEGORY_* to profile code ...
+//
+//     void foo()
+//     {
+//         LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
+//         :
+//
+//         {
+//             LL_PROFILE_ZONE_NAMED_CATEGORY_UI("widget bar");
+//             :
+//         }
+//         {
+//             LL_PROFILE_ZONE_NAMED_CATEGORY_UI("widget qux");
+//             :
+//         }
+//     }
+//
+// ... as these can be selectively turned on/off.  This will minimize memory usage and visual clutter in a Tracy capture.
+// See llprofiler_categories.h for more details on profiling categories.
+
+#define LL_PROFILER_CONFIG_NONE             0  // No profiling
+#define LL_PROFILER_CONFIG_FAST_TIMER       1  // Profiling on: Only Fast Timers
+#define LL_PROFILER_CONFIG_TRACY            2  // Profiling on: Only Tracy
+#define LL_PROFILER_CONFIG_TRACY_FAST_TIMER 3  // Profiling on: Fast Timers + Tracy
+
+#ifndef LL_PROFILER_CONFIGURATION
+#define LL_PROFILER_CONFIGURATION           LL_PROFILER_CONFIG_FAST_TIMER
+#endif
+
+extern thread_local bool gProfilerEnabled;
+
+#if defined(LL_PROFILER_CONFIGURATION) && (LL_PROFILER_CONFIGURATION > LL_PROFILER_CONFIG_NONE)
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY || LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+        #define TRACY_ENABLE         1
+// Normally these would be enabled but we want to be able to build any viewer with Tracy enabled and run the Tracy server on another machine
+// They must be undefined in order to work across multiple machines
+//      #define TRACY_NO_BROADCAST   1
+//      #define TRACY_ONLY_LOCALHOST 1
+        #define TRACY_ONLY_IPV4      1
+        #include "Tracy.hpp"
+
+        // Mutually exclusive with detailed memory tracing
+        #define LL_PROFILER_ENABLE_TRACY_OPENGL 0
+    #endif
+
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY
+        #define LL_PROFILER_FRAME_END                   FrameMark
+        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true;
+        #define LL_RECORD_BLOCK_TIME(name)              ZoneScoped // Want descriptive names; was: ZoneNamedN( ___tracy_scoped_zone, #name, true );
+        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, name, true );
+        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
+        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped
+
+        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val )
+        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size )
+
+        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000  )  // RGB yellow
+        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF  )  // RGB cyan
+        #define LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 )  // RGB red
+        #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size)
+        #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr)
+    #endif
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_FAST_TIMER
+        #define LL_PROFILER_FRAME_END
+        #define LL_PROFILER_SET_THREAD_NAME( name )      (void)(name)
+        #define LL_RECORD_BLOCK_TIME(name)                                                                  const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+        #define LL_PROFILE_ZONE_NAMED(name)             // LL_PROFILE_ZONE_NAMED is a no-op when Tracy is disabled
+        #define LL_PROFILE_ZONE_SCOPED                  // LL_PROFILE_ZONE_SCOPED is a no-op when Tracy is disabled
+        #define LL_PROFILE_ZONE_COLOR(name,color)       // LL_RECORD_BLOCK_TIME(name)
+
+        #define LL_PROFILE_ZONE_NUM( val )              (void)( val );                // Not supported
+        #define LL_PROFILE_ZONE_TEXT( text, size )      (void)( text ); void( size ); // Not supported
+
+        #define LL_PROFILE_ZONE_ERR(name)               (void)(name); // Not supported
+        #define LL_PROFILE_ZONE_INFO(name)              (void)(name); // Not supported
+        #define LL_PROFILE_ZONE_WARN(name)              (void)(name); // Not supported
+        #define LL_PROFILE_ALLOC(ptr, size)             (void)(ptr); (void)(size);
+        #define LL_PROFILE_FREE(ptr)                    (void)(ptr);
+    #endif
+    #if LL_PROFILER_CONFIGURATION == LL_PROFILER_CONFIG_TRACY_FAST_TIMER
+        #define LL_PROFILER_FRAME_END                   FrameMark
+        #define LL_PROFILER_SET_THREAD_NAME( name )     tracy::SetThreadName( name );    gProfilerEnabled = true;
+        #define LL_RECORD_BLOCK_TIME(name)              ZoneNamedN(___tracy_scoped_zone, #name, true);   const LLTrace::BlockTimer& LL_GLUE_TOKENS(block_time_recorder, __LINE__)(LLTrace::timeThisBlock(name)); (void)LL_GLUE_TOKENS(block_time_recorder, __LINE__);
+        #define LL_PROFILE_ZONE_NAMED(name)             ZoneNamedN( ___tracy_scoped_zone, #name, true );
+        #define LL_PROFILE_ZONE_NAMED_COLOR(name,color) ZoneNamedNC( ___tracy_scopped_zone, name, color, true ) // RGB
+        #define LL_PROFILE_ZONE_SCOPED                  ZoneScoped
+
+        #define LL_PROFILE_ZONE_NUM( val )              ZoneValue( val )
+        #define LL_PROFILE_ZONE_TEXT( text, size )      ZoneText( text, size )
+
+        #define LL_PROFILE_ZONE_ERR(name)               LL_PROFILE_ZONE_NAMED_COLOR( name, 0XFF0000  )  // RGB yellow
+        #define LL_PROFILE_ZONE_INFO(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0X00FFFF  )  // RGB cyan
+        #define LL_PROFILE_ZONE_WARN(name)              LL_PROFILE_ZONE_NAMED_COLOR( name, 0x0FFFF00 )  // RGB red
+        #define LL_PROFILE_ALLOC(ptr, size)             TracyAlloc(ptr, size)
+        #define LL_PROFILE_FREE(ptr)                    TracyFree(ptr)
+    #endif
+#else
+    #define LL_PROFILER_FRAME_END
+    #define LL_PROFILER_SET_THREAD_NAME( name ) (void)(name)
+#endif // LL_PROFILER
+
+#include "llprofilercategories.h"
+
+#endif // LL_PROFILER_H
--- a/indra/llcommon/llprofilercategories.h
+++ b/indra/llcommon/llprofilercategories.h
@ -0,0 +1,280 @@
+/**
+ * @file llprofiler_ategories.h
+ * @brief Profiling categories to minimize Tracy memory usage when viewing captures.
+ *
+ * $LicenseInfo:firstyear=2022&license=viewerlgpl$
+ * Second Life Viewer Source Code
+ * Copyright (C) 2022, Linden Research, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License only.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * Linden Research, Inc., 945 Battery Street, San Francisco, CA  94111  USA
+ * $/LicenseInfo$
+ */
+
+#ifndef LL_PROFILER_CATEGORIES_H
+#define LL_PROFILER_CATEGORIES_H
+
+// A Tracy capture can quickly consume memory.  Use these defines to selectively turn on/off Tracy profiling for these categories.
+// The biggest memory usage ones are:
+//
+//    LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL
+//    LL_PROFILER_CATEGORY_ENABLE_LLSD
+//    LL_PROFILER_CATEGORY_ENABLE_MEMORY
+//    LL_PROFILER_CATEGORY_ENABLE_SHADERS
+//
+// NOTE: You can still manually use:
+//     LL_PROFILE_ZONE_SCOPED();
+//     LL_PROFILE_ZONE_NAMED("name");
+// but just be aware that those will ALWAYS show up in a Tracy capture
+//  a) using more memory, and
+//  b) adding visual clutter.
+#define LL_PROFILER_CATEGORY_ENABLE_APP         1
+#define LL_PROFILER_CATEGORY_ENABLE_AVATAR      1
+#define LL_PROFILER_CATEGORY_ENABLE_DISPLAY     1
+#define LL_PROFILER_CATEGORY_ENABLE_DRAWABLE    1
+#define LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL    1
+#define LL_PROFILER_CATEGORY_ENABLE_ENVIRONMENT 1
+#define LL_PROFILER_CATEGORY_ENABLE_FACE        1
+#define LL_PROFILER_CATEGORY_ENABLE_LLSD        1
+#define LL_PROFILER_CATEGORY_ENABLE_LOGGING     1
+#define LL_PROFILER_CATEGORY_ENABLE_MATERIAL    1
+#define LL_PROFILER_CATEGORY_ENABLE_MEDIA       1
+#define LL_PROFILER_CATEGORY_ENABLE_MEMORY      1
+#define LL_PROFILER_CATEGORY_ENABLE_NETWORK     1
+#define LL_PROFILER_CATEGORY_ENABLE_OCTREE      1
+#define LL_PROFILER_CATEGORY_ENABLE_PIPELINE    1
+#define LL_PROFILER_CATEGORY_ENABLE_SHADER      1
+#define LL_PROFILER_CATEGORY_ENABLE_SPATIAL     1
+#define LL_PROFILER_CATEGORY_ENABLE_STATS       1
+#define LL_PROFILER_CATEGORY_ENABLE_STRING      1
+#define LL_PROFILER_CATEGORY_ENABLE_TEXTURE     1
+#define LL_PROFILER_CATEGORY_ENABLE_THREAD      1
+#define LL_PROFILER_CATEGORY_ENABLE_UI          1
+#define LL_PROFILER_CATEGORY_ENABLE_VIEWER      1
+#define LL_PROFILER_CATEGORY_ENABLE_VERTEX      1
+#define LL_PROFILER_CATEGORY_ENABLE_VOLUME      1
+#define LL_PROFILER_CATEGORY_ENABLE_WIN32       1
+
+#if LL_PROFILER_CATEGORY_ENABLE_APP
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_APP  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_APP LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_APP(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_APP
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_AVATAR
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_DISPLAY
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DISPLAY(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DISPLAY
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_DRAWABLE
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWABLE  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWABLE LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWABLE(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWABLE
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_DRAWPOOL
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_DRAWPOOL(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_DRAWPOOL
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_ENVIRONMENT
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_ENVIRONMENT  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_ENVIRONMENT LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_ENVIRONMENT(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_ENVIRONMENT
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_FACE
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_FACE  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_FACE(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_FACE
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_LLSD
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LLSD  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LLSD(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_LOGGING
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LOGGING  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_LOGGING(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_LOGGING
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_MATERIAL
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_MEDIA
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEDIA  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEDIA LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEDIA(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEDIA
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_MEMORY
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEMORY  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_MEMORY(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_MEMORY
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_NETWORK
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_NETWORK  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_NETWORK LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_NETWORK(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_NETWORK
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_OCTREE
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_OCTREE  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_OCTREE LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_OCTREE(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_OCTREE
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_PIPELINE
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_PIPELINE(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_PIPELINE
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_SHADER
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SHADER  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SHADER(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_SPATIAL
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SPATIAL  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SPATIAL LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_SPATIAL(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_SPATIAL
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_STATS
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STATS  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STATS(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_STRING
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STRING  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_STRING(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_TEXTURE
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_THREAD
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_THREAD  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_THREAD(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_UI
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_UI  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_UI LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_UI(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_UI
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_VERTEX
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_VIEWER
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VIEWER  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VIEWER(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VIEWER
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_VOLUME
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_VOLUME(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_VOLUME
+#endif
+
+#if LL_PROFILER_CATEGORY_ENABLE_WIN32
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_WIN32  LL_PROFILE_ZONE_NAMED
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32 LL_PROFILE_ZONE_SCOPED
+#else
+    #define LL_PROFILE_ZONE_NAMED_CATEGORY_WIN32(name)
+    #define LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32
+#endif
+
+#endif // LL_PROFILER_CATEGORIES_H
+
--- a/indra/llcommon/llrefcount.cpp
+++ b/indra/llcommon/llrefcount.cpp
@ -29,6 +29,9 @@

 #include "llerror.h"

+// maximum reference count before sounding memory leak alarm
+const S32 gMaxRefCount = 65536;
+
 LLRefCount::LLRefCount(const LLRefCount& other)
 :	mRef(0)
 {
@ -47,7 +50,7 @@ LLRefCount::LLRefCount() :

 LLRefCount::~LLRefCount()
 { 
-	if (mRef != 0)
+	if (mRef != LL_REFCOUNT_FREE && mRef != 0)
 	{
 		LL_ERRS() << "deleting non-zero reference" << LL_ENDL;
 	}
--- a/indra/llcommon/llrefcount.h
+++ b/indra/llcommon/llrefcount.h
@ -37,6 +37,10 @@ class LLMutex;
 // see llthread.h for LLThreadSafeRefCount
 //----------------------------------------------------------------------------

+//nonsense but recognizable value for freed LLRefCount (aids in debugging)
+#define LL_REFCOUNT_FREE 1234567890
+extern const S32 gMaxRefCount;
+
 class LL_COMMON_API LLRefCount
 {
 protected:
@ -47,17 +51,25 @@ protected:
 public:
 	LLRefCount();

+    inline void validateRefCount() const
+    {
+        llassert(mRef > 0); // ref count below 0, likely corrupted
+        llassert(mRef < gMaxRefCount); // ref count excessive, likely memory leak
+    }
+
 	inline void ref() const
 	{ 
 		mRef++; 
+        validateRefCount();
 	} 

 	inline S32 unref() const
 	{
-		llassert(mRef >= 1);
+        validateRefCount();
 		if (0 == --mRef)
 		{
-			delete this; 
+            mRef = LL_REFCOUNT_FREE; // set to nonsense yet recognizable value to aid in debugging
+			delete this;
 			return 0;
 		}
 		return mRef;
--- a/indra/llcommon/llsd.cpp
+++ b/indra/llcommon/llsd.cpp
@ -400,6 +400,7 @@ namespace
 	
 	ImplMap& ImplMap::makeMap(LLSD::Impl*& var)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		if (shared())
 		{
 			ImplMap* i = new ImplMap(mData);
@ -414,18 +415,21 @@ namespace
 	
 	bool ImplMap::has(const LLSD::String& k) const
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		DataMap::const_iterator i = mData.find(k);
 		return i != mData.end();
 	}
 	
 	LLSD ImplMap::get(const LLSD::String& k) const
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		DataMap::const_iterator i = mData.find(k);
 		return (i != mData.end()) ? i->second : LLSD();
 	}

 	LLSD ImplMap::getKeys() const
 	{ 
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		LLSD keys = LLSD::emptyArray();
 		DataMap::const_iterator iter = mData.begin();
 		while (iter != mData.end())
@ -438,11 +442,13 @@ namespace

 	void ImplMap::insert(const LLSD::String& k, const LLSD& v)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		mData.insert(DataMap::value_type(k, v));
 	}
 	
 	void ImplMap::erase(const LLSD::String& k)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 		mData.erase(k);
 	}
 	
@ -684,6 +690,7 @@ const LLSD::Impl& LLSD::Impl::safe(const Impl* impl)

 ImplMap& LLSD::Impl::makeMap(Impl*& var)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
 	ImplMap* im = new ImplMap;
 	reset(var, im);
 	return *im;
@ -887,11 +894,16 @@ LLSD& LLSD::with(const String& k, const LLSD& v)
 										}
 void LLSD::erase(const String& k)		{ makeMap(impl).erase(k); }

-LLSD&		LLSD::operator[](const String& k)
-										{ return makeMap(impl).ref(k); }
+LLSD& LLSD::operator[](const String& k)
+{ 
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+    return makeMap(impl).ref(k); 
+}
 const LLSD& LLSD::operator[](const String& k) const
-										{ return safe(impl).ref(k); }
-
+{ 
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+    return safe(impl).ref(k); 
+}

 LLSD LLSD::emptyArray()
 {
@ -914,10 +926,16 @@ LLSD& LLSD::with(Integer i, const LLSD& v)
 LLSD& LLSD::append(const LLSD& v)		{ return makeArray(impl).append(v); }
 void LLSD::erase(Integer i)				{ makeArray(impl).erase(i); }

-LLSD&		LLSD::operator[](Integer i)
-										{ return makeArray(impl).ref(i); }
+LLSD& LLSD::operator[](Integer i)
+{ 
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+    return makeArray(impl).ref(i); 
+}
 const LLSD& LLSD::operator[](Integer i) const
-										{ return safe(impl).ref(i); }
+{ 
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+    return safe(impl).ref(i);
+}

 static const char *llsd_dump(const LLSD &llsd, bool useXMLFormat)
 {
--- a/indra/llcommon/llsd.h
+++ b/indra/llcommon/llsd.h
@ -290,9 +290,17 @@ public:
 		LLSD& with(const String&, const LLSD&);
 		
 		LLSD& operator[](const String&);
-		LLSD& operator[](const char* c)			{ return (*this)[String(c)]; }
+		LLSD& operator[](const char* c)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+            return (*this)[String(c)];
+        }
 		const LLSD& operator[](const String&) const;
-		const LLSD& operator[](const char* c) const	{ return (*this)[String(c)]; }
+		const LLSD& operator[](const char* c) const	
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_LLSD;
+            return (*this)[String(c)];
+        }
 	//@}
 	
 	/** @name Array Values */
--- a/indra/llcommon/llsdparam.cpp
+++ b/indra/llcommon/llsdparam.cpp
@ -37,8 +37,6 @@ static 	LLInitParam::Parser::parser_write_func_map_t sWriteFuncs;
 static 	LLInitParam::Parser::parser_inspect_func_map_t sInspectFuncs;
 static const LLSD NO_VALUE_MARKER;

-LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR("LLSD to LLInitParam conversion");
-
 //
 // LLParamSDParser
 //
--- a/indra/llcommon/llsdparam.h
+++ b/indra/llcommon/llsdparam.h
@ -110,7 +110,6 @@ private:
 };


-extern LL_COMMON_API LLTrace::BlockTimerStatHandle FTM_SD_PARAM_ADAPTOR;
 template<typename T>
 class LLSDParamAdapter : public T
 {
@ -118,7 +117,7 @@ public:
 	LLSDParamAdapter() {}
 	LLSDParamAdapter(const LLSD& sd)
 	{
-		LL_RECORD_BLOCK_TIME(FTM_SD_PARAM_ADAPTOR);
+        LL_PROFILE_ZONE_SCOPED;
 		LLParamSDParser parser;
 		// don't spam for implicit parsing of LLSD, as we want to allow arbitrary freeform data and ignore most of it
 		bool parse_silently = true;
--- a/indra/llcommon/llsdutil.cpp
+++ b/indra/llcommon/llsdutil.cpp
@ -215,6 +215,8 @@ BOOL compare_llsd_with_template(
 	const LLSD& template_llsd,
 	LLSD& resultant_llsd)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (
 		llsd_to_test.isUndefined() &&
 		template_llsd.isDefined() )
@ -336,6 +338,8 @@ bool filter_llsd_with_template(
 	const LLSD & template_llsd,
 	LLSD & resultant_llsd)
 {
+    LL_PROFILE_ZONE_SCOPED
+
 	if (llsd_to_test.isUndefined() && template_llsd.isDefined())
 	{
 		resultant_llsd = template_llsd;
@ -530,6 +534,8 @@ class TypeLookup
 public:
    TypeLookup()
    {
+        LL_PROFILE_ZONE_SCOPED
+
        for (const Data *di(boost::begin(typedata)), *dend(boost::end(typedata)); di != dend; ++di)
        {
            mMap[di->type] = di->name;
@ -538,6 +544,8 @@ public:

    std::string lookup(LLSD::Type type) const
    {
+        LL_PROFILE_ZONE_SCOPED
+
        MapType::const_iterator found = mMap.find(type);
        if (found != mMap.end())
        {
@ -588,6 +596,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()
                               LLSD::Type actual,        // type we're checking
                               const std::string& pfx)   // as for llsd_matches
 {
+    LL_PROFILE_ZONE_SCOPED
+
    // Trivial case: if the actual type is exactly what we expect, we're good.
    if (actual == expect)
        return "";
@ -625,6 +635,8 @@ static std::string match_types(LLSD::Type expect, // prototype.type()
 // see docstring in .h file
 std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::string& pfx)
 {
+    LL_PROFILE_ZONE_SCOPED
+
    // An undefined prototype means that any data is valid.
    // An undefined slot in an array or map prototype means that any data
    // may fill that slot.
@ -757,6 +769,8 @@ std::string llsd_matches(const LLSD& prototype, const LLSD& data, const std::str

 bool llsd_equals(const LLSD& lhs, const LLSD& rhs, int bits)
 {
+    LL_PROFILE_ZONE_SCOPED
+
    // We're comparing strict equality of LLSD representation rather than
    // performing any conversions. So if the types aren't equal, the LLSD
    // values aren't equal.
@ -865,6 +879,8 @@ namespace llsd

 LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)
 {
+    LL_PROFILE_ZONE_SCOPED
+
    // Treat rawPath uniformly as an array. If it's not already an array,
    // store it as the only entry in one. (But let's say Undefined means an
    // empty array.)
@ -890,6 +906,8 @@ LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)
    // path entry that's bad.
    for (LLSD::Integer i = 0; i < path.size(); ++i)
    {
+        LL_PROFILE_ZONE_NUM( i )
+
        const LLSD& key{path[i]};
        if (key.isString())
        {
@ -918,6 +936,8 @@ LLSD& drill_ref(LLSD& blob, const LLSD& rawPath)

 LLSD drill(const LLSD& blob, const LLSD& path)
 {
+    LL_PROFILE_ZONE_SCOPED
+
    // drill_ref() does exactly what we want. Temporarily cast away
    // const-ness and use that.
    return drill_ref(const_cast<LLSD&>(blob), path);
@ -930,6 +950,8 @@ LLSD drill(const LLSD& blob, const LLSD& path)
 // filter may be include to exclude/include keys in a map. 
 LLSD llsd_clone(LLSD value, LLSD filter)
 {
+    LL_PROFILE_ZONE_SCOPED
+
    LLSD clone;
    bool has_filter(filter.isMap());

--- a/indra/llcommon/llsingleton.h
+++ b/indra/llcommon/llsingleton.h
@ -455,6 +455,7 @@ public:

    static DERIVED_TYPE* getInstance()
    {
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
        // We know the viewer has LLSingleton dependency circularities. If you
        // feel strongly motivated to eliminate them, cheers and good luck.
        // (At that point we could consider a much simpler locking mechanism.)
@ -838,4 +839,36 @@ private:                                                                \
    /* LLSINGLETON() is carefully implemented to permit exactly this */ \
    LLSINGLETON_C11(DERIVED_CLASS) {}

+// Relatively unsafe singleton implementation that is much faster
+// and simpler than LLSingleton, but has no dependency tracking
+// or inherent thread safety and requires manual invocation of 
+// createInstance before first use.
+template<class T>
+class LLSimpleton
+{
+public:
+    template <typename... ARGS>
+    static void createInstance(ARGS&&... args)
+    {
+        llassert(sInstance == nullptr);
+        sInstance = new T(std::forward<ARGS>(args)...);
+    }
+
+    static inline T* getInstance() { return sInstance; }
+    static inline T& instance() { return *getInstance(); }
+    static inline bool instanceExists() { return sInstance != nullptr; }
+
+    static void deleteSingleton()
+    {
+        delete sInstance;
+        sInstance = nullptr;
+    }
+
+private:
+    static T* sInstance;
+};
+
+template <class T>
+T* LLSimpleton<T>::sInstance{ nullptr };
+
 #endif
--- a/indra/llcommon/llstring.cpp
+++ b/indra/llcommon/llstring.cpp
@ -37,9 +37,6 @@
 #include <winnls.h> // for WideCharToMultiByte
 #endif

-LLTrace::BlockTimerStatHandle FT_STRING_FORMAT("String Format");
-
-
 std::string ll_safe_string(const char* in)
 {
 	if(in) return std::string(in);
@ -215,7 +212,7 @@ S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
 	return inchars - base;
 }

-llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
+llutf16string wstring_to_utf16str(const llwchar* utf32str, size_t len)
 {
 	llutf16string out;

@ -237,27 +234,19 @@ llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
 	return out;
 }

-llutf16string wstring_to_utf16str(const LLWString &utf32str)
+llutf16string utf8str_to_utf16str( const char* utf8str, size_t len )
 {
-	const S32 len = (S32)utf32str.length();
-	return wstring_to_utf16str(utf32str, len);
-}
-
-llutf16string utf8str_to_utf16str ( const std::string& utf8str )
-{
-	LLWString wstr = utf8str_to_wstring ( utf8str );
+	LLWString wstr = utf8str_to_wstring ( utf8str, len );
 	return wstring_to_utf16str ( wstr );
 }

-
-LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
+LLWString utf16str_to_wstring(const U16* utf16str, size_t len)
 {
 	LLWString wout;
-	if((len <= 0) || utf16str.empty()) return wout;
+	if (len == 0) return wout;

 	S32 i = 0;
-	// craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
-	const U16* chars16 = &(*(utf16str.begin()));
+	const U16* chars16 = utf16str;
 	while (i < len)
 	{
 		llwchar cur_char;
@ -267,12 +256,6 @@ LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
 	return wout;
 }

-LLWString utf16str_to_wstring(const llutf16string &utf16str)
-{
-	const S32 len = (S32)utf16str.length();
-	return utf16str_to_wstring(utf16str, len);
-}
-
 // Length in llwchar (UTF-32) of the first len units (16 bits) of the given UTF-16 string.
 S32 utf16str_wstring_length(const llutf16string &utf16str, const S32 utf16_len)
 {
@ -392,8 +375,7 @@ S32 wstring_utf8_length(const LLWString& wstr)
 	return len;
 }

-
-LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
+LLWString utf8str_to_wstring(const char* utf8str, size_t len)
 {
 	LLWString wout;

@ -481,13 +463,7 @@ LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
 	return wout;
 }

-LLWString utf8str_to_wstring(const std::string& utf8str)
-{
-	const S32 len = (S32)utf8str.length();
-	return utf8str_to_wstring(utf8str, len);
-}
-
-std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
+std::string wstring_to_utf8str(const llwchar* utf32str, size_t len)
 {
 	std::string out;

@ -503,20 +479,9 @@ std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
 	return out;
 }

-std::string wstring_to_utf8str(const LLWString& utf32str)
+std::string utf16str_to_utf8str(const U16* utf16str, size_t len)
 {
-	const S32 len = (S32)utf32str.length();
-	return wstring_to_utf8str(utf32str, len);
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str)
-{
-	return wstring_to_utf8str(utf16str_to_wstring(utf16str));
-}
-
-std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
-{
-	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
+	return wstring_to_utf8str(utf16str_to_wstring(utf16str, len));
 }

 std::string utf8str_trim(const std::string& utf8str)
@ -657,17 +622,16 @@ std::string utf8str_removeCRLF(const std::string& utf8str)
 }

 #if LL_WINDOWS
-std::string ll_convert_wide_to_string(const wchar_t* in)
+unsigned int ll_wstring_default_code_page()
 {
-	return ll_convert_wide_to_string(in, CP_UTF8);
+    return CP_UTF8;
 }

-std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
+std::string ll_convert_wide_to_string(const wchar_t* in, size_t len_in, unsigned int code_page)
 {
 	std::string out;
 	if(in)
 	{
-		int len_in = wcslen(in);
 		int len_out = WideCharToMultiByte(
 			code_page,
 			0,
@ -699,12 +663,7 @@ std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page)
 	return out;
 }

-std::wstring ll_convert_string_to_wide(const std::string& in)
-{
-	return ll_convert_string_to_wide(in, CP_UTF8);
-}
-
-std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_page)
+std::wstring ll_convert_string_to_wide(const char* in, size_t len, unsigned int code_page)
 {
 	// From review:
 	// We can preallocate a wide char buffer that is the same length (in wchar_t elements) as the utf8 input,
@ -716,10 +675,10 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_

 	// reserve an output buffer that will be destroyed on exit, with a place
 	// to put NULL terminator
-	std::vector<wchar_t> w_out(in.length() + 1);
+	std::vector<wchar_t> w_out(len + 1);

 	memset(&w_out[0], 0, w_out.size());
-	int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(), in.length(),
+	int real_output_str_len = MultiByteToWideChar(code_page, 0, in, len,
 												  &w_out[0], w_out.size() - 1);

 	//looks like MultiByteToWideChar didn't add null terminator to converted string, see EXT-4858.
@ -729,30 +688,32 @@ std::wstring ll_convert_string_to_wide(const std::string& in, unsigned int code_
 	return {&w_out[0]};
 }

-LLWString ll_convert_wide_to_wstring(const std::wstring& in)
+LLWString ll_convert_wide_to_wstring(const wchar_t* in, size_t len)
 {
-    // This function, like its converse, is a placeholder, encapsulating a
-    // guilty little hack: the only "official" way nat has found to convert
-    // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
-    // by using iconv, which we've avoided so far. It kinda sorta works to
-    // just copy individual characters...
-    // The point is that if/when we DO introduce some more official way to
-    // perform such conversions, we should only have to call it here.
-    return { in.begin(), in.end() };
+    // Whether or not std::wstring and llutf16string are distinct types, they
+    // both hold UTF-16LE characters. (See header file comments.) Pretend this
+    // wchar_t* sequence is really a U16* sequence and use the conversion we
+    // define above.
+    return utf16str_to_wstring(reinterpret_cast<const U16*>(in), len);
 }

-std::wstring ll_convert_wstring_to_wide(const LLWString& in)
+std::wstring ll_convert_wstring_to_wide(const llwchar* in, size_t len)
 {
-    // See comments in ll_convert_wide_to_wstring()
-    return { in.begin(), in.end() };
+    // first, convert to llutf16string, for which we have a real implementation
+    auto utf16str{ wstring_to_utf16str(in, len) };
+    // then, because each U16 char must be UTF-16LE encoded, pretend the U16*
+    // string pointer is a wchar_t* and instantiate a std::wstring of the same
+    // length.
+    return { reinterpret_cast<const wchar_t*>(utf16str.c_str()), utf16str.length() };
 }

 std::string ll_convert_string_to_utf8_string(const std::string& in)
 {
-	auto w_mesg = ll_convert_string_to_wide(in, CP_ACP);
-	std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8));
-
-	return out_utf8;
+	// If you pass code_page, you must also pass length, otherwise the code
+	// page parameter will be mistaken for length.
+	auto w_mesg = ll_convert_string_to_wide(in, in.length(), CP_ACP);
+	// CP_UTF8 is default -- see ll_wstring_default_code_page() above.
+	return ll_convert_wide_to_string(w_mesg);
 }

 namespace
@ -1356,7 +1317,7 @@ bool LLStringUtil::formatDatetime(std::string& replacement, std::string token,
 template<> 
 S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
 {
-	LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
 	S32 res = 0;

 	std::string output;
@ -1429,7 +1390,7 @@ S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
 template<> 
 S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
 {
-	LL_RECORD_BLOCK_TIME(FT_STRING_FORMAT);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STRING;
 	S32 res = 0;

 	if (!substitutions.isMap()) 
--- a/indra/llcommon/llstring.h
+++ b/indra/llcommon/llstring.h
@ -27,9 +27,11 @@
 #ifndef LL_LLSTRING_H
 #define LL_LLSTRING_H

+#include <boost/call_traits.hpp>
 #include <boost/optional/optional.hpp>
 #include <string>
 #include <cstdio>
+#include <cwchar>                   // std::wcslen()
 //#include <locale>
 #include <iomanip>
 #include <algorithm>
@ -527,14 +529,71 @@ struct ll_convert_impl<T, T>
    T operator()(const T& in) const { return in; }
 };

+// simple construction from char*
+template<typename T>
+struct ll_convert_impl<T, const typename T::value_type*>
+{
+    T operator()(const typename T::value_type* in) const { return { in }; }
+};
+
 // specialize ll_convert_impl<TO, FROM> to return EXPR
 #define ll_convert_alias(TO, FROM, EXPR)                    \
 template<>                                                  \
 struct ll_convert_impl<TO, FROM>                            \
 {                                                           \
-    TO operator()(const FROM& in) const { return EXPR; }    \
+    /* param_type optimally passes both char* and string */ \
+    TO operator()(typename boost::call_traits<FROM>::param_type in) const { return EXPR; } \
 }

+// If all we're doing is copying characters, pass this to ll_convert_alias as
+// EXPR. Since it expands into the 'return EXPR' slot in the ll_convert_impl
+// specialization above, it implies TO{ in.begin(), in.end() }.
+#define LL_CONVERT_COPY_CHARS { in.begin(), in.end() }
+
+// Generic name for strlen() / wcslen() - the default implementation should
+// (!) work with U16 and llwchar, but we don't intend to engage it.
+template <typename CHARTYPE>
+size_t ll_convert_length(const CHARTYPE* zstr)
+{
+    const CHARTYPE* zp;
+    // classic C string scan
+    for (zp = zstr; *zp; ++zp)
+        ;
+    return (zp - zstr);
+}
+
+// specialize where we have a library function; may use intrinsic operations
+template <>
+inline size_t ll_convert_length<wchar_t>(const wchar_t* zstr) { return std::wcslen(zstr); }
+template <>
+inline size_t ll_convert_length<char>   (const char*    zstr) { return std::strlen(zstr); }
+
+// ll_convert_forms() is short for a bunch of boilerplate. It defines
+// longname(const char*, len), longname(const char*), longname(const string&)
+// and longname(const string&, len) so calls written pre-ll_convert() will
+// work. Most of these overloads will be unified once we turn on C++17 and can
+// use std::string_view.
+// It also uses aliasmacro to ensure that both ll_convert<OUTSTR>(const char*)
+// and ll_convert<OUTSTR>(const string&) will work.
+#define ll_convert_forms(aliasmacro, OUTSTR, INSTR, longname)           \
+LL_COMMON_API OUTSTR longname(const INSTR::value_type* in, size_t len); \
+inline auto longname(const INSTR& in, size_t len)                       \
+{                                                                       \
+    return longname(in.c_str(), len);                                   \
+}                                                                       \
+inline auto longname(const INSTR::value_type* in)                       \
+{                                                                       \
+    return longname(in, ll_convert_length(in));                         \
+}                                                                       \
+inline auto longname(const INSTR& in)                                   \
+{                                                                       \
+    return longname(in.c_str(), in.length());                           \
+}                                                                       \
+/* string param */                                                      \
+aliasmacro(OUTSTR, INSTR, longname(in));                                \
+/* char* param */                                                       \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))
+
 // Make the incoming string a utf8 string. Replaces any unknown glyph
 // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
 // of the data may not be recovered.
@ -571,63 +630,47 @@ LL_COMMON_API std::string rawstr_to_utf8(const std::string& raw);
 // LL_WCHAR_T_NATIVE.
 typedef std::basic_string<U16> llutf16string;

-#if ! defined(LL_WCHAR_T_NATIVE)
-// wchar_t is identical to U16, and std::wstring is identical to llutf16string.
-// Defining an ll_convert alias involving llutf16string would collide with the
-// comparable preferred alias involving std::wstring. (In this scenario, if
-// you pass llutf16string, it will engage the std::wstring specialization.)
-#define ll_convert_u16_alias(TO, FROM, EXPR) // nothing
-#else  // defined(LL_WCHAR_T_NATIVE)
-// wchar_t is a distinct native type, so llutf16string is also a distinct
-// type, and there IS a point to converting separately to/from llutf16string.
-// (But why? Windows APIs are still defined in terms of wchar_t, and
-// in this scenario llutf16string won't work for them!)
-#define ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+// Considering wchar_t, llwchar and U16, there are three relevant cases:
+#if LLWCHAR_IS_WCHAR_T         // every which way but Windows
+// llwchar is identical to wchar_t, LLWString is identical to std::wstring.
+// U16 is distinct, llutf16string is distinct (though pretty useless).
+// Given conversions to/from LLWString and to/from llutf16string, conversions
+// involving std::wstring would collide.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving llutf16string without collisions
+#define  ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)

-#if LL_WINDOWS
-// LL_WCHAR_T_NATIVE is defined on non-Windows systems because, in fact,
-// wchar_t is native. Everywhere but Windows, we use it for llwchar (see
-// stdtypes.h). That makes LLWString identical to std::wstring, so these
-// aliases for std::wstring would collide with those for LLWString. Only
-// define on Windows, where converting between std::wstring and llutf16string
-// means copying chars.
-ll_convert_alias(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
-ll_convert_alias(std::wstring, llutf16string,  std::wstring(in.begin(), in.end()));
-#endif // LL_WINDOWS
-#endif // defined(LL_WCHAR_T_NATIVE)
+#elif defined(LL_WCHAR_T_NATIVE)    // Windows, either clang or MS /Zc:wchar_t
+// llwchar (32-bit), wchar_t (16-bit) and U16 are all different types.
+// Conversions to/from LLWString, to/from std::wstring and to/from llutf16string
+// can all be defined.
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#define  ll_convert_u16_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)

-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len);
-LL_COMMON_API LLWString utf16str_to_wstring(const llutf16string &utf16str);
-ll_convert_u16_alias(LLWString, llutf16string, utf16str_to_wstring(in));
+#else  // ! LL_WCHAR_T_NATIVE: Windows with MS /Zc:wchar_t-
+// wchar_t is identical to U16, std::wstring is identical to llutf16string.
+// Given conversions to/from LLWString and to/from std::wstring, conversions
+// involving llutf16string would collide.
+#define  ll_convert_u16_alias(TO, FROM, EXPR) // nothing
+// but we can define conversions involving std::wstring without collisions
+#define ll_convert_wstr_alias(TO, FROM, EXPR) ll_convert_alias(TO, FROM, EXPR)
+#endif

-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len);
-LL_COMMON_API llutf16string wstring_to_utf16str(const LLWString &utf32str);
-ll_convert_u16_alias(llutf16string, LLWString, wstring_to_utf16str(in));
+ll_convert_forms(ll_convert_u16_alias, LLWString,     llutf16string, utf16str_to_wstring);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, LLWString,     wstring_to_utf16str);
+ll_convert_forms(ll_convert_u16_alias, llutf16string, std::string,   utf8str_to_utf16str);
+ll_convert_forms(ll_convert_alias,     LLWString,     std::string,   utf8str_to_wstring);

-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str, S32 len);
-LL_COMMON_API llutf16string utf8str_to_utf16str ( const std::string& utf8str );
-ll_convert_u16_alias(llutf16string, std::string, utf8str_to_utf16str(in));
-
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str, S32 len);
-LL_COMMON_API LLWString utf8str_to_wstring(const std::string &utf8str);
 // Same function, better name. JC
 inline LLWString utf8string_to_wstring(const std::string& utf8_string) { return utf8str_to_wstring(utf8_string); }
-// best name of all
-ll_convert_alias(LLWString, std::string, utf8string_to_wstring(in));

-//
 LL_COMMON_API S32 wchar_to_utf8chars(llwchar inchar, char* outchars);

-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str, S32 len);
-LL_COMMON_API std::string wstring_to_utf8str(const LLWString &utf32str);
-ll_convert_alias(std::string, LLWString, wstring_to_utf8str(in));
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str, S32 len);
-LL_COMMON_API std::string utf16str_to_utf8str(const llutf16string &utf16str);
-ll_convert_u16_alias(std::string, llutf16string, utf16str_to_utf8str(in));
+ll_convert_forms(ll_convert_alias,     std::string, LLWString,     wstring_to_utf8str);
+ll_convert_forms(ll_convert_u16_alias, std::string, llutf16string, utf16str_to_utf8str);

-#if LL_WINDOWS
+// an older alias for utf16str_to_utf8str(llutf16string)
 inline std::string wstring_to_utf8str(const llutf16string &utf16str) { return utf16str_to_utf8str(utf16str);}
-#endif

 // Length of this UTF32 string in bytes when transformed to UTF8
 LL_COMMON_API S32 wstring_utf8_length(const LLWString& wstr); 
@ -701,42 +744,48 @@ LL_COMMON_API std::string utf8str_removeCRLF(const std::string& utf8str);
 //@{

 /**
- * @brief Convert a wide string to std::string
+ * @brief Convert a wide string to/from std::string
+ * Convert a Windows wide string to/from our LLWString
 *
 * This replaces the unsafe W2A macro from ATL.
 */
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in, unsigned int code_page);
-LL_COMMON_API std::string ll_convert_wide_to_string(const wchar_t* in); // default CP_UTF8
-inline std::string ll_convert_wide_to_string(const std::wstring& in, unsigned int code_page)
-{
-    return ll_convert_wide_to_string(in.c_str(), code_page);
-}
-inline std::string ll_convert_wide_to_string(const std::wstring& in)
-{
-    return ll_convert_wide_to_string(in.c_str());
-}
-ll_convert_alias(std::string, std::wstring, ll_convert_wide_to_string(in));
+// Avoid requiring this header to #include the Windows header file declaring
+// our actual default code_page by delegating this function to our .cpp file.
+LL_COMMON_API unsigned int ll_wstring_default_code_page();

-/**
- * Converts a string to wide string.
- */
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in,
-                                                     unsigned int code_page);
-LL_COMMON_API std::wstring ll_convert_string_to_wide(const std::string& in);
-                                                     // default CP_UTF8
-ll_convert_alias(std::wstring, std::string, ll_convert_string_to_wide(in));
+// This is like ll_convert_forms(), with the added complexity of a code page
+// parameter that may or may not be passed.
+#define ll_convert_cp_forms(aliasmacro, OUTSTR, INSTR, longname)    \
+/* declare the only nontrivial implementation (in .cpp file) */     \
+LL_COMMON_API OUTSTR longname(                                      \
+    const INSTR::value_type* in,                                    \
+    size_t len,                                                     \
+    unsigned int code_page=ll_wstring_default_code_page());         \
+/* if passed only a char pointer, scan for nul terminator */        \
+inline auto longname(const INSTR::value_type* in)                   \
+{                                                                   \
+    return longname(in, ll_convert_length(in));                     \
+}                                                                   \
+/* if passed string and length, extract its char pointer */         \
+inline auto longname(                                               \
+    const INSTR& in,                                                \
+    size_t len,                                                     \
+    unsigned int code_page=ll_wstring_default_code_page())          \
+{                                                                   \
+    return longname(in.c_str(), len, code_page);                    \
+}                                                                   \
+/* if passed only a string object, no scan, pass known length */    \
+inline auto longname(const INSTR& in)                               \
+{                                                                   \
+    return longname(in.c_str(), in.length());                       \
+}                                                                   \
+aliasmacro(OUTSTR, INSTR, longname(in));                            \
+aliasmacro(OUTSTR, const INSTR::value_type*, longname(in))

-/**
- * Convert a Windows wide string to our LLWString
- */
-LL_COMMON_API LLWString ll_convert_wide_to_wstring(const std::wstring& in);
-ll_convert_alias(LLWString, std::wstring, ll_convert_wide_to_wstring(in));
-
-/**
- * Convert LLWString to Windows wide string
- */
-LL_COMMON_API std::wstring ll_convert_wstring_to_wide(const LLWString& in);
-ll_convert_alias(std::wstring, LLWString, ll_convert_wstring_to_wide(in));
+ll_convert_cp_forms(ll_convert_wstr_alias, std::string,  std::wstring, ll_convert_wide_to_string);
+ll_convert_cp_forms(ll_convert_wstr_alias, std::wstring, std::string,  ll_convert_string_to_wide);
+   ll_convert_forms(ll_convert_wstr_alias, LLWString,    std::wstring, ll_convert_wide_to_wstring);
+   ll_convert_forms(ll_convert_wstr_alias, std::wstring, LLWString,    ll_convert_wstring_to_wide);

 /**
 * Converts incoming string into utf8 string
@ -1937,4 +1986,14 @@ void LLStringUtilBase<T>::truncate(string_type& string, size_type count)
 	string.resize(count < cur_size ? count : cur_size);
 }

+// The good thing about *declaration* macros, vs. usage macros, is that now
+// we're done with them: we don't need them to bleed into the consuming source
+// file.
+#undef ll_convert_alias
+#undef ll_convert_u16_alias
+#undef ll_convert_wstr_alias
+#undef LL_CONVERT_COPY_CHARS
+#undef ll_convert_forms
+#undef ll_convert_cp_forms
+
 #endif  // LL_STRING_H
--- a/indra/llcommon/llsys.cpp
+++ b/indra/llcommon/llsys.cpp
@ -843,6 +843,7 @@ LLSD LLMemoryInfo::getStatsMap() const

 LLMemoryInfo& LLMemoryInfo::refresh()
 {
+	LL_PROFILE_ZONE_SCOPED
 	mStatsMap = loadStatsMap();

 	LL_DEBUGS("LLMemoryInfo") << "Populated mStatsMap:\n";
@ -852,11 +853,9 @@ LLMemoryInfo& LLMemoryInfo::refresh()
 	return *this;
 }

-static LLTrace::BlockTimerStatHandle FTM_MEMINFO_LOAD_STATS("MemInfo Load Stats");
-
 LLSD LLMemoryInfo::loadStatsMap()
 {
-	LL_RECORD_BLOCK_TIME(FTM_MEMINFO_LOAD_STATS);
+    LL_PROFILE_ZONE_SCOPED;

 	// This implementation is derived from stream() code (as of 2011-06-29).
 	Stats stats;
--- a/indra/llcommon/llthread.cpp
+++ b/indra/llcommon/llthread.cpp
@ -135,6 +135,8 @@ void LLThread::threadRun()
    set_thread_name(-1, mName.c_str());
 #endif

+    LL_PROFILER_SET_THREAD_NAME( mName.c_str() );
+
    // this is the first point at which we're actually running in the new thread
    mID = currentID();

@ -331,6 +333,7 @@ bool LLThread::runCondition(void)
 // Stop thread execution if requested until unpaused.
 void LLThread::checkPause()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    mDataLock->lock();

    // This is in a while loop because the pthread API allows for spurious wakeups.
@ -362,17 +365,20 @@ void LLThread::setQuitting()
 // static
 LLThread::id_t LLThread::currentID()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    return std::this_thread::get_id();
 }

 // static
 void LLThread::yield()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    std::this_thread::yield();
 }

 void LLThread::wake()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    mDataLock->lock();
    if(!shouldSleep())
    {
@ -383,6 +389,7 @@ void LLThread::wake()

 void LLThread::wakeLocked()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    if(!shouldSleep())
    {
        mRunCondition->signal();
@ -391,11 +398,13 @@ void LLThread::wakeLocked()

 void LLThread::lockData()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    mDataLock->lock();
 }

 void LLThread::unlockData()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD
    mDataLock->unlock();
 }

--- a/indra/llcommon/llthreadsafequeue.h
+++ b/indra/llcommon/llthreadsafequeue.h
@ -1,6 +1,6 @@
 /** 
 * @file llthreadsafequeue.h
- * @brief Base classes for thread, mutex and condition handling.
+ * @brief Queue protected with mutexes for cross-thread use
 *
 * $LicenseInfo:firstyear=2004&license=viewerlgpl$
 * Second Life Viewer Source Code
@ -27,16 +27,19 @@
 #ifndef LL_LLTHREADSAFEQUEUE_H
 #define LL_LLTHREADSAFEQUEUE_H

-#include "llexception.h"
-#include <deque>
-#include <string>
-#include <chrono>
-#include "mutex.h"
 #include "llcoros.h"
 #include LLCOROS_MUTEX_HEADER
 #include <boost/fiber/timed_mutex.hpp>
 #include LLCOROS_CONDVAR_HEADER
+#include "llexception.h"
+#include "mutex.h"
+#include <chrono>
+#include <queue>
+#include <string>

+/*****************************************************************************
+*   LLThreadSafeQueue
+*****************************************************************************/
 //
 // A general queue exception.
 //
@ -66,70 +69,116 @@ public:
 	}
 };

-//
-// Implements a thread safe FIFO.
-//
-template<typename ElementT>
+/**
+ * Implements a thread safe FIFO.
+ */
+// Let the default std::queue default to underlying std::deque. Override if
+// desired.
+template<typename ElementT, typename QueueT=std::queue<ElementT>>
 class LLThreadSafeQueue
 {
 public:
 	typedef ElementT value_type;
-	
-	// If the pool is set to NULL one will be allocated and managed by this
-	// queue.
+
+	// Limiting the number of pending items prevents unbounded growth of the
+	// underlying queue.
 	LLThreadSafeQueue(U32 capacity = 1024);
-	
-	// Add an element to the front of queue (will block if the queue has
-	// reached capacity).
+	virtual ~LLThreadSafeQueue() {}
+
+	// Add an element to the queue (will block if the queue has reached
+	// capacity).
 	//
 	// This call will raise an interrupt error if the queue is closed while
 	// the caller is blocked.
-	void pushFront(ElementT const & element);
-	
-	// Try to add an element to the front of queue without blocking. Returns
-	// true only if the element was actually added.
-	bool tryPushFront(ElementT const & element);
+	template <typename T>
+	void push(T&& element);
+	// legacy name
+	void pushFront(ElementT const & element) { return push(element); }

-	// Try to add an element to the front of queue, blocking if full but with
-	// timeout. Returns true if the element was added.
+	// Add an element to the queue (will block if the queue has reached
+	// capacity). Return false if the queue is closed before push is possible.
+	template <typename T>
+	bool pushIfOpen(T&& element);
+
+	// Try to add an element to the queue without blocking. Returns
+	// true only if the element was actually added.
+	template <typename T>
+	bool tryPush(T&& element);
+	// legacy name
+	bool tryPushFront(ElementT const & element) { return tryPush(element); }
+
+	// Try to add an element to the queue, blocking if full but with timeout
+	// after specified duration. Returns true if the element was added.
 	// There are potentially two different timeouts involved: how long to try
 	// to lock the mutex, versus how long to wait for the queue to stop being
 	// full. Careful settings for each timeout might be orders of magnitude
 	// apart. However, this method conflates them.
+	template <typename Rep, typename Period, typename T>
+	bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+					T&& element);
+	// legacy name
 	template <typename Rep, typename Period>
 	bool tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout,
-						 ElementT const & element);
+						 ElementT const & element) { return tryPushFor(timeout, element); }

-	// Pop the element at the end of the queue (will block if the queue is
+	// Try to add an element to the queue, blocking if full but with
+	// timeout at specified time_point. Returns true if the element was added.
+	template <typename Clock, typename Duration, typename T>
+	bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+					  T&& element);
+	// no legacy name because this is a newer method
+
+	// Pop the element at the head of the queue (will block if the queue is
 	// empty).
 	//
 	// This call will raise an interrupt error if the queue is closed while
 	// the caller is blocked.
-	ElementT popBack(void);
-	
-	// Pop an element from the end of the queue if there is one available.
+	ElementT pop(void);
+	// legacy name
+	ElementT popBack(void) { return pop(); }
+
+	// Pop an element from the head of the queue if there is one available.
 	// Returns true only if an element was popped.
-	bool tryPopBack(ElementT & element);
-	
+	bool tryPop(ElementT & element);
+	// legacy name
+	bool tryPopBack(ElementT & element) { return tryPop(element); }
+
+	// Pop the element at the head of the queue, blocking if empty, with
+	// timeout after specified duration. Returns true if an element was popped.
+	template <typename Rep, typename Period>
+	bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, ElementT& element);
+	// no legacy name because this is a newer method
+
+	// Pop the element at the head of the queue, blocking if empty, with
+	// timeout at specified time_point. Returns true if an element was popped.
+	template <typename Clock, typename Duration>
+	bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+					 ElementT& element);
+	// no legacy name because this is a newer method
+
 	// Returns the size of the queue.
 	size_t size();

+    //Returns the capacity of the queue.
+    U32 capacity() { return mCapacity; }
+
 	// closes the queue:
-	// - every subsequent pushFront() call will throw LLThreadSafeQueueInterrupt
-	// - every subsequent tryPushFront() call will return false
-	// - popBack() calls will return normally until the queue is drained, then
-	//   every subsequent popBack() will throw LLThreadSafeQueueInterrupt
-	// - tryPopBack() calls will return normally until the queue is drained,
-	//   then every subsequent tryPopBack() call will return false
+	// - every subsequent push() call will throw LLThreadSafeQueueInterrupt
+	// - every subsequent tryPush() call will return false
+	// - pop() calls will return normally until the queue is drained, then
+	//   every subsequent pop() will throw LLThreadSafeQueueInterrupt
+	// - tryPop() calls will return normally until the queue is drained,
+	//   then every subsequent tryPop() call will return false
 	void close();

-	// detect closed state
+	// producer end: are we prevented from pushing any additional items?
 	bool isClosed();
-	// inverse of isClosed()
-	explicit operator bool();
+	// consumer end: are we done, is the queue entirely drained?
+	bool done();

-private:
-	std::deque< ElementT > mStorage;
+protected:
+	typedef QueueT queue_type;
+	QueueT mStorage;
 	U32 mCapacity;
 	bool mClosed;

@ -137,37 +186,154 @@ private:
 	typedef std::unique_lock<decltype(mLock)> lock_t;
 	boost::fibers::condition_variable_any mCapacityCond;
 	boost::fibers::condition_variable_any mEmptyCond;
+
+	enum pop_result { EMPTY, DONE, WAITING, POPPED };
+	// implementation logic, suitable for passing to tryLockUntil()
+	template <typename Clock, typename Duration>
+	pop_result tryPopUntil_(lock_t& lock,
+							const std::chrono::time_point<Clock, Duration>& until,
+							ElementT& element);
+	// if we're able to lock immediately, do so and run the passed callable,
+	// which must accept lock_t& and return bool
+	template <typename CALLABLE>
+	bool tryLock(CALLABLE&& callable);
+	// if we're able to lock before the passed time_point, do so and run the
+	// passed callable, which must accept lock_t& and return bool
+	template <typename Clock, typename Duration, typename CALLABLE>
+	bool tryLockUntil(const std::chrono::time_point<Clock, Duration>& until,
+					  CALLABLE&& callable);
+	// while lock is locked, really push the passed element, if we can
+	template <typename T>
+	bool push_(lock_t& lock, T&& element);
+	// while lock is locked, really pop the head element, if we can
+	pop_result pop_(lock_t& lock, ElementT& element);
+	// Is the current head element ready to pop? We say yes; subclass can
+	// override as needed.
+	virtual bool canPop(const ElementT& head) const { return true; }
 };

-// LLThreadSafeQueue
-//-----------------------------------------------------------------------------
+/*****************************************************************************
+*   PriorityQueueAdapter
+*****************************************************************************/
+namespace LL
+{
+    /**
+     * std::priority_queue's API is almost like std::queue, intentionally of
+     * course, but you must access the element about to pop() as top() rather
+     * than as front(). Make an adapter for use with LLThreadSafeQueue.
+     */
+    template <typename T, typename Container=std::vector<T>,
+              typename Compare=std::less<typename Container::value_type>>
+    class PriorityQueueAdapter
+    {
+    public:
+        // publish all the same types
+        typedef std::priority_queue<T, Container, Compare> queue_type;
+        typedef typename queue_type::container_type  container_type;
+        typedef typename queue_type::value_compare   value_compare;
+        typedef typename queue_type::value_type      value_type;
+        typedef typename queue_type::size_type       size_type;
+        typedef typename queue_type::reference       reference;
+        typedef typename queue_type::const_reference const_reference;

-template<typename ElementT>
-LLThreadSafeQueue<ElementT>::LLThreadSafeQueue(U32 capacity) :
+        // Although std::queue defines both const and non-const front()
+        // methods, std::priority_queue defines only const top().
+        const_reference front() const { return mQ.top(); }
+        // std::priority_queue has no equivalent to back(), so it's good that
+        // LLThreadSafeQueue doesn't use it.
+
+        // All the rest of these merely forward to the corresponding
+        // queue_type methods.
+        bool empty() const                 { return mQ.empty(); }
+        size_type size() const             { return mQ.size(); }
+        void push(const value_type& value) { mQ.push(value); }
+        void push(value_type&& value)      { mQ.push(std::move(value)); }
+        template <typename... Args>
+        void emplace(Args&&... args)       { mQ.emplace(std::forward<Args>(args)...); }
+        void pop()                         { mQ.pop(); }
+
+    private:
+        queue_type mQ;
+    };
+} // namespace LL
+
+
+/*****************************************************************************
+*   LLThreadSafeQueue implementation
+*****************************************************************************/
+template<typename ElementT, typename QueueT>
+LLThreadSafeQueue<ElementT, QueueT>::LLThreadSafeQueue(U32 capacity) :
    mCapacity(capacity),
    mClosed(false)
 {
 }


-template<typename ElementT>
-void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element)
+// if we're able to lock immediately, do so and run the passed callable, which
+// must accept lock_t& and return bool
+template <typename ElementT, typename QueueT>
+template <typename CALLABLE>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryLock(CALLABLE&& callable)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    lock_t lock1(mLock, std::defer_lock);
+    if (!lock1.try_lock())
+        return false;
+
+    return std::forward<CALLABLE>(callable)(lock1);
+}
+
+
+// if we're able to lock before the passed time_point, do so and run the
+// passed callable, which must accept lock_t& and return bool
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration, typename CALLABLE>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryLockUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    CALLABLE&& callable)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    lock_t lock1(mLock, std::defer_lock);
+    if (!lock1.try_lock_until(until))
+        return false;
+
+    return std::forward<CALLABLE>(callable)(lock1);
+}
+
+
+// while lock is locked, really push the passed element, if we can
+template <typename ElementT, typename QueueT>
+template <typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::push_(lock_t& lock, T&& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    if (mStorage.size() >= mCapacity)
+        return false;
+
+    mStorage.push(std::forward<T>(element));
+    lock.unlock();
+    // now that we've pushed, if somebody's been waiting to pop, signal them
+    mEmptyCond.notify_one();
+    return true;
+}
+
+
+template <typename ElementT, typename QueueT>
+template <typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::pushIfOpen(T&& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
    lock_t lock1(mLock);
    while (true)
    {
+        // On the producer side, it doesn't matter whether the queue has been
+        // drained or not: the moment either end calls close(), further push()
+        // operations will fail.
        if (mClosed)
-        {
-            LLTHROW(LLThreadSafeQueueInterrupt());
-        }
+            return false;

-        if (mStorage.size() < mCapacity)
-        {
-            mStorage.push_front(element);
-            lock1.unlock();
-            mEmptyCond.notify_one();
-            return;
-        }
+        if (push_(lock1, std::forward<T>(element)))
+            return true;

        // Storage Full. Wait for signal.
        mCapacityCond.wait(lock1);
@ -175,39 +341,205 @@ void LLThreadSafeQueue<ElementT>::pushFront(ElementT const & element)
 }


-template <typename ElementT>
-template <typename Rep, typename Period>
-bool LLThreadSafeQueue<ElementT>::tryPushFrontFor(const std::chrono::duration<Rep, Period>& timeout,
-                                                  ElementT const & element)
+template <typename ElementT, typename QueueT>
+template<typename T>
+void LLThreadSafeQueue<ElementT, QueueT>::push(T&& element)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    if (! pushIfOpen(std::forward<T>(element)))
+    {
+        LLTHROW(LLThreadSafeQueueInterrupt());
+    }
+}
+
+
+template<typename ElementT, typename QueueT>
+template<typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPush(T&& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    return tryLock(
+        [this, element=std::move(element)](lock_t& lock)
+        {
+            if (mClosed)
+                return false;
+            return push_(lock, std::move(element));
+        });
+}
+
+
+template <typename ElementT, typename QueueT>
+template <typename Rep, typename Period, typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPushFor(
+    const std::chrono::duration<Rep, Period>& timeout,
+    T&& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
    // Convert duration to time_point: passing the same timeout duration to
    // each of multiple calls is wrong.
-    auto endpoint = std::chrono::steady_clock::now() + timeout;
+    return tryPushUntil(std::chrono::steady_clock::now() + timeout,
+                        std::forward<T>(element));
+}

-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock_until(endpoint))
-        return false;

+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration, typename T>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPushUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    T&& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    return tryLockUntil(
+        until,
+        [this, until, element=std::move(element)](lock_t& lock)
+        {
+            while (true)
+            {
+                if (mClosed)
+                {
+                    return false;
+                }
+
+                if (push_(lock, std::move(element)))
+                    return true;
+
+                // Storage Full. Wait for signal.
+                if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock, until))
+                {
+                    // timed out -- formally we might recheck both conditions above
+                    return false;
+                }
+                // If we didn't time out, we were notified for some reason. Loop back
+                // to check.
+            }
+        });
+}
+
+
+// while lock is locked, really pop the head element, if we can
+template <typename ElementT, typename QueueT>
+typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
+LLThreadSafeQueue<ElementT, QueueT>::pop_(lock_t& lock, ElementT& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    // If mStorage is empty, there's no head element.
+    if (mStorage.empty())
+        return mClosed? DONE : EMPTY;
+
+    // If there's a head element, pass it to canPop() to see if it's ready to pop. 
+    if (! canPop(mStorage.front()))
+        return WAITING;
+
+    // std::queue::front() is the element about to pop()
+    element = mStorage.front();
+    mStorage.pop();
+    lock.unlock();
+    // now that we've popped, if somebody's been waiting to push, signal them
+    mCapacityCond.notify_one();
+    return POPPED;
+}
+
+
+template<typename ElementT, typename QueueT>
+ElementT LLThreadSafeQueue<ElementT, QueueT>::pop(void)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    lock_t lock1(mLock);
+    ElementT value;
    while (true)
    {
-        if (mClosed)
+        // On the consumer side, we always try to pop before checking mClosed
+        // so we can finish draining the queue.
+        pop_result popped = pop_(lock1, value);
+        if (popped == POPPED)
+            return std::move(value);
+
+        // Once the queue is DONE, there will never be any more coming.
+        if (popped == DONE)
        {
-            return false;
+            LLTHROW(LLThreadSafeQueueInterrupt());
        }

-        if (mStorage.size() < mCapacity)
+        // If we didn't pop because WAITING, i.e. canPop() returned false,
+        // then even if the producer end has been closed, there's still at
+        // least one item to drain: wait for it. Or we might be EMPTY, with
+        // the queue still open. Either way, wait for signal.
+        mEmptyCond.wait(lock1);
+    }
+}
+
+
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPop(ElementT & element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    return tryLock(
+        [this, &element](lock_t& lock)
        {
-            mStorage.push_front(element);
-            lock1.unlock();
-            mEmptyCond.notify_one();
-            return true;
+            // conflate EMPTY, DONE, WAITING: tryPop() behavior when the queue
+            // is closed is implemented by simple inability to push any new
+            // elements
+            return pop_(lock, element) == POPPED;
+        });
+}
+
+
+template <typename ElementT, typename QueueT>
+template <typename Rep, typename Period>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPopFor(
+    const std::chrono::duration<Rep, Period>& timeout,
+    ElementT& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    // Convert duration to time_point: passing the same timeout duration to
+    // each of multiple calls is wrong.
+    return tryPopUntil(std::chrono::steady_clock::now() + timeout, element);
+}
+
+
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration>
+bool LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil(
+    const std::chrono::time_point<Clock, Duration>& until,
+    ElementT& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    return tryLockUntil(
+        until,
+        [this, until, &element](lock_t& lock)
+        {
+            // conflate EMPTY, DONE, WAITING
+            return tryPopUntil_(lock, until, element) == POPPED;
+        });
+}
+
+
+// body of tryPopUntil(), called once we have the lock
+template <typename ElementT, typename QueueT>
+template <typename Clock, typename Duration>
+typename LLThreadSafeQueue<ElementT, QueueT>::pop_result
+LLThreadSafeQueue<ElementT, QueueT>::tryPopUntil_(
+    lock_t& lock,
+    const std::chrono::time_point<Clock, Duration>& until,
+    ElementT& element)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    while (true)
+    {
+        pop_result popped = pop_(lock, element);
+        if (popped == POPPED || popped == DONE)
+        {
+            // If we succeeded, great! If we've drained the last item, so be
+            // it. Either way, break the loop and tell caller.
+            return popped;
        }

-        // Storage Full. Wait for signal.
-        if (LLCoros::cv_status::timeout == mCapacityCond.wait_until(lock1, endpoint))
+        // EMPTY or WAITING: wait for signal.
+        if (LLCoros::cv_status::timeout == mEmptyCond.wait_until(lock, until))
        {
-            // timed out -- formally we might recheck both conditions above
-            return false;
+            // timed out -- formally we might recheck
+            // as it is, break loop
+            return popped;
        }
        // If we didn't time out, we were notified for some reason. Loop back
        // to check.
@ -215,102 +547,44 @@ bool LLThreadSafeQueue<ElementT>::tryPushFrontFor(const std::chrono::duration<Re
 }


-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::tryPushFront(ElementT const & element)
-{
-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock())
-        return false;
-
-    if (mClosed)
-        return false;
-
-    if (mStorage.size() >= mCapacity)
-        return false;
-
-    mStorage.push_front(element);
-    lock1.unlock();
-    mEmptyCond.notify_one();
-    return true;
-}
-
-
-template<typename ElementT>
-ElementT LLThreadSafeQueue<ElementT>::popBack(void)
-{
-    lock_t lock1(mLock);
-    while (true)
-    {
-        if (!mStorage.empty())
-        {
-            ElementT value = mStorage.back();
-            mStorage.pop_back();
-            lock1.unlock();
-            mCapacityCond.notify_one();
-            return value;
-        }
-
-        if (mClosed)
-        {
-            LLTHROW(LLThreadSafeQueueInterrupt());
-        }
-
-        // Storage empty. Wait for signal.
-        mEmptyCond.wait(lock1);
-    }
-}
-
-
-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::tryPopBack(ElementT & element)
-{
-    lock_t lock1(mLock, std::defer_lock);
-    if (!lock1.try_lock())
-        return false;
-
-    // no need to check mClosed: tryPopBack() behavior when the queue is
-    // closed is implemented by simple inability to push any new elements
-    if (mStorage.empty())
-        return false;
-
-    element = mStorage.back();
-    mStorage.pop_back();
-    lock1.unlock();
-    mCapacityCond.notify_one();
-    return true;
-}
-
-
-template<typename ElementT>
-size_t LLThreadSafeQueue<ElementT>::size(void)
+template<typename ElementT, typename QueueT>
+size_t LLThreadSafeQueue<ElementT, QueueT>::size(void)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
    lock_t lock(mLock);
    return mStorage.size();
 }

-template<typename ElementT>
-void LLThreadSafeQueue<ElementT>::close()
+
+template<typename ElementT, typename QueueT>
+void LLThreadSafeQueue<ElementT, QueueT>::close()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
    lock_t lock(mLock);
    mClosed = true;
    lock.unlock();
-    // wake up any blocked popBack() calls
+    // wake up any blocked pop() calls
    mEmptyCond.notify_all();
-    // wake up any blocked pushFront() calls
+    // wake up any blocked push() calls
    mCapacityCond.notify_all();
 }

-template<typename ElementT>
-bool LLThreadSafeQueue<ElementT>::isClosed()
+
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::isClosed()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
    lock_t lock(mLock);
-    return mClosed && mStorage.size() == 0;
+    return mClosed;
 }

-template<typename ElementT>
-LLThreadSafeQueue<ElementT>::operator bool()
+
+template<typename ElementT, typename QueueT>
+bool LLThreadSafeQueue<ElementT, QueueT>::done()
 {
-    return ! isClosed();
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    lock_t lock(mLock);
+    return mClosed && mStorage.empty();
 }

 #endif
--- a/indra/llcommon/lltrace.cpp
+++ b/indra/llcommon/lltrace.cpp
@ -61,6 +61,7 @@ TimeBlockTreeNode::TimeBlockTreeNode()

 void TimeBlockTreeNode::setParent( BlockTimerStatHandle* parent )
 {
+    LL_PROFILE_ZONE_SCOPED;
 	llassert_always(parent != mBlock);
 	llassert_always(parent != NULL);

--- a/indra/llcommon/lltrace.h
+++ b/indra/llcommon/lltrace.h
@ -227,6 +227,7 @@ public:

 	void setName(const char* name)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		mName = name;
 		setKey(name);
 	}
@ -234,12 +235,14 @@ public:
 	/*virtual*/ const char* getUnitLabel() const { return "KB"; }

 	StatType<MemAccumulator::AllocationFacet>& allocations() 
-	{ 
+	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		return static_cast<StatType<MemAccumulator::AllocationFacet>&>(*(StatType<MemAccumulator>*)this);
 	}

 	StatType<MemAccumulator::DeallocationFacet>& deallocations() 
-	{ 
+	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		return static_cast<StatType<MemAccumulator::DeallocationFacet>&>(*(StatType<MemAccumulator>*)this);
 	}
 };
@ -261,6 +264,7 @@ struct MeasureMem<T, typename T::mem_trackable_tag_t, IS_BYTES>
 {
 	static size_t measureFootprint(const T& value)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		return sizeof(T) + value.getMemFootprint();
 	}
 };
@ -270,6 +274,7 @@ struct MeasureMem<T, IS_MEM_TRACKABLE, typename T::is_unit_t>
 {
 	static size_t measureFootprint(const T& value)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		return U32Bytes(value).value();
 	}
 };
@ -279,6 +284,7 @@ struct MeasureMem<T*, IS_MEM_TRACKABLE, IS_BYTES>
 {
 	static size_t measureFootprint(const T* value)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		if (!value)
 		{
 			return 0;
@ -323,6 +329,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES>
 {
 	static size_t measureFootprint(const std::basic_string<T>& value)
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		return value.capacity() * sizeof(T);
 	}
 };
@ -331,6 +338,7 @@ struct MeasureMem<std::basic_string<T>, IS_MEM_TRACKABLE, IS_BYTES>
 template<typename T>
 inline void claim_alloc(MemStatHandle& measurement, const T& value)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 #if LL_TRACE_ENABLED
 	S32 size = MeasureMem<T>::measureFootprint(value);
 	if(size == 0) return;
@ -343,6 +351,7 @@ inline void claim_alloc(MemStatHandle& measurement, const T& value)
 template<typename T>
 inline void disclaim_alloc(MemStatHandle& measurement, const T& value)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 #if LL_TRACE_ENABLED
 	S32 size = MeasureMem<T>::measureFootprint(value);
 	if(size == 0) return;
@ -352,141 +361,6 @@ inline void disclaim_alloc(MemStatHandle& measurement, const T& value)
 #endif
 }

-template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN>
-class MemTrackableNonVirtual
-{
-public:
-	typedef void mem_trackable_tag_t;
-
-	MemTrackableNonVirtual(const char* name)
-#if LL_TRACE_ENABLED
-	:	mMemFootprint(0)
-#endif
-	{
-#if LL_TRACE_ENABLED
-		static bool name_initialized = false;
-		if (!name_initialized)
-		{
-			name_initialized = true;
-			sMemStat.setName(name);
-		}
-#endif
-	}
-
-#if LL_TRACE_ENABLED
-	~MemTrackableNonVirtual()
-	{
-		disclaimMem(mMemFootprint);
-	}
-
-	static MemStatHandle& getMemStatHandle()
-	{
-		return sMemStat;
-	}
-
-	S32 getMemFootprint() const	{ return mMemFootprint; }
-#endif
-
-	void* operator new(size_t size) 
-	{
-#if LL_TRACE_ENABLED
-		claim_alloc(sMemStat, size);
-#endif
-		return ll_aligned_malloc<ALIGNMENT>(size);
-	}
-
-	template<int CUSTOM_ALIGNMENT>
-	static void* aligned_new(size_t size)
-	{
-#if LL_TRACE_ENABLED
-		claim_alloc(sMemStat, size);
-#endif
-		return ll_aligned_malloc<CUSTOM_ALIGNMENT>(size);
-	}
-
-	void operator delete(void* ptr, size_t size)
-	{
-#if LL_TRACE_ENABLED
-		disclaim_alloc(sMemStat, size);
-#endif
-		ll_aligned_free<ALIGNMENT>(ptr);
-	}
-
-	template<int CUSTOM_ALIGNMENT>
-	static void aligned_delete(void* ptr, size_t size)
-	{
-#if LL_TRACE_ENABLED
-		disclaim_alloc(sMemStat, size);
-#endif
-		ll_aligned_free<CUSTOM_ALIGNMENT>(ptr);
-	}
-
-	void* operator new [](size_t size)
-	{
-#if LL_TRACE_ENABLED
-		claim_alloc(sMemStat, size);
-#endif
-		return ll_aligned_malloc<ALIGNMENT>(size);
-	}
-
-	void operator delete[](void* ptr, size_t size)
-	{
-#if LL_TRACE_ENABLED
-		disclaim_alloc(sMemStat, size);
-#endif
-		ll_aligned_free<ALIGNMENT>(ptr);
-	}
-
-	// claim memory associated with other objects/data as our own, adding to our calculated footprint
-	template<typename CLAIM_T>
-	void claimMem(const CLAIM_T& value) const
-	{
-#if LL_TRACE_ENABLED
-		S32 size = MeasureMem<CLAIM_T>::measureFootprint(value);
-		claim_alloc(sMemStat, size);
-		mMemFootprint += size;
-#endif
-	}
-
-	// remove memory we had claimed from our calculated footprint
-	template<typename CLAIM_T>
-	void disclaimMem(const CLAIM_T& value) const
-	{
-#if LL_TRACE_ENABLED
-		S32 size = MeasureMem<CLAIM_T>::measureFootprint(value);
-		disclaim_alloc(sMemStat, size);
-		mMemFootprint -= size;
-#endif
-	}
-
-private:
-#if LL_TRACE_ENABLED
-	// use signed values so that we can temporarily go negative
-	// and reconcile in destructor
-	// NB: this assumes that no single class is responsible for > 2GB of allocations
-	mutable S32 mMemFootprint;
-	
-	static	MemStatHandle	sMemStat;
-#endif
-
-};
-
-#if LL_TRACE_ENABLED
-template<typename DERIVED, size_t ALIGNMENT>
-MemStatHandle MemTrackableNonVirtual<DERIVED, ALIGNMENT>::sMemStat(typeid(MemTrackableNonVirtual<DERIVED, ALIGNMENT>).name());
-#endif
-
-template<typename DERIVED, size_t ALIGNMENT = LL_DEFAULT_HEAP_ALIGN>
-class MemTrackable : public MemTrackableNonVirtual<DERIVED, ALIGNMENT>
-{
-public:
-	MemTrackable(const char* name)
-	:	MemTrackableNonVirtual<DERIVED, ALIGNMENT>(name)
-	{}
-
-	virtual ~MemTrackable()
-	{}
-};
 }

 #endif // LL_LLTRACE_H
--- a/indra/llcommon/lltraceaccumulators.cpp
+++ b/indra/llcommon/lltraceaccumulators.cpp
@ -41,6 +41,7 @@ extern MemStatHandle gTraceMemStat;

 AccumulatorBufferGroup::AccumulatorBufferGroup() 
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));
 	claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));
 	claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator));
@ -55,6 +56,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth
 	mStackTimers(other.mStackTimers),
 	mMemStats(other.mMemStats)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	claim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));
 	claim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));
 	claim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator));
@ -64,6 +66,7 @@ AccumulatorBufferGroup::AccumulatorBufferGroup(const AccumulatorBufferGroup& oth

 AccumulatorBufferGroup::~AccumulatorBufferGroup()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	disclaim_alloc(gTraceMemStat, mCounts.capacity() * sizeof(CountAccumulator));
 	disclaim_alloc(gTraceMemStat, mSamples.capacity() * sizeof(SampleAccumulator));
 	disclaim_alloc(gTraceMemStat, mEvents.capacity() * sizeof(EventAccumulator));
@ -73,6 +76,7 @@ AccumulatorBufferGroup::~AccumulatorBufferGroup()

 void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	other.mCounts.reset(&mCounts);
 	other.mSamples.reset(&mSamples);
 	other.mEvents.reset(&mEvents);
@ -82,6 +86,7 @@ void AccumulatorBufferGroup::handOffTo(AccumulatorBufferGroup& other)

 void AccumulatorBufferGroup::makeCurrent()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mCounts.makeCurrent();
 	mSamples.makeCurrent();
 	mEvents.makeCurrent();
@ -104,6 +109,7 @@ void AccumulatorBufferGroup::makeCurrent()
 //static
 void AccumulatorBufferGroup::clearCurrent()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	AccumulatorBuffer<CountAccumulator>::clearCurrent();	
 	AccumulatorBuffer<SampleAccumulator>::clearCurrent();
 	AccumulatorBuffer<EventAccumulator>::clearCurrent();
@ -118,6 +124,7 @@ bool AccumulatorBufferGroup::isCurrent() const

 void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mCounts.addSamples(other.mCounts, SEQUENTIAL);
 	mSamples.addSamples(other.mSamples, SEQUENTIAL);
 	mEvents.addSamples(other.mEvents, SEQUENTIAL);
@ -127,6 +134,7 @@ void AccumulatorBufferGroup::append( const AccumulatorBufferGroup& other )

 void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mCounts.addSamples(other.mCounts, NON_SEQUENTIAL);
 	mSamples.addSamples(other.mSamples, NON_SEQUENTIAL);
 	mEvents.addSamples(other.mEvents, NON_SEQUENTIAL);
@ -137,6 +145,7 @@ void AccumulatorBufferGroup::merge( const AccumulatorBufferGroup& other)

 void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mCounts.reset(other ? &other->mCounts : NULL);
 	mSamples.reset(other ? &other->mSamples : NULL);
 	mEvents.reset(other ? &other->mEvents : NULL);
@ -146,6 +155,7 @@ void AccumulatorBufferGroup::reset(AccumulatorBufferGroup* other)

 void AccumulatorBufferGroup::sync()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	if (isCurrent())
 	{
 		F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds();
@ -190,7 +200,7 @@ F64 SampleAccumulator::mergeSumsOfSquares(const SampleAccumulator& a, const Samp

 void SampleAccumulator::addSamples( const SampleAccumulator& other, EBufferAppendType append_type )
 {
-	if (append_type == NON_SEQUENTIAL)
+    if (append_type == NON_SEQUENTIAL)
 	{
 		return;
 	}
@ -289,7 +299,7 @@ void EventAccumulator::addSamples( const EventAccumulator& other, EBufferAppendT

 void EventAccumulator::reset( const EventAccumulator* other )
 {
-	mNumSamples = 0;
+    mNumSamples = 0;
 	mSum = 0;
 	mMin = F32(NaN);
 	mMax = F32(NaN);
--- a/indra/llcommon/lltraceaccumulators.h
+++ b/indra/llcommon/lltraceaccumulators.h
@ -66,6 +66,7 @@ namespace LLTrace
 			: mStorageSize(0),
 			mStorage(NULL)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			const AccumulatorBuffer& other = *getDefaultBuffer();
 			resize(sNextStorageSlot);
 			for (S32 i = 0; i < sNextStorageSlot; i++)
@ -76,6 +77,7 @@ namespace LLTrace

 		~AccumulatorBuffer()
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			if (isCurrent())
 			{
 				LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL);
@ -98,6 +100,7 @@ namespace LLTrace
 			: mStorageSize(0),
 			mStorage(NULL)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			resize(sNextStorageSlot);
 			for (S32 i = 0; i < sNextStorageSlot; i++)
 			{
@ -107,6 +110,7 @@ namespace LLTrace

 		void addSamples(const AccumulatorBuffer<ACCUMULATOR>& other, EBufferAppendType append_type)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize >= sNextStorageSlot);
 			for (size_t i = 0; i < sNextStorageSlot; i++)
 			{
@ -116,6 +120,7 @@ namespace LLTrace

 		void copyFrom(const AccumulatorBuffer<ACCUMULATOR>& other)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			llassert(mStorageSize >= sNextStorageSlot && other.mStorageSize >= sNextStorageSlot);
 			for (size_t i = 0; i < sNextStorageSlot; i++)
 			{
@ -125,6 +130,7 @@ namespace LLTrace

 		void reset(const AccumulatorBuffer<ACCUMULATOR>* other = NULL)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			llassert(mStorageSize >= sNextStorageSlot);
 			for (size_t i = 0; i < sNextStorageSlot; i++)
 			{
@ -134,6 +140,7 @@ namespace LLTrace

 		void sync(F64SecondsImplicit time_stamp)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			llassert(mStorageSize >= sNextStorageSlot);
 			for (size_t i = 0; i < sNextStorageSlot; i++)
 			{
@ -153,12 +160,13 @@ namespace LLTrace

 		static void clearCurrent()
 		{
-			LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL);
+            LLThreadLocalSingletonPointer<ACCUMULATOR>::setInstance(NULL);
 		}

 		// NOTE: this is not thread-safe.  We assume that slots are reserved in the main thread before any child threads are spawned
 		size_t reserveSlot()
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			size_t next_slot = sNextStorageSlot++;
 			if (next_slot >= mStorageSize)
 			{
@ -172,6 +180,7 @@ namespace LLTrace

 		void resize(size_t new_size)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			if (new_size <= mStorageSize) return;

 			ACCUMULATOR* old_storage = mStorage;
@ -212,6 +221,7 @@ namespace LLTrace

 		static self_t* getDefaultBuffer()
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			static bool sInitialized = false;
 			if (!sInitialized)
 			{
@ -326,6 +336,7 @@ namespace LLTrace

 		void sample(F64 value)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			F64SecondsImplicit time_stamp = LLTimer::getTotalSeconds();

 			// store effect of last value
@ -444,9 +455,9 @@ namespace LLTrace
 		S32	mNumSamples;
 	};

-	class TimeBlockAccumulator
+	class alignas(32) TimeBlockAccumulator
 	{
-	public:
+    public:
 		typedef F64Seconds value_t;
 		static F64Seconds getDefaultValue() { return F64Seconds(0); }

@ -539,6 +550,7 @@ namespace LLTrace

 		void addSamples(const MemAccumulator& other, EBufferAppendType append_type)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			mAllocations.addSamples(other.mAllocations, append_type);
 			mDeallocations.addSamples(other.mDeallocations, append_type);

@ -557,6 +569,7 @@ namespace LLTrace

 		void reset(const MemAccumulator* other)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			mSize.reset(other ? &other->mSize : NULL);
 			mAllocations.reset(other ? &other->mAllocations : NULL);
 			mDeallocations.reset(other ? &other->mDeallocations : NULL);
--- a/indra/llcommon/lltracerecording.cpp
+++ b/indra/llcommon/lltracerecording.cpp
@ -50,6 +50,7 @@ Recording::Recording(EPlayState state)
 :	mElapsedSeconds(0),
 	mActiveBuffers(NULL)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	claim_alloc(gTraceMemStat, this);
 	mBuffers = new AccumulatorBufferGroup();
 	claim_alloc(gTraceMemStat, mBuffers);
@ -59,12 +60,14 @@ Recording::Recording(EPlayState state)
 Recording::Recording( const Recording& other )
 :	mActiveBuffers(NULL)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	claim_alloc(gTraceMemStat, this);
 	*this = other;
 }

 Recording& Recording::operator = (const Recording& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	// this will allow us to seamlessly start without affecting any data we've acquired from other
 	setPlayState(PAUSED);

@ -85,6 +88,7 @@ Recording& Recording::operator = (const Recording& other)

 Recording::~Recording()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	disclaim_alloc(gTraceMemStat, this);
 	disclaim_alloc(gTraceMemStat, mBuffers);

@ -103,6 +107,7 @@ void Recording::update()
 #if LL_TRACE_ENABLED
 	if (isStarted())
 	{
+        LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 		mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();

 		// must have 
@ -123,6 +128,7 @@ void Recording::update()

 void Recording::handleReset()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 #if LL_TRACE_ENABLED
 	mBuffers.write()->reset();

@ -133,6 +139,7 @@ void Recording::handleReset()

 void Recording::handleStart()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 #if LL_TRACE_ENABLED
 	mSamplingTimer.reset();
 	mBuffers.setStayUnique(true);
@ -144,6 +151,7 @@ void Recording::handleStart()

 void Recording::handleStop()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 #if LL_TRACE_ENABLED
 	mElapsedSeconds += mSamplingTimer.getElapsedTimeF64();
 	// must have thread recorder running on this thread
@ -273,7 +281,7 @@ F64Kilobytes Recording::getMean(const StatType<MemAccumulator>& stat)

 F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes(llmax(accumulator.mSize.getMax(), active_accumulator && active_accumulator->mSize.hasValue() ? active_accumulator->mSize.getMax() : F32_MIN));
@ -281,7 +289,7 @@ F64Kilobytes Recording::getMax(const StatType<MemAccumulator>& stat)

 F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	if (active_accumulator && active_accumulator->hasValue())
@ -297,7 +305,7 @@ F64Kilobytes Recording::getStandardDeviation(const StatType<MemAccumulator>& sta

 F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes(active_accumulator ? active_accumulator->mSize.getLastValue() : accumulator.mSize.getLastValue());
@ -305,7 +313,7 @@ F64Kilobytes Recording::getLastValue(const StatType<MemAccumulator>& stat)

 bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return accumulator.mAllocations.hasValue() || (active_accumulator ? active_accumulator->mAllocations.hasValue() : false);
@ -313,7 +321,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::AllocationFacet>& stat)

 F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes(accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0));
@ -321,7 +329,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::AllocationFacet>&

 F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes((accumulator.mAllocations.getSum() + (active_accumulator ? active_accumulator->mAllocations.getSum() : 0)) / mElapsedSeconds.value());
@ -329,7 +337,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::AllocationFacet

 S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return accumulator.mAllocations.getSampleCount() + (active_accumulator ? active_accumulator->mAllocations.getSampleCount() : 0);
@ -337,7 +345,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::AllocationFacet>& s

 bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return accumulator.mDeallocations.hasValue() || (active_accumulator ? active_accumulator->mDeallocations.hasValue() : false);
@ -346,7 +354,7 @@ bool Recording::hasValue(const StatType<MemAccumulator::DeallocationFacet>& stat

 F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes(accumulator.mDeallocations.getSum() + (active_accumulator ? active_accumulator->mDeallocations.getSum() : 0));
@ -354,7 +362,7 @@ F64Kilobytes Recording::getSum(const StatType<MemAccumulator::DeallocationFacet>

 F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return F64Bytes((accumulator.mDeallocations.getSum() + (active_accumulator ? active_accumulator->mDeallocations.getSum() : 0)) / mElapsedSeconds.value());
@ -362,7 +370,7 @@ F64Kilobytes Recording::getPerSec(const StatType<MemAccumulator::DeallocationFac

 S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>& stat)
 {
-	update();
+    update();
 	const MemAccumulator& accumulator = mBuffers->mMemStats[stat.getIndex()];
 	const MemAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mMemStats[stat.getIndex()] : NULL;
 	return accumulator.mDeallocations.getSampleCount() + (active_accumulator ? active_accumulator->mDeallocations.getSampleCount() : 0);
@ -370,7 +378,7 @@ S32 Recording::getSampleCount(const StatType<MemAccumulator::DeallocationFacet>&

 bool Recording::hasValue(const StatType<CountAccumulator>& stat)
 {
-	update();
+    update();
 	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];
 	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;
 	return accumulator.hasValue() || (active_accumulator ? active_accumulator->hasValue() : false);
@ -378,7 +386,7 @@ bool Recording::hasValue(const StatType<CountAccumulator>& stat)

 F64 Recording::getSum(const StatType<CountAccumulator>& stat)
 {
-	update();
+    update();
 	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];
 	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;
 	return accumulator.getSum() + (active_accumulator ? active_accumulator->getSum() : 0);
@ -386,7 +394,7 @@ F64 Recording::getSum(const StatType<CountAccumulator>& stat)

 F64 Recording::getPerSec( const StatType<CountAccumulator>& stat )
 {
-	update();
+    update();
 	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];
 	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;
 	F64 sum = accumulator.getSum() + (active_accumulator ? active_accumulator->getSum() : 0);
@ -395,7 +403,7 @@ F64 Recording::getPerSec( const StatType<CountAccumulator>& stat )

 S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat )
 {
-	update();
+    update();
 	const CountAccumulator& accumulator = mBuffers->mCounts[stat.getIndex()];
 	const CountAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mCounts[stat.getIndex()] : NULL;
 	return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0);
@ -403,7 +411,7 @@ S32 Recording::getSampleCount( const StatType<CountAccumulator>& stat )

 bool Recording::hasValue(const StatType<SampleAccumulator>& stat)
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue());
@ -411,7 +419,7 @@ bool Recording::hasValue(const StatType<SampleAccumulator>& stat)

 F64 Recording::getMin( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX);
@ -419,7 +427,7 @@ F64 Recording::getMin( const StatType<SampleAccumulator>& stat )

 F64 Recording::getMax( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMax() : F32_MIN);
@ -427,7 +435,7 @@ F64 Recording::getMax( const StatType<SampleAccumulator>& stat )

 F64 Recording::getMean( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	if (active_accumulator && active_accumulator->hasValue())
@ -448,7 +456,7 @@ F64 Recording::getMean( const StatType<SampleAccumulator>& stat )

 F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;

@ -465,7 +473,7 @@ F64 Recording::getStandardDeviation( const StatType<SampleAccumulator>& stat )

 F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	return (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getLastValue() : accumulator.getLastValue());
@ -473,7 +481,7 @@ F64 Recording::getLastValue( const StatType<SampleAccumulator>& stat )

 S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat )
 {
-	update();
+    update();
 	const SampleAccumulator& accumulator = mBuffers->mSamples[stat.getIndex()];
 	const SampleAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mSamples[stat.getIndex()] : NULL;
 	return accumulator.getSampleCount() + (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getSampleCount() : 0);
@ -481,7 +489,7 @@ S32 Recording::getSampleCount( const StatType<SampleAccumulator>& stat )

 bool Recording::hasValue(const StatType<EventAccumulator>& stat)
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return accumulator.hasValue() || (active_accumulator && active_accumulator->hasValue());
@ -489,7 +497,7 @@ bool Recording::hasValue(const StatType<EventAccumulator>& stat)

 F64 Recording::getSum( const StatType<EventAccumulator>& stat)
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return (F64)(accumulator.getSum() + (active_accumulator && active_accumulator->hasValue() ? active_accumulator->getSum() : 0));
@ -497,7 +505,7 @@ F64 Recording::getSum( const StatType<EventAccumulator>& stat)

 F64 Recording::getMin( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return llmin(accumulator.getMin(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMin() : F32_MAX);
@ -505,7 +513,7 @@ F64 Recording::getMin( const StatType<EventAccumulator>& stat )

 F64 Recording::getMax( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return llmax(accumulator.getMax(), active_accumulator && active_accumulator->hasValue() ? active_accumulator->getMax() : F32_MIN);
@ -513,7 +521,7 @@ F64 Recording::getMax( const StatType<EventAccumulator>& stat )

 F64 Recording::getMean( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	if (active_accumulator && active_accumulator->hasValue())
@ -534,7 +542,7 @@ F64 Recording::getMean( const StatType<EventAccumulator>& stat )

 F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;

@ -551,7 +559,7 @@ F64 Recording::getStandardDeviation( const StatType<EventAccumulator>& stat )

 F64 Recording::getLastValue( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return active_accumulator ? active_accumulator->getLastValue() : accumulator.getLastValue();
@ -559,7 +567,7 @@ F64 Recording::getLastValue( const StatType<EventAccumulator>& stat )

 S32 Recording::getSampleCount( const StatType<EventAccumulator>& stat )
 {
-	update();
+    update();
 	const EventAccumulator& accumulator = mBuffers->mEvents[stat.getIndex()];
 	const EventAccumulator* active_accumulator = mActiveBuffers ? &mActiveBuffers->mEvents[stat.getIndex()] : NULL;
 	return accumulator.getSampleCount() + (active_accumulator ? active_accumulator->getSampleCount() : 0);
@ -575,17 +583,20 @@ PeriodicRecording::PeriodicRecording( S32 num_periods, EPlayState state)
 	mNumRecordedPeriods(0),
 	mRecordingPeriods(num_periods ? num_periods : 1)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	setPlayState(state);
 	claim_alloc(gTraceMemStat, this);
 }

 PeriodicRecording::~PeriodicRecording()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	disclaim_alloc(gTraceMemStat, this);
 }

 void PeriodicRecording::nextPeriod()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	if (mAutoResize)
 	{
 		mRecordingPeriods.push_back(Recording());
@ -600,6 +611,7 @@ void PeriodicRecording::nextPeriod()

 void PeriodicRecording::appendRecording(Recording& recording)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	getCurRecording().appendRecording(recording);
 	nextPeriod();
 }
@ -607,6 +619,7 @@ void PeriodicRecording::appendRecording(Recording& recording)

 void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	if (other.mRecordingPeriods.empty()) return;

 	getCurRecording().update();
@ -680,6 +693,7 @@ void PeriodicRecording::appendPeriodicRecording( PeriodicRecording& other )

 F64Seconds PeriodicRecording::getDuration() const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	F64Seconds duration;
 	S32 num_periods = mRecordingPeriods.size();
 	for (S32 i = 1; i <= num_periods; i++)
@ -693,6 +707,7 @@ F64Seconds PeriodicRecording::getDuration() const

 LLTrace::Recording PeriodicRecording::snapshotCurRecording() const
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	Recording recording_copy(getCurRecording());
 	recording_copy.stop();
 	return recording_copy;
@ -735,16 +750,19 @@ const Recording& PeriodicRecording::getPrevRecording( S32 offset ) const

 void PeriodicRecording::handleStart()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	getCurRecording().start();
 }

 void PeriodicRecording::handleStop()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	getCurRecording().pause();
 }

 void PeriodicRecording::handleReset()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	getCurRecording().stop();

 	if (mAutoResize)
@ -768,11 +786,13 @@ void PeriodicRecording::handleReset()

 void PeriodicRecording::handleSplitTo(PeriodicRecording& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	getCurRecording().splitTo(other.getCurRecording());
 }

 F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	bool has_value = false;
@ -794,6 +814,7 @@ F64 PeriodicRecording::getPeriodMin( const StatType<EventAccumulator>& stat, S32

 F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	bool has_value = false;
@ -816,6 +837,7 @@ F64 PeriodicRecording::getPeriodMax( const StatType<EventAccumulator>& stat, S32
 // calculates means using aggregates per period
 F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64 mean = 0;
@ -836,9 +858,9 @@ F64 PeriodicRecording::getPeriodMean( const StatType<EventAccumulator>& stat, S3
 			: NaN;
 }

-
 F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64 period_mean = getPeriodMean(stat, num_periods);
@ -863,6 +885,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<EventAccumulat

 F64 PeriodicRecording::getPeriodMin( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	bool has_value = false;
@ -884,6 +907,7 @@ F64 PeriodicRecording::getPeriodMin( const StatType<SampleAccumulator>& stat, S3

 F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	bool has_value = false;
@ -906,6 +930,7 @@ F64 PeriodicRecording::getPeriodMax(const StatType<SampleAccumulator>& stat, S32

 F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	S32 valid_period_count = 0;
@ -926,8 +951,35 @@ F64 PeriodicRecording::getPeriodMean( const StatType<SampleAccumulator>& stat, S
 			: NaN;
 }

+F64 PeriodicRecording::getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
+	num_periods = llmin(num_periods, getNumRecordedPeriods());
+
+	std::vector<F64> buf;
+	for (S32 i = 1; i <= num_periods; i++)
+	{
+		Recording& recording = getPrevRecording(i);
+		if (recording.getDuration() > (F32Seconds)0.f)
+		{
+			if (recording.hasValue(stat))
+			{
+				buf.push_back(recording.getMean(stat));
+			}
+		}
+	}
+	if (buf.size()==0)
+	{
+		return 0.0f;
+	}
+	std::sort(buf.begin(), buf.end());
+
+	return F64((buf.size() % 2 == 0) ? (buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]);
+}
+
 F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64 period_mean = getPeriodMean(stat, num_periods);
@ -953,6 +1005,7 @@ F64 PeriodicRecording::getPeriodStandardDeviation( const StatType<SampleAccumula

 F64Kilobytes PeriodicRecording::getPeriodMin( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64Kilobytes min_val(std::numeric_limits<F64>::max());
@ -972,6 +1025,7 @@ F64Kilobytes PeriodicRecording::getPeriodMin(const MemStatHandle& stat, S32 num_

 F64Kilobytes PeriodicRecording::getPeriodMax(const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64Kilobytes max_val(0.0);
@ -991,6 +1045,7 @@ F64Kilobytes PeriodicRecording::getPeriodMax(const MemStatHandle& stat, S32 num_

 F64Kilobytes PeriodicRecording::getPeriodMean( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64Kilobytes mean(0);
@ -1011,6 +1066,7 @@ F64Kilobytes PeriodicRecording::getPeriodMean(const MemStatHandle& stat, S32 num

 F64Kilobytes PeriodicRecording::getPeriodStandardDeviation( const StatType<MemAccumulator>& stat, S32 num_periods /*= S32_MAX*/ )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	num_periods = llmin(num_periods, getNumRecordedPeriods());

 	F64Kilobytes period_mean = getPeriodMean(stat, num_periods);
@ -1044,6 +1100,7 @@ F64Kilobytes PeriodicRecording::getPeriodStandardDeviation(const MemStatHandle&

 void ExtendableRecording::extend()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	// push the data back to accepted recording
 	mAcceptedRecording.appendRecording(mPotentialRecording);
 	// flush data, so we can start from scratch
@ -1052,22 +1109,26 @@ void ExtendableRecording::extend()

 void ExtendableRecording::handleStart()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.start();
 }

 void ExtendableRecording::handleStop()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.pause();
 }

 void ExtendableRecording::handleReset()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mAcceptedRecording.reset();
 	mPotentialRecording.reset();
 }

 void ExtendableRecording::handleSplitTo(ExtendableRecording& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.splitTo(other.mPotentialRecording);
 }

@ -1084,6 +1145,7 @@ ExtendablePeriodicRecording::ExtendablePeriodicRecording()

 void ExtendablePeriodicRecording::extend()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	// push the data back to accepted recording
 	mAcceptedRecording.appendPeriodicRecording(mPotentialRecording);
 	// flush data, so we can start from scratch
@ -1093,22 +1155,26 @@ void ExtendablePeriodicRecording::extend()

 void ExtendablePeriodicRecording::handleStart()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.start();
 }

 void ExtendablePeriodicRecording::handleStop()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.pause();
 }

 void ExtendablePeriodicRecording::handleReset()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mAcceptedRecording.reset();
 	mPotentialRecording.reset();
 }

 void ExtendablePeriodicRecording::handleSplitTo(ExtendablePeriodicRecording& other)
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	mPotentialRecording.splitTo(other.mPotentialRecording);
 }

@ -1123,6 +1189,7 @@ PeriodicRecording& get_frame_recording()

 void LLStopWatchControlsMixinCommon::start()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1144,6 +1211,7 @@ void LLStopWatchControlsMixinCommon::start()

 void LLStopWatchControlsMixinCommon::stop()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1163,6 +1231,7 @@ void LLStopWatchControlsMixinCommon::stop()

 void LLStopWatchControlsMixinCommon::pause()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1182,6 +1251,7 @@ void LLStopWatchControlsMixinCommon::pause()

 void LLStopWatchControlsMixinCommon::unpause()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1201,6 +1271,7 @@ void LLStopWatchControlsMixinCommon::unpause()

 void LLStopWatchControlsMixinCommon::resume()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1221,6 +1292,7 @@ void LLStopWatchControlsMixinCommon::resume()

 void LLStopWatchControlsMixinCommon::restart()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch (mPlayState)
 	{
 	case STOPPED:
@ -1244,11 +1316,13 @@ void LLStopWatchControlsMixinCommon::restart()

 void LLStopWatchControlsMixinCommon::reset()
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	handleReset();
 }

 void LLStopWatchControlsMixinCommon::setPlayState( EPlayState state )
 {
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	switch(state)
 	{
 	case STOPPED:
--- a/indra/llcommon/lltracerecording.h
+++ b/indra/llcommon/lltracerecording.h
@ -355,6 +355,7 @@ namespace LLTrace
 		template <typename T>
 		S32 getSampleCount(const StatType<T>& stat, S32 num_periods = S32_MAX)
        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

            S32 num_samples = 0;
@ -374,6 +375,7 @@ namespace LLTrace
 		template <typename T>
 		typename T::value_t getPeriodMin(const StatType<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			bool has_value = false;
@ -396,6 +398,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMin(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMin(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}

@ -403,6 +406,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMin(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMin(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));
 		}

@ -410,6 +414,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMin(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMin(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));
 		}

@ -419,6 +424,7 @@ namespace LLTrace
 		template <typename T>
 		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMinPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			typename RelatedTypes<typename T::value_t>::fractional_t min_val(std::numeric_limits<F64>::max());
@ -433,6 +439,7 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodMinPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMinPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}

@ -444,6 +451,7 @@ namespace LLTrace
 		template <typename T>
 		typename T::value_t getPeriodMax(const StatType<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			bool has_value = false;
@ -466,6 +474,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMax(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMax(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}

@ -473,6 +482,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMax(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMax(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));
 		}

@ -480,6 +490,7 @@ namespace LLTrace
 		template<typename T>
 		T getPeriodMax(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return T(getPeriodMax(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));
 		}

@ -489,6 +500,7 @@ namespace LLTrace
 		template <typename T>
 		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMaxPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			F64 max_val = std::numeric_limits<F64>::min();
@ -503,6 +515,7 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodMaxPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMaxPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}

@ -514,6 +527,7 @@ namespace LLTrace
 		template <typename T>
 		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMean(const StatType<T >& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			typename RelatedTypes<typename T::value_t>::fractional_t mean(0);
@ -534,12 +548,14 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodMean(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}
 		F64 getPeriodMean(const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX);
 		template<typename T> 
 		typename RelatedTypes<T>::fractional_t getPeriodMean(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));
 		}

@ -547,6 +563,7 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodMean(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMean(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));
 		}

@ -556,6 +573,7 @@ namespace LLTrace
 		template <typename T>
 		typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMeanPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			num_periods = llmin(num_periods, getNumRecordedPeriods());

 			typename RelatedTypes<typename T::value_t>::fractional_t mean = 0;
@ -577,9 +595,39 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodMeanPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodMeanPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
 		}

+        F64 getPeriodMedian( const StatType<SampleAccumulator>& stat, S32 num_periods = S32_MAX);
+
+        template <typename T>
+        typename RelatedTypes<typename T::value_t>::fractional_t getPeriodMedianPerSec(const StatType<T>& stat, S32 num_periods = S32_MAX)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
+            num_periods = llmin(num_periods, getNumRecordedPeriods());
+
+            std::vector <typename RelatedTypes<typename T::value_t>::fractional_t> buf;
+            for (S32 i = 1; i <= num_periods; i++)
+            {
+                Recording& recording = getPrevRecording(i);
+                if (recording.getDuration() > (F32Seconds)0.f)
+                {
+                    buf.push_back(recording.getPerSec(stat));
+                }
+            }
+            std::sort(buf.begin(), buf.end());
+
+            return typename RelatedTypes<T>::fractional_t((buf.size() % 2 == 0) ? (buf[buf.size() / 2 - 1] + buf[buf.size() / 2]) / 2 : buf[buf.size() / 2]);
+        }
+
+        template<typename T>
+        typename RelatedTypes<T>::fractional_t getPeriodMedianPerSec(const CountStatHandle<T>& stat, S32 num_periods = S32_MAX)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
+            return typename RelatedTypes<T>::fractional_t(getPeriodMedianPerSec(static_cast<const StatType<CountAccumulator>&>(stat), num_periods));
+        }
+
 		//
 		// PERIODIC STANDARD DEVIATION
 		//
@ -589,6 +637,7 @@ namespace LLTrace
 		template<typename T> 
 		typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const SampleStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<SampleAccumulator>&>(stat), num_periods));
 		}

@ -596,6 +645,7 @@ namespace LLTrace
 		template<typename T>
 		typename RelatedTypes<T>::fractional_t getPeriodStandardDeviation(const EventStatHandle<T>& stat, S32 num_periods = S32_MAX)
 		{
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 			return typename RelatedTypes<T>::fractional_t(getPeriodStandardDeviation(static_cast<const StatType<EventAccumulator>&>(stat), num_periods));
 		}

--- a/indra/llcommon/lltracethreadrecorder.cpp
+++ b/indra/llcommon/lltracethreadrecorder.cpp
@ -274,12 +274,10 @@ void ThreadRecorder::pushToParent()
 }


-static LLTrace::BlockTimerStatHandle FTM_PULL_TRACE_DATA_FROM_CHILDREN("Pull child thread trace data");
-
 void ThreadRecorder::pullFromChildren()
 {
 #if LL_TRACE_ENABLED
-	LL_RECORD_BLOCK_TIME(FTM_PULL_TRACE_DATA_FROM_CHILDREN);
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_STATS;
 	if (mActiveRecordings.empty()) return;

 	{ LLMutexLock lock(&mChildListMutex);
--- a/indra/llcommon/lluuid.h
+++ b/indra/llcommon/lluuid.h
@ -184,6 +184,17 @@ struct boost::hash<LLUUID>
    }
 };

+// Adapt boost hash to std hash
+namespace std
+{
+    template<> struct hash<LLUUID>
+    {
+        std::size_t operator()(LLUUID const& s) const noexcept
+        {
+            return boost::hash<LLUUID>()(s);
+        }
+    };
+}
 #endif


--- a/indra/llcommon/stdtypes.h
+++ b/indra/llcommon/stdtypes.h
@ -42,10 +42,17 @@ typedef unsigned int			U32;
 // Windows wchar_t is 16-bit, whichever way /Zc:wchar_t is set. In effect,
 // Windows wchar_t is always a typedef, either for unsigned short or __wchar_t.
 // (__wchar_t, available either way, is Microsoft's native 2-byte wchar_t type.)
+// The version of clang available with VS 2019 also defines wchar_t as __wchar_t
+// which is also 16 bits.
 // In any case, llwchar should be a UTF-32 type.
 typedef U32				llwchar;
 #else
 typedef wchar_t				llwchar;
+// What we'd actually want is a simple module-scope 'if constexpr' to test
+// std::is_same<wchar_t, llwchar>::value and use that to define, or not
+// define, string conversion specializations. Since we don't have that, we'll
+// have to rely on #if instead. Sorry, Dr. Stroustrup.
+#define LLWCHAR_IS_WCHAR_T 1
 #endif

 #if LL_WINDOWS
--- a/indra/llcommon/stringize.h
+++ b/indra/llcommon/stringize.h
@ -31,58 +31,109 @@

 #include <sstream>
 #include <llstring.h>
+#include <boost/call_traits.hpp>

 /**
- * gstringize(item) encapsulates an idiom we use constantly, using
- * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str()
- * or their wstring equivalents
- * to render a string expressing some item.
+ * stream_to(std::ostream&, items, ...) streams each item in the parameter list
+ * to the passed std::ostream using the insertion operator <<. This can be
+ * used, for instance, to make a simple print() function, e.g.:
+ *
+ * @code
+ * template <typename... Items>
+ * void print(Items&&... items)
+ * {
+ *     stream_to(std::cout, std::forward<Items>(items)...);
+ * }
+ * @endcode
 */
-template <typename CHARTYPE, typename T>
-std::basic_string<CHARTYPE> gstringize(const T& item)
+// recursion tail
+template <typename CHARTYPE>
+void stream_to(std::basic_ostream<CHARTYPE>& out) {}
+// stream one or more items
+template <typename CHARTYPE, typename T, typename... Items>
+void stream_to(std::basic_ostream<CHARTYPE>& out, T&& item, Items&&... items)
+{
+    out << std::forward<T>(item);
+    stream_to(out, std::forward<Items>(items)...);
+}
+
+// why we use function overloads, not function template specializations:
+// http://www.gotw.ca/publications/mill17.htm
+
+/**
+ * gstringize(item, ...) encapsulates an idiom we use constantly, using
+ * operator<<(std::ostringstream&, TYPE) followed by std::ostringstream::str()
+ * or their wstring equivalents to render a string expressing one or more items.
+ */
+// two or more args - the case of a single argument is handled separately
+template <typename CHARTYPE, typename T0, typename T1, typename... Items>
+auto gstringize(T0&& item0, T1&& item1, Items&&... items)
 {
    std::basic_ostringstream<CHARTYPE> out;
-    out << item;
+    stream_to(out, std::forward<T0>(item0), std::forward<T1>(item1),
+              std::forward<Items>(items)...);
    return out.str();
 }

-/**
- *partial specialization of stringize for handling wstring
- *TODO: we should have similar specializations for wchar_t[] but not until it is needed.
- */
-inline std::string stringize(const std::wstring& item)
+// generic single argument: stream to out, as above
+template <typename CHARTYPE, typename T>
+struct gstringize_impl
 {
-    return wstring_to_utf8str(item);
+    auto operator()(typename boost::call_traits<T>::param_type arg)
+    {
+        std::basic_ostringstream<CHARTYPE> out;
+        out << arg;
+        return out.str();
+    }
+};
+
+// partially specialize for a single STRING argument -
+// note that ll_convert<T>(T) already handles the trivial case
+template <typename OUTCHAR, typename INCHAR>
+struct gstringize_impl<OUTCHAR, std::basic_string<INCHAR>>
+{
+    auto operator()(const std::basic_string<INCHAR>& arg)
+    {
+        return ll_convert<std::basic_string<OUTCHAR>>(arg);
+    }
+};
+
+// partially specialize for a single CHARTYPE* argument -
+// since it's not a basic_string and we do want to optimize this common case
+template <typename OUTCHAR, typename INCHAR>
+struct gstringize_impl<OUTCHAR, INCHAR*>
+{
+    auto operator()(const INCHAR* arg)
+    {
+        return ll_convert<std::basic_string<OUTCHAR>>(arg);
+    }
+};
+
+// gstringize(single argument)
+template <typename CHARTYPE, typename T>
+auto gstringize(T&& item)
+{
+    // use decay<T> so we don't require separate specializations
+    // for T, const T, T&, const T& ...
+    return gstringize_impl<CHARTYPE, std::decay_t<T>>()(std::forward<T>(item));
 }

 /**
 * Specialization of gstringize for std::string return types
 */
-template <typename T>
-std::string stringize(const T& item)
+template <typename... Items>
+auto stringize(Items&&... items)
 {
-    return gstringize<char>(item);
-}
-
-/**
- * Specialization for generating wstring from string.
- * Both a convenience function and saves a miniscule amount of overhead.
- */
-inline std::wstring wstringize(const std::string& item)
-{
-    // utf8str_to_wstring() returns LLWString, which isn't necessarily the
-    // same as std::wstring
-    LLWString s(utf8str_to_wstring(item));
-    return std::wstring(s.begin(), s.end());
+    return gstringize<char>(std::forward<Items>(items)...);
 }

 /**
 * Specialization of gstringize for std::wstring return types
 */
-template <typename T>
-std::wstring wstringize(const T& item)
+template <typename... Items>
+auto wstringize(Items&&... items)
 {
-    return gstringize<wchar_t>(item);
+    return gstringize<wchar_t>(std::forward<Items>(items)...);
 }

 /**
@ -146,11 +197,9 @@ void destringize_f(std::basic_string<CHARTYPE> const & str, Functor const & f)
 * std::istringstream in(str);
 * in >> item1 >> item2 >> item3 ... ;
 * @endcode
- * @NOTE - once we get generic lambdas, we shouldn't need DEWSTRINGIZE() any
- * more since DESTRINGIZE() should do the right thing with a std::wstring. But
- * until then, the lambda we pass must accept the right std::basic_istream.
 */
-#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::istream& in){in >> EXPRESSION;}))
-#define DEWSTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](std::wistream& in){in >> EXPRESSION;}))
+#define DESTRINGIZE(STR, EXPRESSION) (destringize_f((STR), [&](auto& in){in >> EXPRESSION;}))
+// legacy name, just use DESTRINGIZE() going forward
+#define DEWSTRINGIZE(STR, EXPRESSION) DESTRINGIZE(STR, EXPRESSION)

 #endif /* ! defined(LL_STRINGIZE_H) */
--- a/indra/llcommon/tests/llinstancetracker_test.cpp
+++ b/indra/llcommon/tests/llinstancetracker_test.cpp
@ -90,19 +90,19 @@ namespace tut
        {
            Keyed one("one");
            ensure_equals(Keyed::instanceCount(), 1);
-            Keyed* found = Keyed::getInstance("one");
-            ensure("couldn't find stack Keyed", found);
-            ensure_equals("found wrong Keyed instance", found, &one);
+            auto found = Keyed::getInstance("one");
+            ensure("couldn't find stack Keyed", bool(found));
+            ensure_equals("found wrong Keyed instance", found.get(), &one);
            {
                boost::scoped_ptr<Keyed> two(new Keyed("two"));
                ensure_equals(Keyed::instanceCount(), 2);
-                Keyed* found = Keyed::getInstance("two");
-                ensure("couldn't find heap Keyed", found);
-                ensure_equals("found wrong Keyed instance", found, two.get());
+                auto found = Keyed::getInstance("two");
+                ensure("couldn't find heap Keyed", bool(found));
+                ensure_equals("found wrong Keyed instance", found.get(), two.get());
            }
            ensure_equals(Keyed::instanceCount(), 1);
        }
-        Keyed* found = Keyed::getInstance("one");
+        auto found = Keyed::getInstance("one");
        ensure("Keyed key lives too long", ! found);
        ensure_equals(Keyed::instanceCount(), 0);
    }
--- a/indra/llcommon/tests/llprocess_test.cpp
+++ b/indra/llcommon/tests/llprocess_test.cpp
@ -356,14 +356,15 @@ namespace tut

        // Create a script file in a temporary place.
        NamedTempFile script("py",
+			"from __future__ import print_function" EOL
            "import sys" EOL
            "import time" EOL
            EOL
            "time.sleep(2)" EOL
-            "print('stdout after wait', file=sys.stdout)" EOL
+            "print('stdout after wait',file=sys.stdout)" EOL
            "sys.stdout.flush()" EOL
            "time.sleep(2)" EOL
-            "print('stderr after wait', file=sys.stderr)" EOL
+            "print('stderr after wait',file=sys.stderr)" EOL
            "sys.stderr.flush()" EOL
            );

@ -572,12 +573,12 @@ namespace tut
    {
        set_test_name("arguments");
        PythonProcessLauncher py(get_test_name(),
-                                 "from __future__ import with_statement\n"
+                                 "from __future__ import with_statement, print_function\n"
                                 "import sys\n"
                                 // note nonstandard output-file arg!
                                 "with open(sys.argv[3], 'w') as f:\n"
                                 "    for arg in sys.argv[1:]:\n"
-                                 "        print(arg, file=f)\n");
+                                 "        print(arg,file=f)\n");
        // We expect that PythonProcessLauncher has already appended
        // its own NamedTempFile to mParams.args (sys.argv[0]).
        py.mParams.args.add("first arg");          // sys.argv[1]
@ -861,6 +862,7 @@ namespace tut
        set_test_name("'bogus' test");
        CaptureLog recorder;
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "print('Hello world')\n");
        py.mParams.files.add(LLProcess::FileParam("bogus"));
        py.mPy = LLProcess::create(py.mParams);
@ -876,6 +878,7 @@ namespace tut
        // Replace this test with one or more real 'file' tests when we
        // implement 'file' support
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "print('Hello world')\n");
        py.mParams.files.add(LLProcess::FileParam());
        py.mParams.files.add(LLProcess::FileParam("file"));
@ -891,6 +894,7 @@ namespace tut
        // implement 'tpipe' support
        CaptureLog recorder;
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "print('Hello world')\n");
        py.mParams.files.add(LLProcess::FileParam());
        py.mParams.files.add(LLProcess::FileParam("tpipe"));
@ -908,6 +912,7 @@ namespace tut
        // implement 'npipe' support
        CaptureLog recorder;
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "print('Hello world')\n");
        py.mParams.files.add(LLProcess::FileParam());
        py.mParams.files.add(LLProcess::FileParam());
@ -984,7 +989,8 @@ namespace tut
    {
        set_test_name("get*Pipe() validation");
        PythonProcessLauncher py(get_test_name(),
-                                 "print('this output is expected)'\n");
+                                 "from __future__ import print_function\n"
+                                 "print('this output is expected')\n");
        py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for  stdin
        py.mParams.files.add(LLProcess::FileParam());       // inherit stdout
        py.mParams.files.add(LLProcess::FileParam("pipe")); // pipe for stderr
@ -1004,6 +1010,7 @@ namespace tut
    {
        set_test_name("talk to stdin/stdout");
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "import sys, time\n"
                                 "print('ok')\n"
                                 "sys.stdout.flush()\n"
@ -1122,6 +1129,7 @@ namespace tut
    {
        set_test_name("ReadPipe \"eof\" event");
        PythonProcessLauncher py(get_test_name(),
+                                 "from __future__ import print_function\n"
                                 "print('Hello from Python!')\n");
        py.mParams.files.add(LLProcess::FileParam()); // stdin
        py.mParams.files.add(LLProcess::FileParam("pipe")); // stdout
--- a/indra/llcommon/tests/threadsafeschedule_test.cpp
+++ b/indra/llcommon/tests/threadsafeschedule_test.cpp
@ -0,0 +1,69 @@
+/**
+ * @file   threadsafeschedule_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  Test for threadsafeschedule.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "threadsafeschedule.h"
+// STL headers
+// std headers
+#include <chrono>
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+
+using namespace std::literals::chrono_literals; // ms suffix
+using namespace std::literals::string_literals; // s suffix
+using Queue = LL::ThreadSafeSchedule<std::string>;
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    struct threadsafeschedule_data
+    {
+        Queue queue;
+    };
+    typedef test_group<threadsafeschedule_data> threadsafeschedule_group;
+    typedef threadsafeschedule_group::object object;
+    threadsafeschedule_group threadsafeschedulegrp("threadsafeschedule");
+
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("push");
+        // Simply calling push() a few times might result in indeterminate
+        // delivery order if the resolution of steady_clock is coarser than
+        // the real time required for each push() call. Explicitly increment
+        // the timestamp for each one -- but since we're passing explicit
+        // timestamps, make the queue reorder them.
+        queue.push(Queue::TimeTuple(Queue::Clock::now() + 200ms, "ghi"));
+        // Given the various push() overloads, you have to match the type
+        // exactly: conversions are ambiguous.
+        queue.push("abc"s);
+        queue.push(Queue::Clock::now() + 100ms, "def");
+        queue.close();
+        auto entry = queue.pop();
+        ensure_equals("failed to pop first", std::get<0>(entry), "abc"s);
+        entry = queue.pop();
+        ensure_equals("failed to pop second", std::get<0>(entry), "def"s);
+        ensure("queue not closed", queue.isClosed());
+        ensure("queue prematurely done", ! queue.done());
+        std::string s;
+        bool popped = queue.tryPopFor(1s, s);
+        ensure("failed to pop third", popped);
+        ensure_equals("third is wrong", s, "ghi"s);
+        popped = queue.tryPop(s);
+        ensure("queue not empty", ! popped);
+        ensure("queue not done", queue.done());
+    }
+} // namespace tut
--- a/indra/llcommon/tests/tuple_test.cpp
+++ b/indra/llcommon/tests/tuple_test.cpp
@ -0,0 +1,47 @@
+/**
+ * @file   tuple_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  Test for tuple.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "tuple.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    struct tuple_data
+    {
+    };
+    typedef test_group<tuple_data> tuple_group;
+    typedef tuple_group::object object;
+    tuple_group tuplegrp("tuple");
+
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("tuple");
+        std::tuple<std::string, int> tup{ "abc", 17 };
+        std::tuple<int, std::string, int> ptup{ tuple_cons(34, tup) };
+        std::tuple<std::string, int> tup2;
+        int i;
+        std::tie(i, tup2) = tuple_split(ptup);
+        ensure_equals("tuple_car() fail", i, 34);
+        ensure_equals("tuple_cdr() (0) fail", std::get<0>(tup2), "abc");
+        ensure_equals("tuple_cdr() (1) fail", std::get<1>(tup2), 17);
+    }
+} // namespace tut
--- a/indra/llcommon/tests/workqueue_test.cpp
+++ b/indra/llcommon/tests/workqueue_test.cpp
@ -0,0 +1,235 @@
+/**
+ * @file   workqueue_test.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-07
+ * @brief  Test for workqueue.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "workqueue.h"
+// STL headers
+// std headers
+#include <chrono>
+#include <deque>
+// external library headers
+// other Linden headers
+#include "../test/lltut.h"
+#include "../test/catch_and_store_what_in.h"
+#include "llcond.h"
+#include "llcoros.h"
+#include "lleventcoro.h"
+#include "llstring.h"
+#include "stringize.h"
+
+using namespace LL;
+using namespace std::literals::chrono_literals; // ms suffix
+using namespace std::literals::string_literals; // s suffix
+
+/*****************************************************************************
+*   TUT
+*****************************************************************************/
+namespace tut
+{
+    struct workqueue_data
+    {
+        WorkQueue queue{"queue"};
+    };
+    typedef test_group<workqueue_data> workqueue_group;
+    typedef workqueue_group::object object;
+    workqueue_group workqueuegrp("workqueue");
+
+    template<> template<>
+    void object::test<1>()
+    {
+        set_test_name("name");
+        ensure_equals("didn't capture name", queue.getKey(), "queue");
+        ensure("not findable", WorkQueue::getInstance("queue") == queue.getWeak().lock());
+        WorkQueue q2;
+        ensure("has no name", LLStringUtil::startsWith(q2.getKey(), "WorkQueue"));
+    }
+
+    template<> template<>
+    void object::test<2>()
+    {
+        set_test_name("post");
+        bool wasRun{ false };
+        // We only get away with binding a simple bool because we're running
+        // the work on the same thread.
+        queue.post([&wasRun](){ wasRun = true; });
+        queue.close();
+        ensure("ran too soon", ! wasRun);
+        queue.runUntilClose();
+        ensure("didn't run", wasRun);
+    }
+
+    template<> template<>
+    void object::test<3>()
+    {
+        set_test_name("postEvery");
+        // record of runs
+        using Shared = std::deque<WorkQueue::TimePoint>;
+        // This is an example of how to share data between the originator of
+        // postEvery(work) and the work item itself, since usually a WorkQueue
+        // is used to dispatch work to a different thread. Neither of them
+        // should call any of LLCond's wait methods: you don't want to stall
+        // either the worker thread or the originating thread (conventionally
+        // main). Use LLCond or a subclass even if all you want to do is
+        // signal the work item that it can quit; consider LLOneShotCond.
+        LLCond<Shared> data;
+        auto start = WorkQueue::TimePoint::clock::now();
+        auto interval = 100ms;
+        queue.postEvery(
+            interval,
+            [&data, count = 0]
+            () mutable
+            {
+                // record the timestamp at which this instance is running
+                data.update_one(
+                    [](Shared& data)
+                    {
+                        data.push_back(WorkQueue::TimePoint::clock::now());
+                    });
+                // by the 3rd call, return false to stop
+                return (++count < 3);
+            });
+        // no convenient way to close() our queue while we've got a
+        // postEvery() running, so run until we have exhausted the iterations
+        // or we time out waiting
+        for (auto finish = start + 10*interval;
+             WorkQueue::TimePoint::clock::now() < finish &&
+             data.get([](const Shared& data){ return data.size(); }) < 3; )
+        {
+            queue.runPending();
+            std::this_thread::sleep_for(interval/10);
+        }
+        // Take a copy of the captured deque.
+        Shared result = data.get();
+        ensure_equals("called wrong number of times", result.size(), 3);
+        // postEvery() assumes you want the first call to happen right away.
+        // Pretend our start time was (interval) earlier than that, to make
+        // our too early/too late tests uniform for all entries.
+        start -= interval;
+        for (size_t i = 0; i < result.size(); ++i)
+        {
+            auto diff = result[i] - start;
+            start += interval;
+            try
+            {
+                ensure(STRINGIZE("call " << i << " too soon"), diff >= interval);
+                ensure(STRINGIZE("call " << i << " too late"), diff < interval*1.5);
+            }
+            catch (const tut::failure&)
+            {
+                auto interval_ms = interval / 1ms;
+                auto diff_ms = diff / 1ms;
+                std::cerr << "interval " << interval_ms
+                          << "ms; diff " << diff_ms << "ms" << std::endl;
+                throw;
+            }
+        }
+    }
+
+    template<> template<>
+    void object::test<4>()
+    {
+        set_test_name("postTo");
+        WorkQueue main("main");
+        auto qptr = WorkQueue::getInstance("queue");
+        int result = 0;
+        main.postTo(
+            qptr,
+            [](){ return 17; },
+            // Note that a postTo() *callback* can safely bind a reference to
+            // a variable on the invoking thread, because the callback is run
+            // on the invoking thread. (Of course the bound variable must
+            // survive until the callback is called.)
+            [&result](int i){ result = i; });
+        // this should post the callback to main
+        qptr->runOne();
+        // this should run the callback
+        main.runOne();
+        ensure_equals("failed to run int callback", result, 17);
+
+        std::string alpha;
+        // postTo() handles arbitrary return types
+        main.postTo(
+            qptr,
+            [](){ return "abc"s; },
+            [&alpha](const std::string& s){ alpha = s; });
+        qptr->runPending();
+        main.runPending();
+        ensure_equals("failed to run string callback", alpha, "abc");
+    }
+
+    template<> template<>
+    void object::test<5>()
+    {
+        set_test_name("postTo with void return");
+        WorkQueue main("main");
+        auto qptr = WorkQueue::getInstance("queue");
+        std::string observe;
+        main.postTo(
+            qptr,
+            // The ONLY reason we can get away with binding a reference to
+            // 'observe' in our work callable is because we're directly
+            // calling qptr->runOne() on this same thread. It would be a
+            // mistake to do that if some other thread were servicing 'queue'.
+            [&observe](){ observe = "queue"; },
+            [&observe](){ observe.append(";main"); });
+        qptr->runOne();
+        main.runOne();
+        ensure_equals("failed to run both lambdas", observe, "queue;main");
+    }
+
+    template<> template<>
+    void object::test<6>()
+    {
+        set_test_name("waitForResult");
+        std::string stored;
+        // Try to call waitForResult() on this thread's main coroutine. It
+        // should throw because the main coroutine must service the queue.
+        auto what{ catch_what<WorkQueue::Error>(
+                [this, &stored](){ stored = queue.waitForResult(
+                        [](){ return "should throw"; }); }) };
+        ensure("lambda should not have run", stored.empty());
+        ensure_not("waitForResult() should have thrown", what.empty());
+        ensure(STRINGIZE("should mention waitForResult: " << what),
+               what.find("waitForResult") != std::string::npos);
+
+        // Call waitForResult() on a coroutine, with a string result.
+        LLCoros::instance().launch(
+            "waitForResult string",
+            [this, &stored]()
+            { stored = queue.waitForResult(
+                    [](){ return "string result"; }); });
+        llcoro::suspend();
+        // Nothing will have happened yet because, even if the coroutine did
+        // run immediately, all it did was to queue the inner lambda on
+        // 'queue'. Service it.
+        queue.runOne();
+        llcoro::suspend();
+        ensure_equals("bad waitForResult return", stored, "string result");
+
+        // Call waitForResult() on a coroutine, with a void callable.
+        stored.clear();
+        bool done = false;
+        LLCoros::instance().launch(
+            "waitForResult void",
+            [this, &stored, &done]()
+            {
+                queue.waitForResult([&stored](){ stored = "ran"; });
+                done = true;
+            });
+        llcoro::suspend();
+        queue.runOne();
+        llcoro::suspend();
+        ensure_equals("didn't run coroutine", stored, "ran");
+        ensure("void waitForResult() didn't return", done);
+    }
+} // namespace tut
--- a/indra/llcommon/threadpool.cpp
+++ b/indra/llcommon/threadpool.cpp
@ -0,0 +1,88 @@
+/**
+ * @file   threadpool.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-21
+ * @brief  Implementation for threadpool.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "threadpool.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "llerror.h"
+#include "llevents.h"
+#include "stringize.h"
+
+LL::ThreadPool::ThreadPool(const std::string& name, size_t threads, size_t capacity):
+    mQueue(name, capacity),
+    mName("ThreadPool:" + name),
+    mThreadCount(threads)
+{}
+
+void LL::ThreadPool::start()
+{
+    for (size_t i = 0; i < mThreadCount; ++i)
+    {
+        std::string tname{ stringize(mName, ':', (i+1), '/', mThreadCount) };
+        mThreads.emplace_back(tname, [this, tname]()
+            {
+                LL_PROFILER_SET_THREAD_NAME(tname.c_str());
+                run(tname);
+            });
+    }
+    // Listen on "LLApp", and when the app is shutting down, close the queue
+    // and join the workers.
+    LLEventPumps::instance().obtain("LLApp").listen(
+        mName,
+        [this](const LLSD& stat)
+        {
+            std::string status(stat["status"]);
+            if (status != "running")
+            {
+                // viewer is starting shutdown -- proclaim the end is nigh!
+                LL_DEBUGS("ThreadPool") << mName << " saw " << status << LL_ENDL;
+                close();
+            }
+            return false;
+        });
+}
+
+LL::ThreadPool::~ThreadPool()
+{
+    close();
+}
+
+void LL::ThreadPool::close()
+{
+    if (! mQueue.isClosed())
+    {
+        LL_DEBUGS("ThreadPool") << mName << " closing queue and joining threads" << LL_ENDL;
+        mQueue.close();
+        for (auto& pair: mThreads)
+        {
+            LL_DEBUGS("ThreadPool") << mName << " waiting on thread " << pair.first << LL_ENDL;
+            pair.second.join();
+        }
+        LL_DEBUGS("ThreadPool") << mName << " shutdown complete" << LL_ENDL;
+    }
+}
+
+void LL::ThreadPool::run(const std::string& name)
+{
+    LL_DEBUGS("ThreadPool") << name << " starting" << LL_ENDL;
+    run();
+    LL_DEBUGS("ThreadPool") << name << " stopping" << LL_ENDL;
+}
+
+void LL::ThreadPool::run()
+{
+    mQueue.runUntilClose();
+}
--- a/indra/llcommon/threadpool.h
+++ b/indra/llcommon/threadpool.h
@ -0,0 +1,71 @@
+/**
+ * @file   threadpool.h
+ * @author Nat Goodspeed
+ * @date   2021-10-21
+ * @brief  ThreadPool configures a WorkQueue along with a pool of threads to
+ *         service it.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_THREADPOOL_H)
+#define LL_THREADPOOL_H
+
+#include "workqueue.h"
+#include <string>
+#include <thread>
+#include <utility>                  // std::pair
+#include <vector>
+
+namespace LL
+{
+
+    class ThreadPool
+    {
+    public:
+        /**
+         * Pass ThreadPool a string name. This can be used to look up the
+         * relevant WorkQueue.
+         */
+        ThreadPool(const std::string& name, size_t threads=1, size_t capacity=1024);
+        virtual ~ThreadPool();
+
+        /**
+         * Launch the ThreadPool. Until this call, a constructed ThreadPool
+         * launches no threads. That permits coders to derive from ThreadPool,
+         * or store it as a member of some other class, but refrain from
+         * launching it until all other construction is complete.
+         */
+        void start();
+
+        /**
+         * ThreadPool listens for application shutdown messages on the "LLApp"
+         * LLEventPump. Call close() to shut down this ThreadPool early.
+         */
+        void close();
+
+        std::string getName() const { return mName; }
+        size_t getWidth() const { return mThreads.size(); }
+        /// obtain a non-const reference to the WorkQueue to post work to it
+        WorkQueue& getQueue() { return mQueue; }
+
+        /**
+         * Override run() if you need special processing. The default run()
+         * implementation simply calls WorkQueue::runUntilClose().
+         */
+        virtual void run();
+
+    private:
+        void run(const std::string& name);
+
+        WorkQueue mQueue;
+        std::string mName;
+        size_t mThreadCount;
+        std::vector<std::pair<std::string, std::thread>> mThreads;
+    };
+
+} // namespace LL
+
+#endif /* ! defined(LL_THREADPOOL_H) */
--- a/indra/llcommon/threadsafeschedule.h
+++ b/indra/llcommon/threadsafeschedule.h
@ -0,0 +1,399 @@
+/**
+ * @file   threadsafeschedule.h
+ * @author Nat Goodspeed
+ * @date   2021-10-02
+ * @brief  ThreadSafeSchedule is an ordered queue in which every item has an
+ *         associated timestamp.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_THREADSAFESCHEDULE_H)
+#define LL_THREADSAFESCHEDULE_H
+
+#include "chrono.h"
+#include "llexception.h"
+#include "llthreadsafequeue.h"
+#include "tuple.h"
+#include <chrono>
+#include <tuple>  
+
+namespace LL
+{
+    namespace ThreadSafeSchedulePrivate
+    {
+        using TimePoint = std::chrono::steady_clock::time_point;
+        // Bundle consumer's data with a TimePoint to order items by timestamp.
+        template <typename... Args>
+        using TimestampedTuple = std::tuple<TimePoint, Args...>;
+
+        // comparison functor for TimedTuples -- see TimedQueue comments
+        struct ReverseTupleOrder
+        {
+            template <typename Tuple>
+            bool operator()(const Tuple& left, const Tuple& right) const
+            {
+                return std::get<0>(left) > std::get<0>(right);
+            }
+        };
+
+        template <typename... Args>
+        using TimedQueue = PriorityQueueAdapter<
+            TimestampedTuple<Args...>,
+            // std::vector is the default storage for std::priority_queue,
+            // have to restate to specify comparison template parameter
+            std::vector<TimestampedTuple<Args...>>,
+            // std::priority_queue uses a counterintuitive comparison
+            // behavior: the default std::less comparator is used to present
+            // the *highest* value as top(). So to sort by earliest timestamp,
+            // we must invert by using >.
+            ReverseTupleOrder>;
+    } // namespace ThreadSafeSchedulePrivate
+
+    /**
+     * ThreadSafeSchedule is an ordered LLThreadSafeQueue in which every item
+     * is given an associated timestamp. That is, TimePoint is implicitly
+     * prepended to the std::tuple with the specified types.
+     *
+     * Items are popped in increasing chronological order. Moreover, any item
+     * with a timestamp in the future is held back until
+     * std::chrono::steady_clock reaches that timestamp.
+     */
+    template <typename... Args>
+    class ThreadSafeSchedule:
+        public LLThreadSafeQueue<ThreadSafeSchedulePrivate::TimestampedTuple<Args...>,
+                                 ThreadSafeSchedulePrivate::TimedQueue<Args...>>
+    {
+    public:
+        using DataTuple = std::tuple<Args...>;
+        using TimeTuple = ThreadSafeSchedulePrivate::TimestampedTuple<Args...>;
+
+    private:
+        using super = LLThreadSafeQueue<TimeTuple, ThreadSafeSchedulePrivate::TimedQueue<Args...>>;
+        using lock_t = typename super::lock_t;
+        // VS 2017 needs this due to a bug:
+        // https://developercommunity.visualstudio.com/t/cannot-access-protected-enumerator-of-enclosing-cl/203430
+        enum pop_result { EMPTY=super::EMPTY, DONE=super::DONE, WAITING=super::WAITING, POPPED=super::POPPED };
+
+    public:
+        using Closed = LLThreadSafeQueueInterrupt;
+        using TimePoint = ThreadSafeSchedulePrivate::TimePoint;
+        using Clock = TimePoint::clock;
+
+        ThreadSafeSchedule(U32 capacity=1024):
+            super(capacity)
+        {}
+
+        /*----------------------------- push() -----------------------------*/
+        /// explicitly pass TimeTuple
+        using super::push;
+
+        /// pass DataTuple with implicit now
+        // This could be ambiguous for Args with a single type. Unfortunately
+        // we can't enable_if an individual method with a condition based on
+        // the *class* template arguments, only on that method's template
+        // arguments. We could specialize this class for the single-Args case;
+        // we could minimize redundancy by breaking out a common base class...
+        void push(const DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            push(tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass each component of the TimeTuple
+        void push(const TimePoint& time, Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            push(TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass every component except the TimePoint (implies now)
+        // This could be ambiguous if the first specified template parameter
+        // type is also TimePoint. We could try to disambiguate, but a simpler
+        // approach would be for the caller to explicitly construct DataTuple
+        // and call that overload.
+        void push(Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            push(Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*--------------------------- tryPush() ----------------------------*/
+        /// explicit TimeTuple
+        using super::tryPush;
+
+        /// DataTuple with implicit now
+        bool tryPush(const DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPush(tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass components
+        bool tryPush(const TimePoint& time, Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPush(TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now
+        bool tryPush(Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPush(Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*-------------------------- tryPushFor() --------------------------*/
+        /// explicit TimeTuple
+        using super::tryPushFor;
+
+        /// DataTuple with implicit now
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        const DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushFor(timeout, tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass components
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        const TimePoint& time, Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushFor(TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now
+        template <typename Rep, typename Period>
+        bool tryPushFor(const std::chrono::duration<Rep, Period>& timeout,
+                        Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushFor(Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*------------------------- tryPushUntil() -------------------------*/
+        /// explicit TimeTuple
+        using super::tryPushUntil;
+
+        /// DataTuple with implicit now
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          const DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushUntil(until, tuple_cons(Clock::now(), tuple));
+        }
+
+        /// individually pass components
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          const TimePoint& time, Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushUntil(until, TimeTuple(time, std::forward<Args>(args)...));
+        }
+
+        /// individually pass components with implicit now
+        template <typename Clock, typename Duration>
+        bool tryPushUntil(const std::chrono::time_point<Clock, Duration>& until,
+                          Args&&... args)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tryPushUntil(until, Clock::now(), std::forward<Args>(args)...);
+        }
+
+        /*----------------------------- pop() ------------------------------*/
+        // Our consumer may or may not care about the timestamp associated
+        // with each popped item, so we allow retrieving either DataTuple or
+        // TimeTuple. One potential use would be to observe, and possibly
+        // adjust for, the time lag between the item time and the actual
+        // current time.
+
+        /// pop DataTuple by value
+        // It would be great to notice when sizeof...(Args) == 1 and directly
+        // return the first (only) value, instead of making pop()'s caller
+        // call std::get<0>(value). See push(DataTuple) remarks for why we
+        // haven't yet jumped through those hoops.
+        DataTuple pop()
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            return tuple_cdr(popWithTime());
+        }
+
+        /// pop TimeTuple by value
+        TimeTuple popWithTime()
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            lock_t lock(super::mLock);
+            // We can't just sit around waiting forever, given that there may
+            // be items in the queue that are not yet ready but will *become*
+            // ready in the near future. So in fact, with this class, every
+            // pop() becomes a tryPopUntil(), constrained to the timestamp of
+            // the head item. It almost doesn't matter what we specify for the
+            // caller's time constraint -- all we really care about is the
+            // head item's timestamp. Since pop() and popWithTime() are
+            // defined to wait until either an item becomes available or the
+            // queue is closed, loop until one of those things happens. The
+            // constraint we pass just determines how often we'll loop while
+            // waiting.
+            TimeTuple tt;
+            while (true)
+            {
+                // Pick a point suitably far into the future.
+                TimePoint until = TimePoint::clock::now() + std::chrono::hours(24);
+                pop_result popped = tryPopUntil_(lock, until, tt);
+                if (popped == POPPED)
+                    return std::move(tt);
+
+                // DONE: throw, just as super::pop() does
+                if (popped == DONE)
+                {
+                    LLTHROW(LLThreadSafeQueueInterrupt());
+                }
+                // WAITING: we've still got items to drain.
+                // EMPTY: not closed, so it's worth waiting for more items.
+                // Either way, loop back to wait.
+            }
+        }
+
+        // We can use tryPop(TimeTuple&) just as it stands; the only behavior
+        // difference is in our canPop() override method.
+        using super::tryPop;
+
+        /// tryPop(DataTuple&)
+        bool tryPop(DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            TimeTuple tt;
+            if (! super::tryPop(tt))
+                return false;
+            tuple = tuple_cdr(std::move(tt));
+            return true;
+        }
+
+        /// for when Args has exactly one type
+        bool tryPop(typename std::tuple_element<1, TimeTuple>::type& value)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            TimeTuple tt;
+            if (! super::tryPop(tt))
+                return false;
+            value = std::get<1>(std::move(tt));
+            return true;
+        }
+
+        /// tryPopFor()
+        template <typename Rep, typename Period, typename Tuple>
+        bool tryPopFor(const std::chrono::duration<Rep, Period>& timeout, Tuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            // It's important to use OUR tryPopUntil() implementation, rather
+            // than delegating immediately to our base class.
+            return tryPopUntil(Clock::now() + timeout, tuple);
+        }
+
+        /// tryPopUntil(TimeTuple&)
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         TimeTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            // super::tryPopUntil() wakes up when an item becomes available or
+            // we hit 'until', whichever comes first. Thing is, the current
+            // head of the queue could become ready sooner than either of
+            // those events, and we need to deliver it as soon as it does.
+            // Don't wait past the TimePoint of the head item.
+            // Naturally, lock the queue before peeking at mStorage.
+            return super::tryLockUntil(
+                until,
+                [this, until, &tuple](lock_t& lock)
+                {
+                    // Use our time_point_cast to allow for 'until' that's a
+                    // time_point type other than TimePoint.
+                    return POPPED ==
+                        tryPopUntil_(lock, LL::time_point_cast<TimePoint>(until), tuple);
+                });
+        }
+
+        pop_result tryPopUntil_(lock_t& lock, const TimePoint& until, TimeTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            TimePoint adjusted = until;
+            if (! super::mStorage.empty())
+            {
+                LL_PROFILE_ZONE_NAMED("tpu - adjust");
+                // use whichever is earlier: the head item's timestamp, or
+                // the caller's limit
+                adjusted = min(std::get<0>(super::mStorage.front()), adjusted);
+            }
+            // now delegate to base-class tryPopUntil_()
+            pop_result popped;
+            {
+                LL_PROFILE_ZONE_NAMED("tpu - super");
+                while ((popped = pop_result(super::tryPopUntil_(lock, adjusted, tuple))) == WAITING)
+                {
+                    // If super::tryPopUntil_() returns WAITING, it means there's
+                    // a head item, but it's not yet time. But it's worth looping
+                    // back to recheck.
+                }
+            }
+            return popped;
+        }
+
+        /// tryPopUntil(DataTuple&)
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         DataTuple& tuple)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            TimeTuple tt;
+            if (! tryPopUntil(until, tt))
+                return false;
+            tuple = tuple_cdr(std::move(tt));
+            return true;
+        }
+
+        /// for when Args has exactly one type
+        template <typename Clock, typename Duration>
+        bool tryPopUntil(const std::chrono::time_point<Clock, Duration>& until,
+                         typename std::tuple_element<1, TimeTuple>::type& value)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            TimeTuple tt;
+            if (! tryPopUntil(until, tt))
+                return false;
+            value = std::get<1>(std::move(tt));
+            return true;
+        }
+
+        /*------------------------------ etc. ------------------------------*/
+        // We can't hide items that aren't yet ready because we can't traverse
+        // the underlying priority_queue: it has no iterators, only top(). So
+        // a consumer could observe size() > 0 and yet tryPop() returns false.
+        // Shrug, in a multi-consumer scenario that would be expected behavior.
+        using super::size;
+        // open/closed state
+        using super::close;
+        using super::isClosed;
+        using super::done;
+
+    private:
+        // this method is called by base class pop_() every time we're
+        // considering whether to deliver the current head element
+        bool canPop(const TimeTuple& head) const override
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            // an item with a future timestamp isn't yet ready to pop
+            // (should we add some slop for overhead?)
+            return std::get<0>(head) <= Clock::now();
+        }
+    };
+
+} // namespace LL
+
+#endif /* ! defined(LL_THREADSAFESCHEDULE_H) */
--- a/indra/llcommon/tuple.h
+++ b/indra/llcommon/tuple.h
@ -0,0 +1,84 @@
+/**
+ * @file   tuple.h
+ * @author Nat Goodspeed
+ * @date   2021-10-04
+ * @brief  A couple tuple utilities
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_TUPLE_H)
+#define LL_TUPLE_H
+
+#include <tuple>
+#include <type_traits>              // std::remove_reference
+#include <utility>                  // std::pair
+
+/**
+ * tuple_cons() behaves like LISP cons: it uses std::tuple_cat() to prepend a
+ * new item of arbitrary type to an existing std::tuple.
+ */
+template <typename First, typename... Rest, typename Tuple_=std::tuple<Rest...>>
+auto tuple_cons(First&& first, Tuple_&& rest)
+{
+    // All we need to do is make a tuple containing 'first', and let
+    // tuple_cat() do the hard part.
+    return std::tuple_cat(std::tuple<First>(std::forward<First>(first)),
+                          std::forward<Tuple_>(rest));
+}
+
+/**
+ * tuple_car() behaves like LISP car: it extracts the first item from a
+ * std::tuple.
+ */
+template <typename... Args, typename Tuple_=std::tuple<Args...>>
+auto tuple_car(Tuple_&& tuple)
+{
+    return std::get<0>(std::forward<Tuple_>(tuple));
+}
+
+/**
+ * tuple_cdr() behaves like LISP cdr: it returns a new tuple containing
+ * everything BUT the first item.
+ */
+// derived from https://stackoverflow.com/a/24046437
+template <typename Tuple, std::size_t... Indices>
+auto tuple_cdr_(Tuple&& tuple, const std::index_sequence<Indices...>)
+{
+    // Given an index sequence from [0..N-1), extract tuple items [1..N)
+    return std::make_tuple(std::get<Indices+1u>(std::forward<Tuple>(tuple))...);
+}
+
+template <typename Tuple>
+auto tuple_cdr(Tuple&& tuple)
+{
+    return tuple_cdr_(
+        std::forward<Tuple>(tuple),
+        // Pass helper function an index sequence one item shorter than tuple
+        std::make_index_sequence<
+            std::tuple_size<
+                // tuple_size doesn't like reference types
+                typename std::remove_reference<Tuple>::type
+            >::value - 1u>
+        ());
+}
+
+/**
+ * tuple_split(), the opposite of tuple_cons(), has no direct analog in LISP.
+ * It returns a std::pair of tuple_car(), tuple_cdr(). We could call this
+ * function tuple_car_cdr(), or tuple_slice() or some such. But tuple_split()
+ * feels more descriptive.
+ */
+template <typename... Args, typename Tuple_=std::tuple<Args...>>
+auto tuple_split(Tuple_&& tuple)
+{
+    // We're not really worried about forwarding multiple times a tuple that
+    // might contain move-only items, because the implementation above only
+    // applies std::get() exactly once to each item.
+    return std::make_pair(tuple_car(std::forward<Tuple_>(tuple)),
+                          tuple_cdr(std::forward<Tuple_>(tuple)));
+}
+
+#endif /* ! defined(LL_TUPLE_H) */
--- a/indra/llcommon/workqueue.cpp
+++ b/indra/llcommon/workqueue.cpp
@ -0,0 +1,158 @@
+/**
+ * @file   workqueue.cpp
+ * @author Nat Goodspeed
+ * @date   2021-10-06
+ * @brief  Implementation for WorkQueue.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+// Precompiled header
+#include "linden_common.h"
+// associated header
+#include "workqueue.h"
+// STL headers
+// std headers
+// external library headers
+// other Linden headers
+#include "llcoros.h"
+#include LLCOROS_MUTEX_HEADER
+#include "llerror.h"
+#include "llexception.h"
+#include "stringize.h"
+
+using Mutex = LLCoros::Mutex;
+using Lock  = LLCoros::LockType;
+
+LL::WorkQueue::WorkQueue(const std::string& name, size_t capacity):
+    super(makeName(name)),
+    mQueue(capacity)
+{
+    // TODO: register for "LLApp" events so we can implicitly close() on
+    // viewer shutdown.
+}
+
+void LL::WorkQueue::close()
+{
+    mQueue.close();
+}
+
+size_t LL::WorkQueue::size()
+{
+    return mQueue.size();
+}
+
+bool LL::WorkQueue::isClosed()
+{
+    return mQueue.isClosed();
+}
+
+bool LL::WorkQueue::done()
+{
+    return mQueue.done();
+}
+
+void LL::WorkQueue::runUntilClose()
+{
+    try
+    {
+        for (;;)
+        {
+            LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+            callWork(mQueue.pop());
+        }
+    }
+    catch (const Queue::Closed&)
+    {
+    }
+}
+
+bool LL::WorkQueue::runPending()
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    for (Work work; mQueue.tryPop(work); )
+    {
+        callWork(work);
+    }
+    return ! mQueue.done();
+}
+
+bool LL::WorkQueue::runOne()
+{
+    Work work;
+    if (mQueue.tryPop(work))
+    {
+        callWork(work);
+    }
+    return ! mQueue.done();
+}
+
+bool LL::WorkQueue::runUntil(const TimePoint& until)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    // Should we subtract some slop to allow for typical Work execution time?
+    // How much slop?
+    // runUntil() is simply a time-bounded runPending().
+    for (Work work; TimePoint::clock::now() < until && mQueue.tryPop(work); )
+    {
+        callWork(work);
+    }
+    return ! mQueue.done();
+}
+
+std::string LL::WorkQueue::makeName(const std::string& name)
+{
+    if (! name.empty())
+        return name;
+
+    static U32 discriminator = 0;
+    static Mutex mutex;
+    U32 num;
+    {
+        // Protect discriminator from concurrent access by different threads.
+        // It can't be thread_local, else two racing threads will come up with
+        // the same name.
+        Lock lk(mutex);
+        num = discriminator++;
+    }
+    return STRINGIZE("WorkQueue" << num);
+}
+
+void LL::WorkQueue::callWork(const Queue::DataTuple& work)
+{
+    // ThreadSafeSchedule::pop() always delivers a tuple, even when
+    // there's only one data field per item, as for us.
+    callWork(std::get<0>(work));
+}
+
+void LL::WorkQueue::callWork(const Work& work)
+{
+    LL_PROFILE_ZONE_SCOPED_CATEGORY_THREAD;
+    try
+    {
+        work();
+    }
+    catch (...)
+    {
+        // No matter what goes wrong with any individual work item, the worker
+        // thread must go on! Log our own instance name with the exception.
+        LOG_UNHANDLED_EXCEPTION(getKey());
+    }
+}
+
+void LL::WorkQueue::error(const std::string& msg)
+{
+    LL_ERRS("WorkQueue") << msg << LL_ENDL;
+}
+
+void LL::WorkQueue::checkCoroutine(const std::string& method)
+{
+    // By convention, the default coroutine on each thread has an empty name
+    // string. See also LLCoros::logname().
+    if (LLCoros::getName().empty())
+    {
+        LLTHROW(Error("Do not call " + method + " from a thread's default coroutine"));
+    }
+}
--- a/indra/llcommon/workqueue.h
+++ b/indra/llcommon/workqueue.h
@ -0,0 +1,574 @@
+/**
+ * @file   workqueue.h
+ * @author Nat Goodspeed
+ * @date   2021-09-30
+ * @brief  Queue used for inter-thread work passing.
+ * 
+ * $LicenseInfo:firstyear=2021&license=viewerlgpl$
+ * Copyright (c) 2021, Linden Research, Inc.
+ * $/LicenseInfo$
+ */
+
+#if ! defined(LL_WORKQUEUE_H)
+#define LL_WORKQUEUE_H
+
+#include "llcoros.h"
+#include "llexception.h"
+#include "llinstancetracker.h"
+#include "threadsafeschedule.h"
+#include <chrono>
+#include <exception>                // std::current_exception
+#include <functional>               // std::function
+#include <string>
+
+namespace LL
+{
+    /**
+     * A typical WorkQueue has a string name that can be used to find it.
+     */
+    class WorkQueue: public LLInstanceTracker<WorkQueue, std::string>
+    {
+    private:
+        using super = LLInstanceTracker<WorkQueue, std::string>;
+
+    public:
+        using Work = std::function<void()>;
+
+    private:
+        using Queue = ThreadSafeSchedule<Work>;
+        // helper for postEvery()
+        template <typename Rep, typename Period, typename CALLABLE>
+        class BackJack;
+
+    public:
+        using TimePoint = Queue::TimePoint;
+        using TimedWork = Queue::TimeTuple;
+        using Closed    = Queue::Closed;
+
+        struct Error: public LLException
+        {
+            Error(const std::string& what): LLException(what) {}
+        };
+
+        /**
+         * You may omit the WorkQueue name, in which case a unique name is
+         * synthesized; for practical purposes that makes it anonymous.
+         */
+        WorkQueue(const std::string& name = std::string(), size_t capacity=1024);
+
+        /**
+         * Since the point of WorkQueue is to pass work to some other worker
+         * thread(s) asynchronously, it's important that the WorkQueue continue
+         * to exist until the worker thread(s) have drained it. To communicate
+         * that it's time for them to quit, close() the queue.
+         */
+        void close();
+
+        /**
+         * WorkQueue supports multiple producers and multiple consumers. In
+         * the general case it's misleading to test size(), since any other
+         * thread might change it the nanosecond the lock is released. On that
+         * basis, some might argue against publishing a size() method at all.
+         *
+         * But there are two specific cases in which a test based on size()
+         * might be reasonable:
+         *
+         * * If you're the only producer, noticing that size() == 0 is
+         *   meaningful.
+         * * If you're the only consumer, noticing that size() > 0 is
+         *   meaningful.
+         */
+        size_t size();
+        /// producer end: are we prevented from pushing any additional items?
+        bool isClosed();
+        /// consumer end: are we done, is the queue entirely drained?
+        bool done();
+
+        /*---------------------- fire and forget API -----------------------*/
+
+        /// fire-and-forget, but at a particular (future?) time
+        template <typename CALLABLE>
+        void post(const TimePoint& time, CALLABLE&& callable)
+        {
+            // Defer reifying an arbitrary CALLABLE until we hit this or
+            // postIfOpen(). All other methods should accept CALLABLEs of
+            // arbitrary type to avoid multiple levels of std::function
+            // indirection.
+            mQueue.push(TimedWork(time, std::move(callable)));
+        }
+
+        /// fire-and-forget
+        template <typename CALLABLE>
+        void post(CALLABLE&& callable)
+        {
+            // We use TimePoint::clock::now() instead of TimePoint's
+            // representation of the epoch because this WorkQueue may contain
+            // a mix of past-due TimedWork items and TimedWork items scheduled
+            // for the future. Sift this new item into the correct place.
+            post(TimePoint::clock::now(), std::move(callable));
+        }
+
+        /**
+         * post work for a particular time, unless the queue is closed before
+         * we can post
+         */
+        template <typename CALLABLE>
+        bool postIfOpen(const TimePoint& time, CALLABLE&& callable)
+        {
+            // Defer reifying an arbitrary CALLABLE until we hit this or
+            // post(). All other methods should accept CALLABLEs of arbitrary
+            // type to avoid multiple levels of std::function indirection.
+            return mQueue.pushIfOpen(TimedWork(time, std::move(callable)));
+        }
+
+        /**
+         * post work, unless the queue is closed before we can post
+         */
+        template <typename CALLABLE>
+        bool postIfOpen(CALLABLE&& callable)
+        {
+            return postIfOpen(TimePoint::clock::now(), std::move(callable));
+        }
+
+        /**
+         * Post work to be run at a specified time to another WorkQueue, which
+         * may or may not still exist and be open. Return true if we were able
+         * to post.
+         */
+        template <typename CALLABLE>
+        static bool postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable);
+
+        /**
+         * Post work to another WorkQueue, which may or may not still exist
+         * and be open. Return true if we were able to post.
+         */
+        template <typename CALLABLE>
+        static bool postMaybe(weak_t target, CALLABLE&& callable)
+        {
+            return postMaybe(target, TimePoint::clock::now(),
+                             std::forward<CALLABLE>(callable));
+        }
+
+        /**
+         * Launch a callable returning bool that will trigger repeatedly at
+         * specified interval, until the callable returns false.
+         *
+         * If you need to signal that callable from outside, DO NOT bind a
+         * reference to a simple bool! That's not thread-safe. Instead, bind
+         * an LLCond variant, e.g. LLOneShotCond or LLBoolCond.
+         */
+        template <typename Rep, typename Period, typename CALLABLE>
+        void postEvery(const std::chrono::duration<Rep, Period>& interval,
+                       CALLABLE&& callable);
+
+        template <typename CALLABLE>
+        bool tryPost(CALLABLE&& callable)
+        {
+            return mQueue.tryPush(TimedWork(TimePoint::clock::now(), std::move(callable)));
+        }
+
+        /*------------------------- handshake API --------------------------*/
+
+        /**
+         * Post work to another WorkQueue to be run at a specified time,
+         * requesting a specific callback to be run on this WorkQueue on
+         * completion.
+         *
+         * Returns true if able to post, false if the other WorkQueue is
+         * inaccessible.
+         */
+        // Apparently some Microsoft header file defines a macro CALLBACK? The
+        // natural template argument name CALLBACK produces very weird Visual
+        // Studio compile errors that seem utterly unrelated to this source
+        // code.
+        template <typename CALLABLE, typename FOLLOWUP>
+        bool postTo(weak_t target,
+                    const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback);
+
+        /**
+         * Post work to another WorkQueue, requesting a specific callback to
+         * be run on this WorkQueue on completion.
+         *
+         * Returns true if able to post, false if the other WorkQueue is
+         * inaccessible.
+         */
+        template <typename CALLABLE, typename FOLLOWUP>
+        bool postTo(weak_t target, CALLABLE&& callable, FOLLOWUP&& callback)
+        {
+            return postTo(target, TimePoint::clock::now(),
+                          std::move(callable), std::move(callback));
+        }
+
+        /**
+         * Post work to another WorkQueue to be run at a specified time,
+         * blocking the calling coroutine until then, returning the result to
+         * caller on completion.
+         *
+         * In general, we assume that each thread's default coroutine is busy
+         * servicing its WorkQueue or whatever. To try to prevent mistakes, we
+         * forbid calling waitForResult() from a thread's default coroutine.
+         */
+        template <typename CALLABLE>
+        auto waitForResult(const TimePoint& time, CALLABLE&& callable);
+
+        /**
+         * Post work to another WorkQueue, blocking the calling coroutine
+         * until then, returning the result to caller on completion.
+         *
+         * In general, we assume that each thread's default coroutine is busy
+         * servicing its WorkQueue or whatever. To try to prevent mistakes, we
+         * forbid calling waitForResult() from a thread's default coroutine.
+         */
+        template <typename CALLABLE>
+        auto waitForResult(CALLABLE&& callable)
+        {
+            return waitForResult(TimePoint::clock::now(), std::move(callable));
+        }
+
+        /*--------------------------- worker API ---------------------------*/
+
+        /**
+         * runUntilClose() pulls TimedWork items off this WorkQueue until the
+         * queue is closed, at which point it returns. This would be the
+         * typical entry point for a simple worker thread.
+         */
+        void runUntilClose();
+
+        /**
+         * runPending() runs all TimedWork items that are ready to run. It
+         * returns true if the queue remains open, false if the queue has been
+         * closed. This could be used by a thread whose primary purpose is to
+         * serve the queue, but also wants to do other things with its idle time.
+         */
+        bool runPending();
+
+        /**
+         * runOne() runs at most one ready TimedWork item -- zero if none are
+         * ready. It returns true if the queue remains open, false if the
+         * queue has been closed.
+         */
+        bool runOne();
+
+        /**
+         * runFor() runs a subset of ready TimedWork items, until the
+         * timeslice has been exceeded. It returns true if the queue remains
+         * open, false if the queue has been closed. This could be used by a
+         * busy main thread to lend a bounded few CPU cycles to this WorkQueue
+         * without risking the WorkQueue blowing out the length of any one
+         * frame.
+         */
+        template <typename Rep, typename Period>
+        bool runFor(const std::chrono::duration<Rep, Period>& timeslice)
+        {
+            LL_PROFILE_ZONE_SCOPED;
+            return runUntil(TimePoint::clock::now() + timeslice);
+        }
+
+        /**
+         * runUntil() is just like runFor(), only with a specific end time
+         * instead of a timeslice duration.
+         */
+        bool runUntil(const TimePoint& until);
+
+    private:
+        template <typename CALLABLE, typename FOLLOWUP>
+        static auto makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback);
+        /// general case: arbitrary C++ return type
+        template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE>
+        struct MakeReplyLambda;
+        /// specialize for CALLABLE returning void
+        template <typename CALLABLE, typename FOLLOWUP>
+        struct MakeReplyLambda<CALLABLE, FOLLOWUP, void>;
+
+        /// general case: arbitrary C++ return type
+        template <typename CALLABLE, typename RETURNTYPE>
+        struct WaitForResult;
+        /// specialize for CALLABLE returning void
+        template <typename CALLABLE>
+        struct WaitForResult<CALLABLE, void>;
+
+        static void checkCoroutine(const std::string& method);
+        static void error(const std::string& msg);
+        static std::string makeName(const std::string& name);
+        void callWork(const Queue::DataTuple& work);
+        void callWork(const Work& work);
+        Queue mQueue;
+    };
+
+    /**
+     * BackJack is, in effect, a hand-rolled lambda, binding a WorkQueue, a
+     * CALLABLE that returns bool, a TimePoint and an interval at which to
+     * relaunch it. As long as the callable continues returning true, BackJack
+     * keeps resubmitting it to the target WorkQueue.
+     */
+    // Why is BackJack a class and not a lambda? Because, unlike a lambda, a
+    // class method gets its own 'this' pointer -- which we need to resubmit
+    // the whole BackJack callable.
+    template <typename Rep, typename Period, typename CALLABLE>
+    class WorkQueue::BackJack
+    {
+    public:
+        // bind the desired data
+        BackJack(weak_t target,
+                 const TimePoint& start,
+                 const std::chrono::duration<Rep, Period>& interval,
+                 CALLABLE&& callable):
+            mTarget(target),
+            mStart(start),
+            mInterval(interval),
+            mCallable(std::move(callable))
+        {}
+
+        // Call by target WorkQueue -- note that although WE require a
+        // callable returning bool, WorkQueue wants a void callable. We
+        // consume the bool.
+        void operator()()
+        {
+            // If mCallable() throws an exception, don't catch it here: if it
+            // throws once, it's likely to throw every time, so it's a waste
+            // of time to arrange to call it again.
+            if (mCallable())
+            {
+                // Modify mStart to the new start time we desire. If we simply
+                // added mInterval to now, we'd get actual timings of
+                // (mInterval + slop), where 'slop' is the latency between the
+                // previous mStart and the WorkQueue actually calling us.
+                // Instead, add mInterval to mStart so that at least we
+                // register our intent to fire at exact mIntervals.
+                mStart += mInterval;
+
+                // We're being called at this moment by the target WorkQueue.
+                // Assume it still exists, rather than checking the result of
+                // lock().
+                // Resubmit the whole *this callable: that's why we're a class
+                // rather than a lambda. Allow moving *this so we can carry a
+                // move-only callable; but naturally this statement must be
+                // the last time we reference this instance, which may become
+                // moved-from.
+                try
+                {
+                    mTarget.lock()->post(mStart, std::move(*this));
+                }
+                catch (const Closed&)
+                {
+                    // Once this queue is closed, oh well, just stop
+                }
+            }
+        }
+
+    private:
+        weak_t mTarget;
+        TimePoint mStart;
+        std::chrono::duration<Rep, Period> mInterval;
+        CALLABLE mCallable;
+    };
+
+    template <typename Rep, typename Period, typename CALLABLE>
+    void WorkQueue::postEvery(const std::chrono::duration<Rep, Period>& interval,
+                              CALLABLE&& callable)
+    {
+        if (interval.count() <= 0)
+        {
+            // It's essential that postEvery() be called with a positive
+            // interval, since each call to BackJack posts another instance of
+            // itself at (start + interval) and we order by target time. A
+            // zero or negative interval would result in that BackJack
+            // instance going to the head of the queue every time, immediately
+            // ready to run. Effectively that would produce an infinite loop,
+            // a denial of service on this WorkQueue.
+            error("postEvery(interval) may not be 0");
+        }
+        // Instantiate and post a suitable BackJack, binding a weak_ptr to
+        // self, the current time, the desired interval and the desired
+        // callable.
+        post(
+            BackJack<Rep, Period, CALLABLE>(
+                 getWeak(), TimePoint::clock::now(), interval, std::move(callable)));
+    }
+
+    /// general case: arbitrary C++ return type
+    template <typename CALLABLE, typename FOLLOWUP, typename RETURNTYPE>
+    struct WorkQueue::MakeReplyLambda
+    {
+        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback)
+        {
+            // Call the callable in any case -- but to minimize
+            // copying the result, immediately bind it into the reply
+            // lambda. The reply lambda also binds the original
+            // callback, so that when we, the originating WorkQueue,
+            // finally receive and process the reply lambda, we'll
+            // call the bound callback with the bound result -- on the
+            // same thread that originally called postTo().
+            return
+                [result = std::forward<CALLABLE>(callable)(),
+                 callback = std::move(callback)]
+                ()
+                { callback(std::move(result)); };
+        }
+    };
+
+    /// specialize for CALLABLE returning void
+    template <typename CALLABLE, typename FOLLOWUP>
+    struct WorkQueue::MakeReplyLambda<CALLABLE, FOLLOWUP, void>
+    {
+        auto operator()(CALLABLE&& callable, FOLLOWUP&& callback)
+        {
+            // Call the callable, which produces no result.
+            std::forward<CALLABLE>(callable)();
+            // Our completion callback is simply the caller's callback.
+            return std::move(callback);
+        }
+    };
+
+    template <typename CALLABLE, typename FOLLOWUP>
+    auto WorkQueue::makeReplyLambda(CALLABLE&& callable, FOLLOWUP&& callback)
+    {
+        return MakeReplyLambda<CALLABLE, FOLLOWUP,
+                               decltype(std::forward<CALLABLE>(callable)())>()
+            (std::move(callable), std::move(callback));
+    }
+
+    template <typename CALLABLE, typename FOLLOWUP>
+    bool WorkQueue::postTo(weak_t target,
+                           const TimePoint& time, CALLABLE&& callable, FOLLOWUP&& callback)
+    {
+        LL_PROFILE_ZONE_SCOPED;
+        // We're being asked to post to the WorkQueue at target.
+        // target is a weak_ptr: have to lock it to check it.
+        auto tptr = target.lock();
+        if (! tptr)
+            // can't post() if the target WorkQueue has been destroyed
+            return false;
+
+        // Here we believe target WorkQueue still exists. Post to it a
+        // lambda that packages our callable, our callback and a weak_ptr
+        // to this originating WorkQueue.
+        tptr->post(
+            time,
+            [reply = super::getWeak(),
+             callable = std::move(callable),
+             callback = std::move(callback)]
+            ()
+            {
+                // Use postMaybe() below in case this originating WorkQueue
+                // has been closed or destroyed. Remember, the outer lambda is
+                // now running on a thread servicing the target WorkQueue, and
+                // real time has elapsed since postTo()'s tptr->post() call.
+                try
+                {
+                    // Make a reply lambda to repost to THIS WorkQueue.
+                    // Delegate to makeReplyLambda() so we can partially
+                    // specialize on void return.
+                    postMaybe(reply, makeReplyLambda(std::move(callable), std::move(callback)));
+                }
+                catch (...)
+                {
+                    // Either variant of makeReplyLambda() is responsible for
+                    // calling the caller's callable. If that throws, return
+                    // the exception to the originating thread.
+                    postMaybe(
+                        reply,
+                        // Bind the current exception to transport back to the
+                        // originating WorkQueue. Once there, rethrow it.
+                        [exc = std::current_exception()](){ std::rethrow_exception(exc); });
+                }
+            });
+
+        // looks like we were able to post()
+        return true;
+    }
+
+    template <typename CALLABLE>
+    bool WorkQueue::postMaybe(weak_t target, const TimePoint& time, CALLABLE&& callable)
+    {
+        LL_PROFILE_ZONE_SCOPED;
+        // target is a weak_ptr: have to lock it to check it
+        auto tptr = target.lock();
+        if (tptr)
+        {
+            try
+            {
+                tptr->post(time, std::forward<CALLABLE>(callable));
+                // we were able to post()
+                return true;
+            }
+            catch (const Closed&)
+            {
+                // target WorkQueue still exists, but is Closed
+            }
+        }
+        // either target no longer exists, or its WorkQueue is Closed
+        return false;
+    }
+
+    /// general case: arbitrary C++ return type
+    template <typename CALLABLE, typename RETURNTYPE>
+    struct WorkQueue::WaitForResult
+    {
+        auto operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable)
+        {
+            LLCoros::Promise<RETURNTYPE> promise;
+            self->post(
+                time,
+                // We dare to bind a reference to Promise because it's
+                // specifically designed for cross-thread communication.
+                [&promise, callable = std::move(callable)]()
+                {
+                    try
+                    {
+                        // call the caller's callable and trigger promise with result
+                        promise.set_value(callable());
+                    }
+                    catch (...)
+                    {
+                        promise.set_exception(std::current_exception());
+                    }
+                });
+            auto future{ LLCoros::getFuture(promise) };
+            // now, on the calling thread, wait for that result
+            LLCoros::TempStatus st("waiting for WorkQueue::waitForResult()");
+            return future.get();
+        }
+    };
+
+    /// specialize for CALLABLE returning void
+    template <typename CALLABLE>
+    struct WorkQueue::WaitForResult<CALLABLE, void>
+    {
+        void operator()(WorkQueue* self, const TimePoint& time, CALLABLE&& callable)
+        {
+            LLCoros::Promise<void> promise;
+            self->post(
+                time,
+                // &promise is designed for cross-thread access
+                [&promise, callable = std::move(callable)]()
+                {
+                    try
+                    {
+                        callable();
+                        promise.set_value();
+                    }
+                    catch (...)
+                    {
+                        promise.set_exception(std::current_exception());
+                    }
+                });
+            auto future{ LLCoros::getFuture(promise) };
+            // block until set_value()
+            LLCoros::TempStatus st("waiting for void WorkQueue::waitForResult()");
+            future.get();
+        }
+    };
+
+    template <typename CALLABLE>
+    auto WorkQueue::waitForResult(const TimePoint& time, CALLABLE&& callable)
+    {
+        checkCoroutine("waitForResult()");
+        // derive callable's return type so we can specialize for void
+        return WaitForResult<CALLABLE, decltype(std::forward<CALLABLE>(callable)())>()
+            (this, time, std::forward<CALLABLE>(callable));
+    }
+
+} // namespace LL
+
+#endif /* ! defined(LL_WORKQUEUE_H) */
--- a/indra/llimage/llimage.cpp
+++ b/indra/llimage/llimage.cpp
@ -623,8 +623,7 @@ void LLImage::setLastError(const std::string& message)
 //---------------------------------------------------------------------------

 LLImageBase::LLImageBase()
-:	LLTrace::MemTrackable<LLImageBase>("LLImage"),
-	mData(NULL),
+:	mData(NULL),
 	mDataSize(0),
 	mWidth(0),
 	mHeight(0),
@ -673,7 +672,6 @@ void LLImageBase::sanityCheck()
 void LLImageBase::deleteData()
 {
 	ll_aligned_free_16(mData);
-	disclaimMem(mDataSize);
 	mDataSize = 0;
 	mData = NULL;
 }
@ -731,7 +729,6 @@ U8* LLImageBase::allocateData(S32 size)
 		}
 	}
 	mDataSize = size;
-	claimMem(mDataSize);

 	return mData;
 }
@ -752,9 +749,7 @@ U8* LLImageBase::reallocateData(S32 size)
 		ll_aligned_free_16(mData) ;
 	}
 	mData = new_datap;
-	disclaimMem(mDataSize);
 	mDataSize = size;
-	claimMem(mDataSize);
 	mBadBufferAllocation = false;
 	return mData;
 }
@ -865,6 +860,12 @@ U8* LLImageRaw::reallocateData(S32 size)
 	return res;
 }

+void LLImageRaw::releaseData()
+{
+    LLImageBase::setSize(0, 0, 0);
+    LLImageBase::setDataAndSize(nullptr, 0);
+}
+
 // virtual
 void LLImageRaw::deleteData()
 {
@ -883,8 +884,6 @@ void LLImageRaw::setDataAndSize(U8 *data, S32 width, S32 height, S8 components)

 	LLImageBase::setSize(width, height, components) ;
 	LLImageBase::setDataAndSize(data, width * height * components) ;
-	
-	sGlobalRawMemory += getDataSize();
 }

 bool LLImageRaw::resize(U16 width, U16 height, S8 components)
@ -2254,9 +2253,7 @@ void LLImageBase::setDataAndSize(U8 *data, S32 size)
 { 
 	ll_assert_aligned(data, 16);
 	mData = data; 
-	disclaimMem(mDataSize); 
 	mDataSize = size; 
-	claimMem(mDataSize);
 }	

 //static
--- a/indra/llimage/llimage.h
+++ b/indra/llimage/llimage.h
@ -112,8 +112,7 @@ protected:
 // Image base class

 class LLImageBase 
-:	public LLThreadSafeRefCount,
-	public LLTrace::MemTrackable<LLImageBase>
+:	public LLThreadSafeRefCount
 {
 protected:
 	virtual ~LLImageBase();
@ -192,6 +191,12 @@ public:
 	/*virtual*/ void deleteData();
 	/*virtual*/ U8* allocateData(S32 size = -1);
 	/*virtual*/ U8* reallocateData(S32 size);
+
+    // use in conjunction with "no_copy" constructor to release data pointer before deleting
+    // so that deletion of this LLImageRaw will not free the memory at the "data" parameter 
+    // provided to "no_copy" constructor
+    void releaseData();
+
 	
 	bool resize(U16 width, U16 height, S8 components);

--- a/indra/llimage/llimagej2c.cpp
+++ b/indra/llimage/llimagej2c.cpp
@ -60,7 +60,6 @@ LLImageJ2C::LLImageJ2C() : 	LLImageFormatted(IMG_CODEC_J2C),
 							mAreaUsedForDataSizeCalcs(0)
 {
 	mImpl.reset(fallbackCreateLLImageJ2CImpl());
-	claimMem(mImpl);

 	// Clear data size table
 	for( S32 i = 0; i <= MAX_DISCARD_LEVEL; i++)
--- a/Show More
+++ b/Show More