SL-18154 Profile guided optimizations -- remove some unneeded operations and make LLDrawPoolMaterials less branchy.

master
Dave Parks 2022-11-17 13:35:39 -06:00
parent d539550252
commit e2d1af5c4f
7 changed files with 228 additions and 112 deletions

View File

@ -1215,8 +1215,18 @@ void LLGLSLShader::uniform1f(U32 index, GLfloat x)
}
}
// Fast path for setting a single float uniform addressed by its index into mUniform.
// Unlike the branching uniform*() setters, preconditions here are only asserted:
// the caller must guarantee that a program object exists, that `index` addresses
// an existing mUniform slot, and that the slot holds a valid (non-negative) location.
//   index - slot in mUniform holding the GL uniform location
//   x     - value passed straight to glUniform1f
void LLGLSLShader::fastUniform1f(U32 index, GLfloat x)
{
    LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
    llassert(mProgramObject);
    // index must be in range; the comparison used to be inverted (size() <= index),
    // which fired on every valid index and let mUniform[index] below read out of bounds
    llassert(index < mUniform.size());
    llassert(mUniform[index] >= 0);
    glUniform1f(mUniform[index], x);
}
void LLGLSLShader::uniform2f(U32 index, GLfloat x, GLfloat y)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1240,6 +1250,7 @@ void LLGLSLShader::uniform2f(U32 index, GLfloat x, GLfloat y)
void LLGLSLShader::uniform3f(U32 index, GLfloat x, GLfloat y, GLfloat z)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1263,6 +1274,7 @@ void LLGLSLShader::uniform3f(U32 index, GLfloat x, GLfloat y, GLfloat z)
void LLGLSLShader::uniform4f(U32 index, GLfloat x, GLfloat y, GLfloat z, GLfloat w)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1286,6 +1298,7 @@ void LLGLSLShader::uniform4f(U32 index, GLfloat x, GLfloat y, GLfloat z, GLfloat
void LLGLSLShader::uniform1iv(U32 index, U32 count, const GLint* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1309,6 +1322,7 @@ void LLGLSLShader::uniform1iv(U32 index, U32 count, const GLint* v)
void LLGLSLShader::uniform4iv(U32 index, U32 count, const GLint* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1333,6 +1347,7 @@ void LLGLSLShader::uniform4iv(U32 index, U32 count, const GLint* v)
void LLGLSLShader::uniform1fv(U32 index, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1356,6 +1371,7 @@ void LLGLSLShader::uniform1fv(U32 index, U32 count, const GLfloat* v)
void LLGLSLShader::uniform2fv(U32 index, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1379,6 +1395,7 @@ void LLGLSLShader::uniform2fv(U32 index, U32 count, const GLfloat* v)
void LLGLSLShader::uniform3fv(U32 index, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1402,6 +1419,7 @@ void LLGLSLShader::uniform3fv(U32 index, U32 count, const GLfloat* v)
void LLGLSLShader::uniform4fv(U32 index, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1426,6 +1444,7 @@ void LLGLSLShader::uniform4fv(U32 index, U32 count, const GLfloat* v)
void LLGLSLShader::uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1443,6 +1462,7 @@ void LLGLSLShader::uniformMatrix2fv(U32 index, U32 count, GLboolean transpose, c
void LLGLSLShader::uniformMatrix3fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1479,6 +1499,7 @@ void LLGLSLShader::uniformMatrix3x4fv(U32 index, U32 count, GLboolean transpose,
void LLGLSLShader::uniformMatrix4fv(U32 index, U32 count, GLboolean transpose, const GLfloat *v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
if (mProgramObject)
{
if (mUniform.size() <= index)
@ -1554,6 +1575,7 @@ GLint LLGLSLShader::getAttribLocation(U32 attrib)
void LLGLSLShader::uniform1i(const LLStaticHashedString& uniform, GLint v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1570,6 +1592,7 @@ void LLGLSLShader::uniform1i(const LLStaticHashedString& uniform, GLint v)
void LLGLSLShader::uniform1iv(const LLStaticHashedString& uniform, U32 count, const GLint* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1587,6 +1610,7 @@ void LLGLSLShader::uniform1iv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniform4iv(const LLStaticHashedString& uniform, U32 count, const GLint* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1604,6 +1628,7 @@ void LLGLSLShader::uniform4iv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniform2i(const LLStaticHashedString& uniform, GLint i, GLint j)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1621,6 +1646,7 @@ void LLGLSLShader::uniform2i(const LLStaticHashedString& uniform, GLint i, GLint
void LLGLSLShader::uniform1f(const LLStaticHashedString& uniform, GLfloat v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1637,6 +1663,7 @@ void LLGLSLShader::uniform1f(const LLStaticHashedString& uniform, GLfloat v)
void LLGLSLShader::uniform2f(const LLStaticHashedString& uniform, GLfloat x, GLfloat y)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1654,6 +1681,7 @@ void LLGLSLShader::uniform2f(const LLStaticHashedString& uniform, GLfloat x, GLf
void LLGLSLShader::uniform3f(const LLStaticHashedString& uniform, GLfloat x, GLfloat y, GLfloat z)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1670,6 +1698,7 @@ void LLGLSLShader::uniform3f(const LLStaticHashedString& uniform, GLfloat x, GLf
void LLGLSLShader::uniform1fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1686,6 +1715,7 @@ void LLGLSLShader::uniform1fv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniform2fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1702,6 +1732,7 @@ void LLGLSLShader::uniform2fv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniform3fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1718,6 +1749,7 @@ void LLGLSLShader::uniform3fv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniform4fv(const LLStaticHashedString& uniform, U32 count, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1735,6 +1767,7 @@ void LLGLSLShader::uniform4fv(const LLStaticHashedString& uniform, U32 count, co
void LLGLSLShader::uniformMatrix4fv(const LLStaticHashedString& uniform, U32 count, GLboolean transpose, const GLfloat* v)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
GLint location = getUniformLocation(uniform);
if (location >= 0)
@ -1764,6 +1797,7 @@ void LLGLSLShader::vertexAttrib4fv(U32 index, GLfloat* v)
// Sets the LLShaderMgr::MINIMUM_ALPHA uniform of this shader to `minimum`.
void LLGLSLShader::setMinimumAlpha(F32 minimum)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_SHADER;
// gGL is flushed before the uniform changes -- presumably so geometry already queued in gGL
// is submitted with the previous value (TODO confirm)
gGL.flush();
uniform1f(LLShaderMgr::MINIMUM_ALPHA, minimum);
}

View File

@ -181,6 +181,7 @@ public:
void mapUniform(GLint index, const std::vector<LLStaticHashedString> *);
void uniform1i(U32 index, GLint i);
void uniform1f(U32 index, GLfloat v);
// sets a float uniform by index; the program object and the index are validated only with llassert (see definition)
void fastUniform1f(U32 index, GLfloat v);
void uniform2f(U32 index, GLfloat x, GLfloat y);
void uniform3f(U32 index, GLfloat x, GLfloat y, GLfloat z);
void uniform4f(U32 index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);

View File

@ -438,6 +438,47 @@ LLWindowWin32::LLWindowWin32(LLWindowCallbacks* callbacks,
//MAINT-516 -- force a load of opengl32.dll just in case windows went sideways
LoadLibrary(L"opengl32.dll");
#if 0 // this is probably a bad idea, but keep it in your back pocket if you see what looks like
// process deprioritization during profiles
// force high thread priority
HANDLE hProcess = GetCurrentProcess();
HANDLE hThread = GetCurrentThread();
if (hProcess)
{
int priority = GetPriorityClass(hProcess);
if (priority < REALTIME_PRIORITY_CLASS)
{
if (SetPriorityClass(hProcess, REALTIME_PRIORITY_CLASS))
{
LL_INFOS() << "Set process priority to REALTIME_PRIORITY_CLASS" << LL_ENDL;
}
else
{
LL_INFOS() << "Failed to set process priority: " << std::hex << GetLastError() << LL_ENDL;
}
}
}
if (hThread)
{
int priority = GetThreadPriority(hThread);
if (priority < THREAD_PRIORITY_TIME_CRITICAL)
{
if (SetThreadPriority(hThread, THREAD_PRIORITY_TIME_CRITICAL))
{
LL_INFOS() << "Set thread priority to THREAD_PRIORITY_TIME_CRITICAL" << LL_ENDL;
}
else
{
LL_INFOS() << "Failed to set thread priority: " << std::hex << GetLastError() << LL_ENDL;
}
}
}
#endif
mFSAASamples = fsaa_samples;
mIconResource = gIconResource;
mOverrideAspectRatio = 0.f;
@ -3554,7 +3595,7 @@ BOOL LLWindowWin32::setDisplayResolution(S32 width, S32 height, S32 bits, S32 re
// Don't change anything if we don't have to
if (EnumDisplaySettings(NULL, ENUM_CURRENT_SETTINGS, &dev_mode))
{
if (dev_mode.dmPelsWidth == width &&
if (dev_mode.dmPelsWidth == width &&
dev_mode.dmPelsHeight == height &&
dev_mode.dmBitsPerPel == bits &&
dev_mode.dmDisplayFrequency == refresh )
@ -3620,15 +3661,15 @@ BOOL LLWindowWin32::resetDisplayResolution()
// Flushes GL, presents the back buffer of mhDC and collects GPU profiling data.
// Asserts that it is running on the main thread.
void LLWindowWin32::swapBuffers()
{
    LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32;
    ASSERT_MAIN_THREAD();

    {
        LL_PROFILE_GPU_ZONE("flush");
        glFlush(); //superstitious flush for maybe frame stall removal?
        LL_PROFILE_ZONE_SCOPED_CATEGORY_WIN32;
        // present exactly once per call -- a second SwapBuffers() here would
        // present an extra frame for every frame rendered
        SwapBuffers(mhDC);
    }

    {
        // GPU profiler collection is likewise done once, inside its own named zone
        LL_PROFILE_ZONE_NAMED_CATEGORY_WIN32("GPU Collect");
        LL_PROFILER_GPU_COLLECT;
    }
}

View File

@ -459,13 +459,11 @@ bool LLDrawPoolAlpha::TexSetup(LLDrawInfo* draw, bool use_material)
{
if (draw->mNormalMap)
{
draw->mNormalMap->addTextureStats(draw->mVSize);
current_shader->bindTexture(LLShaderMgr::BUMP_MAP, draw->mNormalMap);
}
if (draw->mSpecularMap)
{
draw->mSpecularMap->addTextureStats(draw->mVSize);
current_shader->bindTexture(LLShaderMgr::SPECULAR_MAP, draw->mSpecularMap);
}
}

View File

@ -164,112 +164,154 @@ void LLDrawPoolMaterials::renderDeferred(S32 pass)
LLCullResult::drawinfo_iterator begin = gPipeline.beginRenderMap(type);
LLCullResult::drawinfo_iterator end = gPipeline.endRenderMap(type);
for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
{
LLDrawInfo& params = **i;
mShader->uniform4f(LLShaderMgr::SPECULAR_COLOR, params.mSpecColor.mV[0], params.mSpecColor.mV[1], params.mSpecColor.mV[2], params.mSpecColor.mV[3]);
mShader->uniform1f(LLShaderMgr::ENVIRONMENT_INTENSITY, params.mEnvIntensity);
if (params.mNormalMap)
{
params.mNormalMap->addTextureStats(params.mVSize);
bindNormalMap(params.mNormalMap);
}
if (params.mSpecularMap)
{
params.mSpecularMap->addTextureStats(params.mVSize);
bindSpecularMap(params.mSpecularMap);
}
mShader->setMinimumAlpha(params.mAlphaMaskCutoff);
mShader->uniform1f(LLShaderMgr::EMISSIVE_BRIGHTNESS, params.mFullbright ? 1.f : 0.f);
F32 lastIntensity = 0.f;
F32 lastFullbright = 0.f;
F32 lastMinimumAlpha = 0.f;
LLVector4 lastSpecular = LLVector4(0, 0, 0, 0);
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL;
pushMaterialsBatch(params, mask, rigged);
}
}
}
GLint intensity = mShader->getUniformLocation(LLShaderMgr::ENVIRONMENT_INTENSITY);
GLint brightness = mShader->getUniformLocation(LLShaderMgr::EMISSIVE_BRIGHTNESS);
GLint minAlpha = mShader->getUniformLocation(LLShaderMgr::MINIMUM_ALPHA);
GLint specular = mShader->getUniformLocation(LLShaderMgr::SPECULAR_COLOR);
// Binds `tex` on mShader under LLShaderMgr::SPECULAR_MAP.
void LLDrawPoolMaterials::bindSpecularMap(LLViewerTexture* tex)
{
mShader->bindTexture(LLShaderMgr::SPECULAR_MAP, tex);
}
GLint specChannel = mShader->getUniformLocation(LLShaderMgr::SPECULAR_MAP);
GLint normChannel = mShader->getUniformLocation(LLShaderMgr::BUMP_MAP);
// Binds `tex` on mShader under LLShaderMgr::BUMP_MAP.
void LLDrawPoolMaterials::bindNormalMap(LLViewerTexture* tex)
{
mShader->bindTexture(LLShaderMgr::BUMP_MAP, tex);
}
LLTexture* lastNormalMap = nullptr;
LLTexture* lastSpecMap = nullptr;
LLTexture* lastDiffuse = nullptr;
// Issues the draw call(s) for `params` with vertex buffer mask `mask`; `rigged` enables the
// avatar matrix palette upload.
// NOTE(review): this body reads like two revisions merged together -- a per-batch path
// (model matrix, texture matrix, one drawRangeFast) followed by a loop over draw infos that
// caches the last value sent to GL. Symptoms visible below: back-to-back `if (...)` lines
// (rigged/intensity and tex_setup/specular), the diffuse unit being bound and then
// unconditionally unbound, and use of names that are not declared inside this function
// (intensity, brightness, minAlpha, specular, normChannel, specChannel, the last* caches,
// begin, end, diffuse_channel). Confirm against version control before relying on it.
void LLDrawPoolMaterials::pushMaterialsBatch(LLDrawInfo& params, U32 mask, bool rigged)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_MATERIAL;
applyModelMatrix(params);
bool tex_setup = false;
//not batching textures or batch has only 1 texture -- might need a texture matrix
if (params.mTextureMatrix)
{
//if (mShiny)
{
gGL.getTexUnit(0)->activate();
gGL.matrixMode(LLRender::MM_TEXTURE);
}
gGL.loadMatrix((GLfloat*) params.mTextureMatrix->mMatrix);
gPipeline.mTextureMatrixOps++;
tex_setup = true;
}
if (mShaderLevel > 1)
{
if (params.mTexture.notNull())
{
gGL.getTexUnit(diffuse_channel)->bindFast(params.mTexture);
}
else
{
gGL.getTexUnit(diffuse_channel)->unbindFast(LLTexUnit::TT_TEXTURE);
}
}
if (params.mGroup)
{
params.mGroup->rebuildMesh();
}
// NOTE(review): this unbind is unconditional, so it also undoes the bindFast made above -- confirm intent
gGL.getTexUnit(diffuse_channel)->unbindFast(LLTexUnit::TT_TEXTURE);
// upload matrix palette to shader
// NOTE(review): two consecutive ifs -- the braced block below runs only when both conditions hold
if (rigged && params.mAvatar.notNull())
if (intensity > -1)
{
const LLVOAvatar::MatrixPaletteCache& mpc = params.mAvatar->updateSkinInfoMatrixPalette(params.mSkinInfo);
U32 count = mpc.mMatrixPalette.size();
if (count == 0)
{
//skin info not loaded yet, don't render
return;
}
mShader->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
count,
FALSE,
(GLfloat*)&(mpc.mGLMp[0]));
glUniform1f(intensity, lastIntensity);
}
//LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
if (brightness > -1)
{
glUniform1f(brightness, lastFullbright);
}
params.mVertexBuffer->setBufferFast(mask);
params.mVertexBuffer->drawRangeFast(LLRender::TRIANGLES, params.mStart, params.mEnd, params.mCount, params.mOffset);
if (minAlpha > -1)
{
glUniform1f(minAlpha, lastMinimumAlpha);
}
// NOTE(review): two consecutive ifs again -- the specular upload below is guarded by both
if (tex_setup)
if (specular > -1)
{
glUniform4fv(specular, 1, lastSpecular.mV);
}
LLVOAvatar* lastAvatar = nullptr;
// walk the draw infos in [begin, end); each piece of state is re-sent to GL only when it
// differs from the value remembered in the matching last* variable
for (LLCullResult::drawinfo_iterator i = begin; i != end; ++i)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_MATERIAL("materials draw loop");
LLDrawInfo& params = **i;
if (specular > -1 && params.mSpecColor != lastSpecular)
{
lastSpecular = params.mSpecColor;
glUniform4fv(specular, 1, lastSpecular.mV);
}
// NOTE(review): tests `!= -1` where the sibling checks use `> -1`
if (intensity != -1 && lastIntensity != params.mEnvIntensity)
{
lastIntensity = params.mEnvIntensity;
glUniform1f(intensity, lastIntensity);
}
if (minAlpha > -1 && lastMinimumAlpha != params.mAlphaMaskCutoff)
{
lastMinimumAlpha = params.mAlphaMaskCutoff;
glUniform1f(minAlpha, lastMinimumAlpha);
}
F32 fullbright = params.mFullbright ? 1.f : 0.f;
if (brightness > -1 && lastFullbright != fullbright)
{
lastFullbright = fullbright;
glUniform1f(brightness, lastFullbright);
}
if (normChannel > -1 && params.mNormalMap != lastNormalMap)
{
lastNormalMap = params.mNormalMap;
// NOTE(review): llassert is the only null guard before bindFast here and for the specular map below
llassert(lastNormalMap);
gGL.getTexUnit(normChannel)->bindFast(lastNormalMap);
}
if (specChannel > -1 && params.mSpecularMap != lastSpecMap)
{
lastSpecMap = params.mSpecularMap;
llassert(lastSpecMap);
gGL.getTexUnit(specChannel)->bindFast(lastSpecMap);
}
if (params.mTexture != lastDiffuse)
{
lastDiffuse = params.mTexture;
if (lastDiffuse)
{
gGL.getTexUnit(diffuse_channel)->bindFast(lastDiffuse);
}
else
{
gGL.getTexUnit(diffuse_channel)->unbindFast(LLTexUnit::TT_TEXTURE);
}
}
// upload matrix palette to shader
if (rigged && params.mAvatar.notNull())
{
// NOTE(review): lastAvatar is never assigned after its nullptr initialization in the visible
// code, so this presumably holds for every non-null avatar and the palette is re-uploaded
// on each draw -- confirm
if (params.mAvatar != lastAvatar)
{
const LLVOAvatar::MatrixPaletteCache& mpc = params.mAvatar->updateSkinInfoMatrixPalette(params.mSkinInfo);
U32 count = mpc.mMatrixPalette.size();
if (count == 0)
{
//skin info not loaded yet, don't render
// NOTE(review): this return leaves the function from inside the loop, so the remaining
// draw infos are skipped as well -- confirm that is intended (vs. continue)
return;
}
mShader->uniformMatrix3x4fv(LLViewerShaderMgr::AVATAR_MATRIX,
count,
FALSE,
(GLfloat*)&(mpc.mGLMp[0]));
}
}
applyModelMatrix(params);
bool tex_setup = false;
//not batching textures or batch has only 1 texture -- might need a texture matrix
if (params.mTextureMatrix)
{
gGL.getTexUnit(0)->activate();
gGL.matrixMode(LLRender::MM_TEXTURE);
gGL.loadMatrix((GLfloat*)params.mTextureMatrix->mMatrix);
gPipeline.mTextureMatrixOps++;
tex_setup = true;
}
/*if (params.mGroup) // TOO LATE
{
params.mGroup->rebuildMesh();
}*/
params.mVertexBuffer->setBufferFast(mask);
params.mVertexBuffer->drawRangeFast(LLRender::TRIANGLES, params.mStart, params.mEnd, params.mCount, params.mOffset);
// undo the texture matrix set above: load identity on unit 0, then switch back to modelview mode
if (tex_setup)
{
gGL.getTexUnit(0)->activate();
gGL.loadIdentity();
gGL.matrixMode(LLRender::MM_MODELVIEW);
}
}
}

View File

@ -65,11 +65,6 @@ public:
void beginDeferredPass(S32 pass) override;
void endDeferredPass(S32 pass) override;
void renderDeferred(S32 pass) override;
void bindSpecularMap(LLViewerTexture* tex);
void bindNormalMap(LLViewerTexture* tex);
void pushMaterialsBatch(LLDrawInfo& params, U32 mask, bool rigged);
};
#endif //LL_LLDRAWPOOLMATERIALS_H

View File

@ -525,6 +525,11 @@ void LLVOSky::calc()
void LLVOSky::initCubeMap()
{
if (LLPipeline::sReflectionProbesEnabled)
{
return;
}
std::vector<LLPointer<LLImageRaw> > images;
for (S32 side = 0; side < NUM_CUBEMAP_FACES; side++)
{
@ -715,7 +720,7 @@ bool LLVOSky::updateSky()
mForceUpdate = FALSE;
}
}
else if (mCubeMapUpdateStage == NUM_CUBEMAP_FACES)
else if (mCubeMapUpdateStage == NUM_CUBEMAP_FACES && !LLPipeline::sReflectionProbesEnabled)
{
LL_PROFILE_ZONE_NAMED("updateSky - forced");
LLSkyTex::stepCurrent();
@ -776,7 +781,7 @@ bool LLVOSky::updateSky()
mCubeMapUpdateStage = -1;
}
// run 0 to 5 faces, each face in own frame
else if (mCubeMapUpdateStage >= 0 && mCubeMapUpdateStage < NUM_CUBEMAP_FACES)
else if (mCubeMapUpdateStage >= 0 && mCubeMapUpdateStage < NUM_CUBEMAP_FACES && !LLPipeline::sReflectionProbesEnabled)
{
LL_PROFILE_ZONE_NAMED("updateSky - create");
S32 side = mCubeMapUpdateStage;