SL-16166 Optimization pass on LLRenderPass::pushBatch

master
Runitai Linden 2021-10-12 14:46:19 -05:00
parent 75cf90723f
commit 52e1a45659
14 changed files with 247 additions and 54 deletions

View File

@ -176,7 +176,7 @@ private:
protected:
void setTexelsPerImage();
//note: do not make this function public.
public:
/*virtual*/ LLImageGL* getGLTexture() const ;
protected:

View File

@ -169,6 +169,7 @@ void LLTexUnit::refreshState(void)
void LLTexUnit::activate(void)
{
LL_PROFILE_ZONE_SCOPED;
if (mIndex < 0) return;
if ((S32)gGL.mCurrTextureUnitIndex != mIndex || gGL.mDirty)
@ -229,8 +230,20 @@ void LLTexUnit::disable(void)
}
}
void LLTexUnit::bindFast(LLTexture* texture)
{
LLImageGL* gl_tex = texture->getGLTexture();
glActiveTextureARB(GL_TEXTURE0_ARB + mIndex);
gGL.mCurrTextureUnitIndex = mIndex;
mCurrTexture = gl_tex->getTexName();
glBindTexture(sGLTextureType[gl_tex->getTarget()], mCurrTexture);
mHasMipMaps = gl_tex->mHasMipMaps;
}
bool LLTexUnit::bind(LLTexture* texture, bool for_rendering, bool forceBind)
{
LL_PROFILE_ZONE_SCOPED;
stop_glerror();
if (mIndex >= 0)
{
@ -1243,8 +1256,6 @@ void LLRender::syncLightState()
void LLRender::syncMatrices()
{
stop_glerror();
static const U32 name[] =
{
LLShaderMgr::MODELVIEW_MATRIX,
@ -1415,8 +1426,6 @@ void LLRender::syncMatrices()
}
}
}
stop_glerror();
}
void LLRender::translatef(const GLfloat& x, const GLfloat& y, const GLfloat& z)
@ -1925,6 +1934,7 @@ void LLRender::end()
}
void LLRender::flush()
{
LL_PROFILE_ZONE_SCOPED;
if (mCount > 0)
{
if (!mUIOffset.empty())

View File

@ -161,6 +161,17 @@ public:
bool bind(LLImageGL* texture, bool for_rendering = false, bool forceBind = false);
bool bind(LLTexture* texture, bool for_rendering = false, bool forceBind = false);
// bind implementation for inner loops
// makes the following assumptions:
// - No need for gGL.flush()
// - texture is not null
// - gl_tex->getTexName() is not zero
// - This texture is not being bound redundantly
// - USE_SRGB_DECODE is disabled
// - mTexOptionsDirty is false
// -
void bindFast(LLTexture* texture);
// Binds a cubemap to this texture unit
// (automatically enables the texture unit for cubemaps)
bool bind(LLCubeMap* cubeMap);

View File

@ -67,11 +67,9 @@ public:
virtual S32 getWidth(S32 discard_level = -1) const;
virtual S32 getHeight(S32 discard_level = -1) const;
virtual bool isActiveFetching();
virtual LLImageGL* getGLTexture() const;
private:
//note: do not make this function public.
virtual LLImageGL* getGLTexture() const;
virtual void updateBindStatsForTester();
};
#endif

View File

@ -787,6 +787,18 @@ void LLVertexBuffer::drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indi
placeFence();
}
void LLVertexBuffer::drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const
{
mMappable = false;
gGL.syncMatrices();
U16* idx = ((U16*)(U8*)mAlignedIndexOffset) + indices_offset;
LL_PROFILER_GPU_ZONEC("gl.DrawRangeElements", 0xFFFF00)
glDrawRangeElements(sGLMode[mode], start, end, count, GL_UNSIGNED_SHORT,
idx);
}
void LLVertexBuffer::draw(U32 mode, U32 count, U32 indices_offset) const
{
llassert(!LLGLSLShader::sNoFixedFunction || LLGLSLShader::sCurBoundShaderPtr != NULL);
@ -2272,6 +2284,21 @@ bool LLVertexBuffer::bindGLBuffer(bool force_bind)
return ret;
}
bool LLVertexBuffer::bindGLBufferFast()
{
if (mGLBuffer != sGLRenderBuffer || !sVBOActive)
{
glBindBufferARB(GL_ARRAY_BUFFER_ARB, mGLBuffer);
sGLRenderBuffer = mGLBuffer;
sBindCount++;
sVBOActive = true;
return true;
}
return false;
}
static LLTrace::BlockTimerStatHandle FTM_BIND_GL_INDICES("Bind Indices");
bool LLVertexBuffer::bindGLIndices(bool force_bind)
@ -2297,6 +2324,21 @@ bool LLVertexBuffer::bindGLIndices(bool force_bind)
return ret;
}
bool LLVertexBuffer::bindGLIndicesFast()
{
if (mGLIndices != sGLRenderIndices || !sIBOActive)
{
glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, mGLIndices);
sGLRenderIndices = mGLIndices;
sBindCount++;
sIBOActive = true;
return true;
}
return false;
}
void LLVertexBuffer::flush()
{
if (useVBOs())
@ -2487,6 +2529,26 @@ void LLVertexBuffer::setBuffer(U32 data_mask)
}
}
void LLVertexBuffer::setBufferFast(U32 data_mask)
{
//set up pointers if the data mask is different ...
bool setup = (sLastMask != data_mask);
const bool bindBuffer = bindGLBufferFast();
const bool bindIndices = bindGLIndicesFast();
setup = setup || bindBuffer || bindIndices;
setupClientArrays(data_mask);
if (data_mask && setup)
{
setupVertexBuffer(data_mask); // subclass specific setup (virtual function)
sSetCount++;
}
}
// virtual (default)
void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
{
@ -2644,6 +2706,99 @@ void LLVertexBuffer::setupVertexBuffer(U32 data_mask)
llglassertok();
}
void LLVertexBuffer::setupVertexBufferFast(U32 data_mask)
{
U8* base = (U8*)mAlignedOffset;
if (data_mask & MAP_NORMAL)
{
S32 loc = TYPE_NORMAL;
void* ptr = (void*)(base + mOffsets[TYPE_NORMAL]);
glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_NORMAL], ptr);
}
if (data_mask & MAP_TEXCOORD3)
{
S32 loc = TYPE_TEXCOORD3;
void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD3]);
glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD3], ptr);
}
if (data_mask & MAP_TEXCOORD2)
{
S32 loc = TYPE_TEXCOORD2;
void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD2]);
glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD2], ptr);
}
if (data_mask & MAP_TEXCOORD1)
{
S32 loc = TYPE_TEXCOORD1;
void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD1]);
glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD1], ptr);
}
if (data_mask & MAP_TANGENT)
{
S32 loc = TYPE_TANGENT;
void* ptr = (void*)(base + mOffsets[TYPE_TANGENT]);
glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TANGENT], ptr);
}
if (data_mask & MAP_TEXCOORD0)
{
S32 loc = TYPE_TEXCOORD0;
void* ptr = (void*)(base + mOffsets[TYPE_TEXCOORD0]);
glVertexAttribPointerARB(loc, 2, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_TEXCOORD0], ptr);
}
if (data_mask & MAP_COLOR)
{
S32 loc = TYPE_COLOR;
//bind emissive instead of color pointer if emissive is present
void* ptr = (data_mask & MAP_EMISSIVE) ? (void*)(base + mOffsets[TYPE_EMISSIVE]) : (void*)(base + mOffsets[TYPE_COLOR]);
glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_COLOR], ptr);
}
if (data_mask & MAP_EMISSIVE)
{
S32 loc = TYPE_EMISSIVE;
void* ptr = (void*)(base + mOffsets[TYPE_EMISSIVE]);
glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
if (!(data_mask & MAP_COLOR))
{ //map emissive to color channel when color is not also being bound to avoid unnecessary shader swaps
loc = TYPE_COLOR;
glVertexAttribPointerARB(loc, 4, GL_UNSIGNED_BYTE, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_EMISSIVE], ptr);
}
}
if (data_mask & MAP_WEIGHT)
{
S32 loc = TYPE_WEIGHT;
void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT]);
glVertexAttribPointerARB(loc, 1, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT], ptr);
}
if (data_mask & MAP_WEIGHT4)
{
S32 loc = TYPE_WEIGHT4;
void* ptr = (void*)(base + mOffsets[TYPE_WEIGHT4]);
glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_WEIGHT4], ptr);
}
if (data_mask & MAP_CLOTHWEIGHT)
{
S32 loc = TYPE_CLOTHWEIGHT;
void* ptr = (void*)(base + mOffsets[TYPE_CLOTHWEIGHT]);
glVertexAttribPointerARB(loc, 4, GL_FLOAT, GL_TRUE, LLVertexBuffer::sTypeSize[TYPE_CLOTHWEIGHT], ptr);
}
if (data_mask & MAP_TEXTURE_INDEX)
{
#if !LL_DARWIN
S32 loc = TYPE_TEXTURE_INDEX;
void* ptr = (void*)(base + mOffsets[TYPE_VERTEX] + 12);
glVertexAttribIPointer(loc, 1, GL_UNSIGNED_INT, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
#endif
}
if (data_mask & MAP_VERTEX)
{
S32 loc = TYPE_VERTEX;
void* ptr = (void*)(base + mOffsets[TYPE_VERTEX]);
glVertexAttribPointerARB(loc, 3, GL_FLOAT, GL_FALSE, LLVertexBuffer::sTypeSize[TYPE_VERTEX], ptr);
}
}
LLVertexBuffer::MappedRegion::MappedRegion(S32 type, S32 index, S32 count)
: mType(type), mIndex(index), mCount(count)
{

View File

@ -210,13 +210,17 @@ protected:
virtual ~LLVertexBuffer(); // use unref()
virtual void setupVertexBuffer(U32 data_mask); // pure virtual, called from mapBuffer()
virtual void setupVertexBuffer(U32 data_mask);
void setupVertexBufferFast(U32 data_mask);
void setupVertexArray();
void genBuffer(U32 size);
void genIndices(U32 size);
bool bindGLBuffer(bool force_bind = false);
bool bindGLBufferFast();
bool bindGLIndices(bool force_bind = false);
bool bindGLIndicesFast();
bool bindGLArray();
void releaseBuffer();
void releaseIndices();
@ -239,6 +243,8 @@ public:
// set for rendering
virtual void setBuffer(U32 data_mask); // calls setupVertexBuffer() if data_mask is not 0
void setBufferFast(U32 data_mask); // calls setupVertexBufferFast(), assumes data_mask is not 0 among other assumptions
void flush(); //flush pending data to GL memory
// allocate buffer
bool allocateBuffer(S32 nverts, S32 nindices, bool create);
@ -290,6 +296,9 @@ public:
void drawArrays(U32 mode, U32 offset, U32 count) const;
void drawRange(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
//implementation for inner loops that does no safety checking
void drawRangeFast(U32 mode, U32 start, U32 end, U32 count, U32 indices_offset) const;
//for debugging, validate data in given range is valid
void validateRange(U32 start, U32 end, U32 count, U32 offset) const;

View File

@ -404,6 +404,7 @@ void LLRenderPass::renderTexture(U32 type, U32 mask, BOOL batch_textures)
void LLRenderPass::pushBatches(U32 type, U32 mask, BOOL texture, BOOL batch_textures)
{
LL_PROFILE_ZONE_SCOPED;
for (LLCullResult::drawinfo_iterator i = gPipeline.beginRenderMap(type); i != gPipeline.endRenderMap(type); ++i)
{
LLDrawInfo* pparams = *i;
@ -452,6 +453,7 @@ void LLRenderPass::applyModelMatrix(const LLDrawInfo& params)
void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL batch_textures)
{
LL_PROFILE_ZONE_SCOPED;
if (!params.mCount)
{
return;
@ -469,7 +471,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
{
if (params.mTextureList[i].notNull())
{
gGL.getTexUnit(i)->bind(params.mTextureList[i], TRUE);
gGL.getTexUnit(i)->bindFast(params.mTextureList[i]);
}
}
}
@ -477,8 +479,7 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
{ //not batching textures or batch has only 1 texture -- might need a texture matrix
if (params.mTexture.notNull())
{
params.mTexture->addTextureStats(params.mVSize);
gGL.getTexUnit(0)->bind(params.mTexture, TRUE) ;
gGL.getTexUnit(0)->bindFast(params.mTexture);
if (params.mTextureMatrix)
{
tex_setup = true;
@ -495,19 +496,17 @@ void LLRenderPass::pushBatch(LLDrawInfo& params, U32 mask, BOOL texture, BOOL ba
}
}
if (params.mVertexBuffer.notNull())
{
if (params.mGroup)
{
params.mGroup->rebuildMesh();
}
{
if (params.mGroup)
{
params.mGroup->rebuildMesh();
}
LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
params.mVertexBuffer->setBuffer(mask);
params.mVertexBuffer->drawRange(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
gPipeline.addTrianglesDrawn(params.mCount, params.mDrawMode);
}
LLGLEnableFunc stencil_test(GL_STENCIL_TEST, params.mSelected, &LLGLCommonFunc::selected_stencil_test);
params.mVertexBuffer->setBufferFast(mask);
params.mVertexBuffer->drawRangeFast(params.mDrawMode, params.mStart, params.mEnd, params.mCount, params.mOffset);
}
if (tex_setup)
{

View File

@ -106,6 +106,7 @@ void LLDrawPoolMaterials::endDeferredPass(S32 pass)
void LLDrawPoolMaterials::renderDeferred(S32 pass)
{
LL_PROFILE_ZONE_SCOPED;
static const U32 type_list[] =
{
LLRenderPass::PASS_MATERIAL,

View File

@ -471,6 +471,7 @@ void LLDrawPoolSimple::endDeferredPass(S32 pass)
void LLDrawPoolSimple::renderDeferred(S32 pass)
{
LL_PROFILE_ZONE_SCOPED;
LLGLDisable blend(GL_BLEND);
LLGLDisable alpha_test(GL_ALPHA_TEST);

View File

@ -153,6 +153,7 @@ void LLDrawPoolTree::beginDeferredPass(S32 pass)
void LLDrawPoolTree::renderDeferred(S32 pass)
{
LL_PROFILE_ZONE_SCOPED;
render(pass);
}

View File

@ -41,6 +41,7 @@
#include "llviewercamera.h"
#include "llvector4a.h"
#include <queue>
#include <unordered_map>
#define SG_STATE_INHERIT_MASK (OCCLUDED)
#define SG_INITIAL_STATE_MASK (DIRTY | GEOM_DIRTY)
@ -216,10 +217,10 @@ public:
typedef std::vector<LLPointer<LLSpatialGroup> > sg_vector_t;
typedef std::vector<LLPointer<LLSpatialBridge> > bridge_list_t;
typedef std::vector<LLPointer<LLDrawInfo> > drawmap_elem_t;
typedef std::map<U32, drawmap_elem_t > draw_map_t;
typedef std::unordered_map<U32, drawmap_elem_t > draw_map_t;
typedef std::vector<LLPointer<LLVertexBuffer> > buffer_list_t;
typedef std::map<LLFace*, buffer_list_t> buffer_texture_map_t;
typedef std::map<U32, buffer_texture_map_t> buffer_map_t;
typedef std::unordered_map<LLFace*, buffer_list_t> buffer_texture_map_t;
typedef std::unordered_map<U32, buffer_texture_map_t> buffer_map_t;
struct CompareDistanceGreater
{

View File

@ -845,14 +845,14 @@ void LLViewerTexture::addTextureStats(F32 virtual_size, BOOL needs_gltexture) co
{
//flag to reset the values because the old values are used.
resetMaxVirtualSizeResetCounter();
mMaxVirtualSize = virtual_size;
mAdditionalDecodePriority = 0.f;
mMaxVirtualSize = virtual_size;
mAdditionalDecodePriority = 0.f;
mNeedsGLTexture = needs_gltexture;
}
else if (virtual_size > mMaxVirtualSize)
{
mMaxVirtualSize = virtual_size;
}
}
}
void LLViewerTexture::resetTextureStats()

View File

@ -358,7 +358,6 @@ bool LLPipeline::sRenderAttachedLights = true;
bool LLPipeline::sRenderAttachedParticles = true;
bool LLPipeline::sRenderDeferred = false;
S32 LLPipeline::sVisibleLightCount = 0;
F32 LLPipeline::sMinRenderSize = 0.f;
bool LLPipeline::sRenderingHUDs;
F32 LLPipeline::sDistortionWaterClipPlaneMargin = 1.0125f;
@ -2550,13 +2549,6 @@ void LLPipeline::markNotCulled(LLSpatialGroup* group, LLCamera& camera)
return;
}
const LLVector4a* bounds = group->getBounds();
if (sMinRenderSize > 0.f &&
llmax(llmax(bounds[1][0], bounds[1][1]), bounds[1][2]) < sMinRenderSize)
{
return;
}
assertInitialized();
if (!group->getSpatialPartition()->mRenderByGroup)
@ -3480,7 +3472,6 @@ void LLPipeline::stateSort(LLSpatialGroup* group, LLCamera& camera)
group->mLastUpdateDistance = group->mDistance;
}
}
}
void LLPipeline::stateSort(LLSpatialBridge* bridge, LLCamera& camera, BOOL fov_changed)
@ -3787,6 +3778,27 @@ void renderSoundHighlights(LLDrawable* drawablep)
}
}
void LLPipeline::touchTextures(LLDrawInfo* info)
{
LL_PROFILE_ZONE_SCOPED;
for (auto& tex : info->mTextureList)
{
if (tex.notNull())
{
LLImageGL* gl_tex = tex->getGLTexture();
if (gl_tex && gl_tex->updateBindStats(gl_tex->mTextureMemory))
{
tex->setActive();
}
}
}
if (info->mTexture.notNull())
{
info->mTexture->addTextureStats(info->mVSize);
}
}
void LLPipeline::postSort(LLCamera& camera)
{
LL_RECORD_BLOCK_TIME(FTM_STATESORT_POSTSORT);
@ -3839,20 +3851,14 @@ void LLPipeline::postSort(LLCamera& camera)
for (LLSpatialGroup::drawmap_elem_t::iterator k = src_vec.begin(); k != src_vec.end(); ++k)
{
if (sMinRenderSize > 0.f)
{
LLVector4a bounds;
bounds.setSub((*k)->mExtents[1],(*k)->mExtents[0]);
if (llmax(llmax(bounds[0], bounds[1]), bounds[2]) > sMinRenderSize)
{
sCull->pushDrawInfo(j->first, *k);
}
}
else
{
sCull->pushDrawInfo(j->first, *k);
}
LLDrawInfo* info = *k;
sCull->pushDrawInfo(j->first, info);
if (!sShadowRender && !sReflectionRender)
{
touchTextures(info);
addTrianglesDrawn(info->mCount, info->mDrawMode);
}
}
}

View File

@ -265,6 +265,8 @@ public:
void stateSort(LLSpatialBridge* bridge, LLCamera& camera, BOOL fov_changed = FALSE);
void stateSort(LLDrawable* drawablep, LLCamera& camera);
void postSort(LLCamera& camera);
//update stats for textures in given DrawInfo
void touchTextures(LLDrawInfo* info);
void forAllVisibleDrawables(void (*func)(LLDrawable*));
void renderObjects(U32 type, U32 mask, bool texture = true, bool batch_texture = false);
@ -596,7 +598,6 @@ public:
static bool sRenderAttachedParticles;
static bool sRenderDeferred;
static S32 sVisibleLightCount;
static F32 sMinRenderSize;
static bool sRenderingHUDs;
static F32 sDistortionWaterClipPlaneMargin;