Profile guided optimizations

master
RunitaiLinden 2024-09-10 18:27:45 -05:00
parent 33116ea35e
commit cde5d29faf
9 changed files with 183 additions and 114 deletions

View File

@ -41,6 +41,13 @@ public:
// Construct a strider that starts at 'first' and uses sizeof(Object) as its skip.
LLStrider(Object* first)
{
    mSkip = sizeof(Object);
    mObjectp = first;
}
// Destructor: the body is empty, no cleanup is performed here.
~LLStrider() { }
// Copy-assign from another strider: takes over both its byte pointer (mBytep)
// and its skip (mSkip), then returns this strider.
const LLStrider<Object>& operator=(const LLStrider<Object>& rhs)
{
    mSkip  = rhs.mSkip;
    mBytep = rhs.mBytep;
    return *this;
}
// Re-point the strider at a new first element; mSkip is left untouched.
const LLStrider<Object>& operator = (Object *first)
{
    mObjectp = first;
    return *this;
}
// Set the skip in bytes; passing 0 falls back to sizeof(Object).
void setStride (S32 skipBytes)
{
    mSkip = (skipBytes ? skipBytes : sizeof(Object));
}

View File

@ -1045,15 +1045,47 @@ void sub_image_lines(U32 target, S32 miplevel, S32 x_offset, S32 y_offset, S32 w
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
LL_PROFILE_ZONE_NUM(width);
LL_PROFILE_ZONE_NUM(height);
U32 components = LLImageGL::dataFormatComponents(pixformat);
U32 type_width = type_width_from_pixtype(pixtype);
const U32 line_width = data_width * components * type_width;
const U32 y_offset_end = y_offset + height;
for (U32 y_pos = y_offset; y_pos < y_offset_end; ++y_pos)
if (width == data_width && height % 32 == 0)
{
glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
src += line_width;
LL_PROFILE_ZONE_NAMED_CATEGORY_TEXTURE("subimage - batched lines");
// full width, batch multiple lines at a time
// set batch size based on width
U32 batch_size = 32;
if (width > 1024)
{
batch_size = 8;
}
else if (width > 512)
{
batch_size = 16;
}
// full width texture, upload batch_size lines per call
for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += batch_size)
{
glTexSubImage2D(target, miplevel, x_offset, y_pos, width, batch_size, pixformat, pixtype, src);
src += line_width * batch_size;
}
}
else
{
// partial width or strange height
for (U32 y_pos = y_offset; y_pos < y_offset_end; y_pos += 1)
{
glTexSubImage2D(target, miplevel, x_offset, y_pos, width, 1, pixformat, pixtype, src);
src += line_width;
}
}
}
@ -2139,6 +2171,8 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
return ;
}
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
U32 length = w * h;
U32 alphatotal = 0;
@ -2150,15 +2184,15 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
// this will mid-skew the data (and thus increase the chances of not
// being used as a mask) from high-frequency alpha maps which
// suffer the worst from aliasing when used as alpha masks.
if (w >= 2 && h >= 2)
if (w >= 4 && h >= 4)
{
llassert(w%2 == 0);
llassert(h%2 == 0);
llassert(w%4 == 0);
llassert(h%4 == 0);
const GLubyte* rowstart = ((const GLubyte*) data_in) + mAlphaOffset;
for (U32 y = 0; y < h; y+=2)
for (U32 y = 0; y < h; y+=4)
{
const GLubyte* current = rowstart;
for (U32 x = 0; x < w; x+=2)
for (U32 x = 0; x < w; x+=4)
{
const U32 s1 = current[0];
alphatotal += s1;
@ -2182,7 +2216,7 @@ void LLImageGL::analyzeAlpha(const void* data_in, U32 w, U32 h)
}
rowstart += 2 * w * mAlphaStride;
rowstart += 4 * w * mAlphaStride;
}
length *= 2; // we sampled everything twice, essentially
}

View File

@ -954,6 +954,25 @@ LLVertexBuffer::LLVertexBuffer(U32 typemask)
}
}
// list of mapped buffers
// NOTE: must not be LLPointer<LLVertexBuffer> to avoid breaking non-ref-counted LLVertexBuffer instances
static std::vector<LLVertexBuffer*> sMappedBuffers;
//static
void LLVertexBuffer::flushBuffers()
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
// must only be called from main thread
llassert(LLCoros::on_main_thread_main_coro());
for (auto& buffer : sMappedBuffers)
{
buffer->_unmapBuffer();
buffer->mMapped = false;
}
sMappedBuffers.resize(0);
}
//static
U32 LLVertexBuffer::calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices)
{
@ -997,6 +1016,12 @@ U32 LLVertexBuffer::calcVertexSize(const U32& typemask)
//virtual
LLVertexBuffer::~LLVertexBuffer()
{
if (mMapped)
{ // is on the mapped buffer list but doesn't need to be flushed
mMapped = false;
unmapBuffer();
}
destroyGLBuffer();
destroyGLIndices();
@ -1198,6 +1223,7 @@ bool expand_region(LLVertexBuffer::MappedRegion& region, U32 start, U32 end)
U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
_mapBuffer();
if (count == -1)
{
@ -1233,6 +1259,7 @@ U8* LLVertexBuffer::mapVertexBuffer(LLVertexBuffer::AttributeType type, U32 inde
U8* LLVertexBuffer::mapIndexBuffer(U32 index, S32 count)
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_VERTEX;
_mapBuffer();
if (count == -1)
{
@ -1289,11 +1316,11 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
LL_PROFILE_ZONE_NUM(end);
LL_PROFILE_ZONE_NUM(end-start);
constexpr U32 block_size = 8192;
constexpr U32 block_size = 65536;
for (U32 i = start; i <= end; i += block_size)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
//LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("glBufferSubData block");
//LL_PROFILE_GPU_ZONE("glBufferSubData");
U32 tend = llmin(i + block_size, end);
U32 size = tend - i + 1;
@ -1304,8 +1331,29 @@ void LLVertexBuffer::flush_vbo(GLenum target, U32 start, U32 end, void* data, U8
}
// Instance-level entry point: it does not unmap only this buffer, it simply
// forwards to the static flushBuffers().
void LLVertexBuffer::unmapBuffer()
{
    LLVertexBuffer::flushBuffers();
}
// Flag this buffer as mapped and add it to sMappedBuffers, at most once
// per mapped period.
void LLVertexBuffer::_mapBuffer()
{
    // must only be called from main thread
    llassert(LLCoros::on_main_thread_main_coro());

    if (mMapped)
    {
        // already flagged and already on the list -- nothing to do
        return;
    }

    mMapped = true;
    sMappedBuffers.push_back(this);
}
void LLVertexBuffer::_unmapBuffer()
{
STOP_GLERROR;
if (!mMapped)
{
return;
}
struct SortMappedRegion
{
bool operator()(const MappedRegion& lhs, const MappedRegion& rhs)
@ -1549,6 +1597,13 @@ void LLVertexBuffer::setBuffer()
return;
}
#endif
if (mMapped)
{
LL_WARNS() << "Missing call to unmapBuffer or flushBuffers" << LL_ENDL;
_unmapBuffer();
}
// no data may be pending
llassert(mMappedVertexRegions.empty());
llassert(mMappedIndexRegions.empty());

View File

@ -120,6 +120,9 @@ public:
// indexed by the following enum
static U32 calcOffsets(const U32& typemask, U32* offsets, U32 num_vertices);
// flush any pending mapped buffers
static void flushBuffers();
//WARNING -- when updating these enums you MUST
// 1 - update LLVertexBuffer::sTypeSize
// 2 - update LLVertexBuffer::vb_type_name
@ -190,6 +193,8 @@ public:
// map for data access (see also getFooStrider below)
U8* mapVertexBuffer(AttributeType type, U32 index, S32 count = -1);
U8* mapIndexBuffer(U32 index, S32 count = -1);
// synonym for flushBuffers
void unmapBuffer();
// set for rendering
@ -312,6 +317,13 @@ private:
// three-argument overload: 'create' is not used, the call simply forwards to allocateBuffer(nverts, nindices)
bool allocateBuffer(S32 nverts, S32 nindices, bool create) { return allocateBuffer(nverts, nindices); }
// actually unmap buffer
void _unmapBuffer();
// add to set of mapped buffers
void _mapBuffer();
bool mMapped = false;
public:
static U64 getBytesAllocated();

View File

@ -1741,7 +1741,7 @@ bool LLFace::getGeometryVolume(const LLVolume& volume,
{ //bump mapped or has material, just do the whole expensive loop
LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("getGeometryVolume - texgen default");
std::vector<LLVector2> bump_tc;
LLStrider<LLVector2> bump_tc;
if (mat && !mat->getNormalID().isNull())
{ //writing out normal and specular texture coordinates, not bump offsets
@ -1803,49 +1803,70 @@ bool LLFace::getGeometryVolume(const LLVolume& volume,
}
const bool do_xform = (xforms & xform_channel) != XFORM_NONE;
// hold onto strider to front of TC array for use later
bump_tc = dst;
for (S32 i = 0; i < num_vertices; i++)
{
LLVector2 tc(vf.mTexCoords[i]);
LLVector4a& norm = vf.mNormals[i];
LLVector4a& center = *(vf.mCenter);
if (texgen != LLTextureEntry::TEX_GEN_DEFAULT)
// NOTE: split TEX_GEN_PLANAR implementation to reduce branchiness of inner loop
// These are per-vertex operations and every little bit counts
if (texgen == LLTextureEntry::TEX_GEN_PLANAR)
{
LLVector4a vec = vf.mPositions[i];
vec.mul(scalea);
if (texgen == LLTextureEntry::TEX_GEN_PLANAR)
LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - planar");
for (S32 i = 0; i < num_vertices; i++)
{
LLVector2 tc(vf.mTexCoords[i]);
LLVector4a& norm = vf.mNormals[i];
LLVector4a& center = *(vf.mCenter);
LLVector4a vec = vf.mPositions[i];
vec.mul(scalea);
planarProjection(tc, norm, center, vec);
if (tex_mode && mTextureMatrix)
{
LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
tmp = tmp * *mTextureMatrix;
tc.mV[0] = tmp.mV[0];
tc.mV[1] = tmp.mV[1];
}
else if (do_xform)
{
xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
}
*dst++ = tc;
}
}
else
{
LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - transform");
if (tex_mode && mTextureMatrix)
{
LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
tmp = tmp * *mTextureMatrix;
tc.mV[0] = tmp.mV[0];
tc.mV[1] = tmp.mV[1];
}
else if (do_xform)
{
xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
}
for (S32 i = 0; i < num_vertices; i++)
{
LLVector2 tc(vf.mTexCoords[i]);
*dst++ = tc;
if (do_bump)
{
bump_tc.push_back(tc);
if (tex_mode && mTextureMatrix)
{
LLVector3 tmp(tc.mV[0], tc.mV[1], 0.f);
tmp = tmp * *mTextureMatrix;
tc.mV[0] = tmp.mV[0];
tc.mV[1] = tmp.mV[1];
}
else if (do_xform)
{
xform(tc, cos_ang, sin_ang, os, ot, ms, mt);
}
*dst++ = tc;
}
}
}
}
if ((!mat && !gltf_mat) && do_bump)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_FACE("tgd - do bump");
mVertexBuffer->getTexCoord1Strider(tex_coords1, mGeomIndex, mGeomCount);
mVObjp->getVolume()->genTangents(face_index);

View File

@ -315,11 +315,9 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
}
}
static LLTrace::BlockTimerStatHandle FTM_FACE_RIGGING_INFO("Face Rigging Info");
void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face)
{
LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO);
LL_PROFILE_ZONE_SCOPED_CATEGORY_AVATAR;
if (vol_face.mJointRiggingInfoTab.needsUpdate())
{

View File

@ -1361,51 +1361,6 @@ void LLViewerFetchedTexture::addToCreateTexture()
}
else
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_TEXTURE;
#if 1
//
//if mRequestedDiscardLevel > mDesiredDiscardLevel, we assume the required image resolution keeps going up,
//so do not scale down the over-qualified image.
//Note: scaling down an image is expensive. Do it only when really necessary.
//
if(mRequestedDiscardLevel <= mDesiredDiscardLevel && !mForceToSaveRawImage)
{
U32 w = mFullWidth >> mRawDiscardLevel;
U32 h = mFullHeight >> mRawDiscardLevel;
//if big image, do not load extra data
//scale it down to size >= LLViewerTexture::sMinLargeImageSize
if(w * h > LLViewerTexture::sMinLargeImageSize)
{
S32 d_level = llmin(mRequestedDiscardLevel, (S32)mDesiredDiscardLevel) - mRawDiscardLevel;
if(d_level > 0)
{
S32 i = 0;
while((d_level > 0) && ((w >> i) * (h >> i) > LLViewerTexture::sMinLargeImageSize))
{
i++;
d_level--;
}
if(i > 0)
{
mRawDiscardLevel += i;
if(mRawDiscardLevel >= getDiscardLevel() && getDiscardLevel() > 0)
{
mNeedsCreateTexture = false;
destroyRawImage();
return;
}
{
//make a duplicate in case somebody else is using this raw image
mRawImage = mRawImage->scaled(w >> i, h >> i);
}
}
}
}
}
#endif
scheduleCreateTexture();
}
return;

View File

@ -10684,14 +10684,18 @@ void LLVOAvatar::updateRiggingInfo()
std::map<LLUUID, S32> curr_rigging_info_key;
// Get current rigging info key
for (LLVOVolume* vol : volumes)
{
if (vol->isMesh() && vol->getVolume())
LL_PROFILE_ZONE_NAMED_CATEGORY_AVATAR("update rig info - get key")
// Get current rigging info key
for (LLVOVolume* vol : volumes)
{
const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID();
S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD);
curr_rigging_info_key[mesh_id] = max_lod;
if (vol->isMesh() && vol->getVolume())
{
const LLUUID& mesh_id = vol->getVolume()->getParams().getSculptID();
S32 max_lod = llmax(vol->getLOD(), vol->mLastRiggingInfoLOD);
curr_rigging_info_key[mesh_id] = max_lod;
}
}
}

View File

@ -6028,8 +6028,8 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
group->mBuilt = 1.f;
const U32 MAX_BUFFER_COUNT = 4096;
LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
static std::vector<LLVertexBuffer*> locked_buffer;
locked_buffer.resize(0);
U32 buffer_count = 0;
@ -6074,8 +6074,6 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
group->dirtyGeom();
gPipeline.markRebuild(group);
}
buff->unmapBuffer();
}
}
}
@ -6091,17 +6089,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
{
LL_PROFILE_ZONE_NAMED("rebuildMesh - flush");
for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
{
(*iter)->unmapBuffer();
}
// don't forget alpha
if(group != NULL &&
!group->mVertexBuffer.isNull())
{
group->mVertexBuffer->unmapBuffer();
}
LLVertexBuffer::flushBuffers();
}
group->clearState(LLSpatialGroup::MESH_DIRTY | LLSpatialGroup::NEW_DRAWINFO);
@ -6783,11 +6771,6 @@ U32 LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace
++face_iter;
}
if (buffer)
{
buffer->unmapBuffer();
}
}
group->mBufferMap[mask].clear();