viewer#2411 LLFontGL::render optimizations #3

master
Andrey Kleshchev 2024-08-30 18:51:48 +03:00 committed by Andrey Kleshchev
parent a638d9610d
commit 5c64e5e13d
9 changed files with 244 additions and 248 deletions

View File

@ -42,7 +42,6 @@
#include "lltexture.h"
#include "lldir.h"
#include "llstring.h"
#include "llvertexbuffer.h"
// Third party library includes
#include <boost/tokenizer.hpp>
@ -145,8 +144,7 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, const LLRectf& rec
S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, const LLColor4 &color, HAlign halign, VAlign valign, U8 style,
ShadowType shadow, S32 max_chars, S32 max_pixels, F32* right_x, bool use_ellipses, bool use_color,
std::list<LLVertexBufferData> *buffer_list) const
ShadowType shadow, S32 max_chars, S32 max_pixels, F32* right_x, bool use_ellipses, bool use_color) const
{
LL_PROFILE_ZONE_SCOPED_CATEGORY_UI;
@ -160,7 +158,6 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
return 0;
}
gGL.flush(); // deliberately empty pending verts
gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
S32 scaled_max_pixels = max_pixels == S32_MAX ? S32_MAX : llceil((F32)max_pixels * sScaleX);
@ -286,9 +283,6 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
std::pair<EFontGlyphType, S32> bitmap_entry = std::make_pair(EFontGlyphType::Grayscale, -1);
S32 glyph_count = 0;
S32 buffer_count = 0;
LLVertexBuffer* vb;
LLImageGL* font_image = nullptr;
for (i = begin_offset; i < begin_offset + length; i++)
{
llwchar wch = wstr[i];
@ -312,35 +306,16 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
// otherwise the queued glyphs will be taken from wrong textures.
if (glyph_count > 0)
{
if (buffer_list)
gGL.begin(LLRender::QUADS);
{
vb = gGL.beginNoCache(LLRender::QUADS, buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
vb = gGL.getBuffer(buffer_count); // instead of endNoCache to draw now
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
}
else
{
gGL.begin(LLRender::QUADS);
{
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
}
gGL.end();
}
gGL.end();
glyph_count = 0;
}
bitmap_entry = next_bitmap_entry;
font_image = font_bitmap_cache->getImageGL(bitmap_entry.first, bitmap_entry.second);
LLImageGL* font_image = font_bitmap_cache->getImageGL(bitmap_entry.first, bitmap_entry.second);
gGL.getTexUnit(0)->bind(font_image);
}
@ -364,28 +339,11 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
if (glyph_count >= GLYPH_BATCH_SIZE)
{
if (buffer_list)
gGL.begin(LLRender::QUADS);
{
vb = gGL.beginNoCache(LLRender::QUADS, buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
vb = gGL.endNoCache(buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
}
else
{
gGL.begin(LLRender::QUADS);
{
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
}
gGL.end();
}
gGL.end();
glyph_count = 0;
}
@ -418,29 +376,11 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
cur_render_x = cur_x;
cur_render_y = cur_y;
}
if (buffer_list)
gGL.begin(LLRender::QUADS);
{
vb = gGL.beginNoCache(LLRender::QUADS, buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
vb = gGL.endNoCache(buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, LLRender::QUADS, buffer_count);
}
}
else
{
gGL.begin(LLRender::QUADS);
{
gGL.vertexBatchPreTransformed(vertices, uvs, colors, glyph_count * 4);
}
gGL.end();
}
gGL.end();
if (right_x)
@ -454,45 +394,14 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
F32 descender = (F32)llfloor(mFontFreetype->getDescenderHeight());
gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
if (buffer_list)
{
vb = gGL.beginNoCache(LLRender::LINES, buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, nullptr, LLRender::QUADS, buffer_count);
}
gGL.vertex2f(start_x, cur_y - descender);
gGL.vertex2f(cur_x, cur_y - descender);
vb = gGL.getBuffer(buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, nullptr, LLRender::LINES, buffer_count);
}
}
else
{
gGL.begin(LLRender::LINES);
gGL.vertex2f(start_x, cur_y - descender);
gGL.vertex2f(cur_x, cur_y - descender);
gGL.end();
}
}
else if (buffer_list)
{
vb = gGL.getBuffer(buffer_count);
if (vb)
{
buffer_list->emplace_back(vb, font_image, gGL.getMode(), buffer_count);
}
gGL.begin(LLRender::LINES);
gGL.vertex2f(start_x, cur_y - descender);
gGL.vertex2f(cur_x, cur_y - descender);
gGL.end();
}
if (draw_ellipses)
{
// signal a separate context
buffer_list->emplace_back(nullptr, nullptr, 0, 0);
// recursively render ellipses at end of string
// we've already reserved enough room
gGL.pushUIMatrix();
@ -507,8 +416,7 @@ S32 LLFontGL::render(const LLWString &wstr, S32 begin_offset, F32 x, F32 y, cons
S32_MAX, max_pixels,
right_x,
false,
use_color,
buffer_list);
use_color);
gGL.popUIMatrix();
}

View File

@ -33,7 +33,6 @@
#include "llimagegl.h"
#include "llpointer.h"
#include "llrect.h"
#include "llvertexbuffer.h"
#include "v2math.h"
class LLColor4;
@ -43,7 +42,6 @@ class LLFontFreetype;
// Structure used to store previously requested fonts.
class LLFontRegistry;
class LLVertexBuffer;
class LLFontGL
{
@ -81,16 +79,6 @@ public:
DROP_SHADOW_SOFT
};
// Record of one vertex buffer produced while rendering text, together with
// the state needed to draw it again: the texture to bind, the primitive mode
// and the vertex count.
// An entry with a null mBuffer carries no geometry.
// NOTE(review): the original comment on mImage hinted that storing something
// other than the image pointer might be preferable; the note was left
// unfinished, so the intended alternative is unknown.
struct LLVertexBufferData
{
    // Empty entry: null buffer, null image, mode 0, count 0.
    LLVertexBufferData() = default;

    LLVertexBufferData(LLVertexBuffer* buffer, LLImageGL* image, U8 mode, U32 count)
        : mBuffer(buffer)
        , mImage(image)
        , mMode(mode)
        , mCount(count)
    {
    }

    LLPointer<LLVertexBuffer> mBuffer;  // geometry to draw
    LLPointer<LLImageGL>      mImage;   // texture for the draw; null means "no texture"
    U8                        mMode = 0;   // primitive mode passed to the draw call
    U32                       mCount = 0;  // number of vertices to draw
};
LLFontGL();
~LLFontGL();
@ -131,8 +119,7 @@ public:
S32 max_chars = S32_MAX, S32 max_pixels = S32_MAX,
F32* right_x=NULL,
bool use_ellipses = false,
bool use_color = true,
std::list<LLVertexBufferData>* buffer_list = nullptr) const;
bool use_color = true) const;
S32 render(const LLWString &text, S32 begin_offset, F32 x, F32 y, const LLColor4 &color) const;

View File

@ -60,12 +60,17 @@ S32 LLFontVertexBuffer::render(
bool use_ellipses,
bool use_color )
{
if (!LLFontGL::sDisplayFont) //do not display texts
{
return static_cast<S32>(text.length());
}
if (mBufferList.empty())
{
genBuffers(fontp, text, begin_offset, x, y, color, halign, valign,
style, shadow, max_chars, max_pixels, right_x, use_ellipses, use_color);
}
else if (mLastX != x || mLastY != y
else if (mLastX != x
|| mLastY != y
|| mLastFont != fontp
|| mLastColor != color // alphas change often
|| mLastHalign != halign
@ -74,7 +79,10 @@ S32 LLFontVertexBuffer::render(
|| mLastMaxChars != max_chars
|| mLastMaxPixels != max_pixels
|| mLastStyle != style
|| mLastShadow != shadow) // ex: buttons change shadow state
|| mLastShadow != shadow // ex: buttons change shadow state
|| mLastScaleX != LLFontGL::sScaleX
|| mLastScaleY != LLFontGL::sScaleY
|| mLastOrigin != LLFontGL::sCurOrigin)
{
genBuffers(fontp, text, begin_offset, x, y, color, halign, valign,
style, shadow, max_chars, max_pixels, right_x, use_ellipses, use_color);
@ -110,8 +118,11 @@ void LLFontVertexBuffer::genBuffers(
bool use_color)
{
mBufferList.clear();
gGL.beginList(&mBufferList);
mChars = fontp->render(text, begin_offset, x, y, color, halign, valign,
style, shadow, max_chars, max_pixels, right_x, use_ellipses, use_color, &mBufferList);
style, shadow, max_chars, max_pixels, right_x, use_ellipses, use_color);
gGL.endList();
mLastFont = fontp;
mLastOffset = begin_offset;
@ -126,67 +137,25 @@ void LLFontVertexBuffer::genBuffers(
mLastStyle = style;
mLastShadow = shadow;
mLastScaleX = LLFontGL::sScaleX;
mLastScaleY = LLFontGL::sScaleY;
mLastOrigin = LLFontGL::sCurOrigin;
if (right_x)
{
mLastRightX = *right_x;
}
}
// Draws the recorded buffers in [iter, end).
//
// Entries are drawn in order until the range is exhausted or an entry with a
// null buffer is reached. A null-buffer entry is a separator (used for the
// ellipsis part of a string): it is consumed, and everything after it is
// drawn by a recursive call wrapped in its own pushUIMatrix/popUIMatrix pair.
void render_buffers(LLFontVertexBuffer::buffer_list_t::iterator iter, LLFontVertexBuffer::buffer_list_t::iterator end)
{
    gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
    gGL.pushUIMatrix();

    gGL.loadUIIdentity();

    // Translate along depth so that floating text appears 'in-world'
    // and is correctly occluded.
    gGL.translatef(0.f, 0.f, LLFontGL::sCurDepth);
    gGL.setSceneBlendType(LLRender::BT_ALPHA);

    while (iter != end)
    {
        // Step past the entry first; list references stay valid afterwards.
        auto& entry = *iter;
        ++iter;

        if (entry.mBuffer == nullptr)
        {
            // ellipsis separator: the remainder is handled by the recursion below
            break;
        }

        if (!entry.mImage)
        {
            gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
        }
        else
        {
            gGL.getTexUnit(0)->bind(entry.mImage);
        }

        entry.mBuffer->setBuffer();

        // Under a core profile, QUADS entries are submitted as TRIANGLES.
        U32 draw_mode = entry.mMode;
        if (LLRender::sGLCoreProfile && entry.mMode == LLRender::QUADS)
        {
            draw_mode = LLRender::TRIANGLES;
        }
        entry.mBuffer->drawArrays(draw_mode, 0, entry.mCount);
    }

    const bool has_remainder = (iter != end);
    if (has_remainder)
    {
        gGL.pushUIMatrix();
        render_buffers(iter, end);
        gGL.popUIMatrix();
    }

    gGL.popUIMatrix();
}
void LLFontVertexBuffer::renderBuffers()
{
gGL.flush(); // deliberately empty pending verts
render_buffers(mBufferList.begin(), mBufferList.end());
gGL.getTexUnit(0)->enable(LLTexUnit::TT_TEXTURE);
gGL.pushUIMatrix();
for (LLVertexBufferData& buffer : mBufferList)
{
buffer.draw();
}
gGL.popUIMatrix();
}

View File

@ -30,7 +30,7 @@
#include "llfontgl.h"
class LLVertexBuffer;
class LLVertexBufferData;
class LLFontVertexBuffer
{
@ -52,8 +52,6 @@ public:
F32* right_x = NULL,
bool use_ellipses = false,
bool use_color = true);
typedef std::list<LLFontGL::LLVertexBufferData> buffer_list_t;
private:
void genBuffers(const LLFontGL* fontp,
@ -71,7 +69,7 @@ private:
void renderBuffers();
buffer_list_t mBufferList;
std::list<LLVertexBufferData> mBufferList;
S32 mChars = 0;
const LLFontGL *mLastFont = nullptr;
S32 mLastOffset = 0;
@ -86,6 +84,12 @@ private:
U8 mLastStyle = LLFontGL::NORMAL;
LLFontGL::ShadowType mLastShadow = LLFontGL::NO_SHADOW;
F32 mLastRightX = 0.f;
// LLFontGL's statics
F32 mLastScaleX = 1.f;
F32 mLastScaleY = 1.f;
LLCoordGL mLastOrigin;
bool mTrackStringChanges = true;
static std::hash<LLWString> sStringHasher;

View File

@ -77,6 +77,7 @@ struct LLVBCache
};
static std::unordered_map<U64, LLVBCache> sVBCache;
static thread_local std::list<LLVertexBufferData> *sBufferDataList = nullptr;
static const GLenum sGLTextureType[] =
{
@ -1528,6 +1529,25 @@ void LLRender::clearErrors()
}
}
// Starts recording into `list`.
//
// Pending geometry is flushed first, while no new list is installed yet.
// The thread-local list pointer is then set, and it stays set until
// endList() clears it.
void LLRender::beginList(std::list<LLVertexBufferData>* list)
{
    // Flush before switching the target so earlier geometry is not recorded.
    flush();

    sBufferDataList = list;
}
// Stops recording started by beginList().
//
// Geometry still pending is flushed while the list is installed, and then
// the thread-local list pointer is cleared. Calling this without an active
// list trips an assert and otherwise does nothing.
void LLRender::endList()
{
    if (!sBufferDataList)
    {
        llassert(false); // something failed to provide a list or closed it twice
        return;
    }

    // Flush while the list is still set, then detach it.
    flush();
    sBufferDataList = nullptr;
}
void LLRender::begin(const GLuint& mode)
{
if (mode != mMode)
@ -1663,78 +1683,26 @@ void LLRender::flush()
if (mBuffer)
{
HBXXH64 hash;
LLVertexBuffer *vb;
U32 attribute_mask = LLGLSLShader::sCurBoundShaderPtr->mAttributeMask;
if (sBufferDataList)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hash");
hash.update((U8*)mVerticesp.get(), count * sizeof(LLVector4a));
if (attribute_mask & LLVertexBuffer::MAP_TEXCOORD0)
{
hash.update((U8*)mTexcoordsp.get(), count * sizeof(LLVector2));
}
if (attribute_mask & LLVertexBuffer::MAP_COLOR)
{
hash.update((U8*)mColorsp.get(), count * sizeof(LLColor4U));
}
hash.finalize();
}
U64 vhash = hash.digest();
// check the VB cache before making a new vertex buffer
// This is a giant hack to deal with (mostly) our terrible UI rendering code
// that was built on top of OpenGL immediate mode. Huge performance wins
// can be had by not uploading geometry to VRAM unless absolutely necessary.
// Most of our usage of the "immediate mode" style draw calls is actually
// sending the same geometry over and over again.
// To leverage this, we maintain a running hash of the vertex stream being
// built up before a flush, and then check that hash against a VB
// cache just before creating a vertex buffer in VRAM
std::unordered_map<U64, LLVBCache>::iterator cache = sVBCache.find(vhash);
LLPointer<LLVertexBuffer> vb;
if (cache != sVBCache.end())
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hit");
// cache hit, just use the cached buffer
vb = cache->second.vb;
cache->second.touched = std::chrono::steady_clock::now();
vb = genBuffer(attribute_mask, count);
sBufferDataList->emplace_back(
vb,
mMode,
count,
gGL.getTexUnit(0)->mCurrTexture,
mMatrix[MM_MODELVIEW][mMatIdx[MM_MODELVIEW]],
mMatrix[MM_PROJECTION][mMatIdx[MM_PROJECTION]],
mMatrix[MM_TEXTURE0][mMatIdx[MM_TEXTURE0]]
);
}
else
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache miss");
vb = genBuffer(attribute_mask, count);
sVBCache[vhash] = { vb , std::chrono::steady_clock::now() };
static U32 miss_count = 0;
miss_count++;
if (miss_count > 1024)
{
LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache clean");
miss_count = 0;
auto now = std::chrono::steady_clock::now();
using namespace std::chrono_literals;
// every 1024 misses, clean the cache of any VBs that haven't been touched in the last second
for (std::unordered_map<U64, LLVBCache>::iterator iter = sVBCache.begin(); iter != sVBCache.end(); )
{
if (now - iter->second.touched > 1s)
{
iter = sVBCache.erase(iter);
}
else
{
++iter;
}
}
}
vb = bufferfromCache(attribute_mask, count);
}
drawBuffer(vb, mMode, count);
@ -1749,6 +1717,81 @@ void LLRender::flush()
}
}
// Returns a vertex buffer for the currently accumulated vertex stream,
// reusing a cached buffer when an identical stream was seen before.
//
// attribute_mask  selects which streams take part: positions are always
//                 hashed, texcoords and colors only when their MAP_* bit is set.
// count           number of vertices accumulated.
//
// The streams are hashed and the digest is used as the key into sVBCache.
// On a hit the cached buffer is returned and its timestamp refreshed. On a
// miss a new buffer is generated and cached; after more than 1024 misses
// since the last sweep, entries untouched for over a second are evicted.
LLVertexBuffer* LLRender::bufferfromCache(U32 attribute_mask, U32 count)
{
    HBXXH64 hasher;
    {
        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hash");

        hasher.update(reinterpret_cast<U8*>(mVerticesp.get()), count * sizeof(LLVector4a));
        if (attribute_mask & LLVertexBuffer::MAP_TEXCOORD0)
        {
            hasher.update(reinterpret_cast<U8*>(mTexcoordsp.get()), count * sizeof(LLVector2));
        }
        if (attribute_mask & LLVertexBuffer::MAP_COLOR)
        {
            hasher.update(reinterpret_cast<U8*>(mColorsp.get()), count * sizeof(LLColor4U));
        }

        hasher.finalize();
    }

    const U64 key = hasher.digest();

    // Consult the VB cache before creating a new vertex buffer.
    // The UI code was written against OpenGL immediate mode and mostly
    // resubmits the same geometry again and again, so skipping the upload
    // to VRAM for a stream that is already cached is a large win. The hash
    // of the stream built up before a flush serves as the cache key.
    const auto found = sVBCache.find(key);
    if (found != sVBCache.end())
    {
        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache hit");

        // cache hit: refresh the timestamp and hand back the cached buffer
        found->second.touched = std::chrono::steady_clock::now();
        LLVertexBuffer* cached = found->second.vb;
        return cached;
    }

    LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache miss");

    LLVertexBuffer* created = genBuffer(attribute_mask, count);
    sVBCache[key] = { created, std::chrono::steady_clock::now() };

    static U32 miss_count = 0;
    ++miss_count;
    if (miss_count > 1024)
    {
        LL_PROFILE_ZONE_NAMED_CATEGORY_VERTEX("vb cache clean");
        miss_count = 0;

        // every 1024 misses, drop any VBs that have not been touched in the last second
        const auto now = std::chrono::steady_clock::now();
        auto entry = sVBCache.begin();
        while (entry != sVBCache.end())
        {
            const bool stale = (now - entry->second.touched) > std::chrono::seconds(1);
            if (stale)
            {
                entry = sVBCache.erase(entry);
            }
            else
            {
                ++entry;
            }
        }
    }

    return created;
}
LLVertexBuffer* LLRender::genBuffer(U32 attribute_mask, S32 count)
{
LLVertexBuffer * vb = new LLVertexBuffer(attribute_mask);

View File

@ -50,7 +50,8 @@ class LLVertexBuffer;
class LLCubeMap;
class LLImageGL;
class LLRenderTarget;
class LLTexture ;
class LLTexture;
class LLVertexBufferData;
#define LL_MATRIX_STACK_DEPTH 32
@ -415,6 +416,10 @@ public:
void flush();
// if list is set, will store buffers in list for later use, if list isn't set, will use cache
void beginList(std::list<LLVertexBufferData> *list);
void endList();
void begin(const GLuint& mode);
void end();
@ -491,6 +496,7 @@ public:
private:
friend class LLLightState;
LLVertexBuffer* bufferfromCache(U32 attribute_mask, U32 count);
LLVertexBuffer* genBuffer(U32 attribute_mask, S32 count);
void drawBuffer(LLVertexBuffer* vb, U32 mode, S32 count);
void resetStriders(S32 count);
@ -525,7 +531,6 @@ private:
std::vector<LLVector3> mUIOffset;
std::vector<LLVector3> mUIScale;
};
extern F32 gGLModelView[16];

View File

@ -570,6 +570,53 @@ public:
static LLVBOPool* sVBOPool = nullptr;
void LLVertexBufferData::draw()
{
if (!mVB)
{
// signal for pushUIMatrix
return; // todo: find a better way?
}
if (mTexName)
{
gGL.getTexUnit(0)->bindManual(LLTexUnit::TT_TEXTURE, mTexName);
}
else
{
gGL.getTexUnit(0)->unbind(LLTexUnit::TT_TEXTURE);
}
gGL.matrixMode(LLRender::MM_MODELVIEW);
gGL.pushMatrix();
gGL.loadMatrix(mModelView.m);
gGL.matrixMode(LLRender::MM_PROJECTION);
gGL.pushMatrix();
gGL.loadMatrix(mProjection.m);
gGL.matrixMode(LLRender::MM_TEXTURE0);
gGL.pushMatrix();
gGL.loadMatrix(mTexture0.m);
mVB->setBuffer();
if (mMode == LLRender::QUADS && LLRender::sGLCoreProfile)
{
mVB->drawArrays(LLRender::TRIANGLES, 0, mCount);
}
else
{
mVB->drawArrays(mMode, 0, mCount);
}
gGL.popMatrix();
gGL.matrixMode(LLRender::MM_PROJECTION);
gGL.popMatrix();
gGL.matrixMode(LLRender::MM_MODELVIEW);
gGL.popMatrix();
}
//============================================================================
//static
U64 LLVertexBuffer::getBytesAllocated()
{

View File

@ -53,6 +53,37 @@
//============================================================================
// base class
class LLPrivateMemoryPool;
class LLVertexBuffer;
// Snapshot of one draw call: a vertex buffer plus the state needed to replay
// it later through draw() - primitive mode, vertex count, texture name and
// the model-view, projection and texture0 matrices.
//
// An instance with a null mVB holds no geometry; draw() returns immediately
// for it.
class LLVertexBufferData
{
public:
    // Empty entry: no buffer, mode 0, count 0, no texture.
    LLVertexBufferData()
        : mVB(nullptr)
        , mMode(0)
        , mCount(0)
        , mTexName(0)
    {}

    // buffer      vertex buffer to draw (kept alive through LLPointer)
    // mode        primitive mode handed to drawArrays
    // count       number of vertices to draw
    // tex_name    texture name to bind on unit 0; 0 means unbind
    // model_view  matrix loaded into MM_MODELVIEW for the draw
    // projection  matrix loaded into MM_PROJECTION for the draw
    // texture0    matrix loaded into MM_TEXTURE0 for the draw
    LLVertexBufferData(LLVertexBuffer* buffer, U8 mode, U32 count, U32 tex_name, glh::matrix4f model_view, glh::matrix4f projection, glh::matrix4f texture0)
        : mVB(buffer)
        , mMode(mode)
        , mCount(count)
        , mTexName(tex_name)
        // Initializers follow member declaration order. Each matrix member
        // must receive the parameter of the same name: the two were crossed
        // before, which made draw() load the projection matrix as model-view
        // and the model-view matrix as projection.
        , mProjection(projection)
        , mModelView(model_view)
        , mTexture0(texture0)
    {}

    // Binds the stored state and issues the draw call.
    void draw();

    LLPointer<LLVertexBuffer> mVB;
    U8 mMode;
    U32 mCount;
    U32 mTexName;
    glh::matrix4f mProjection;
    glh::matrix4f mModelView;
    glh::matrix4f mTexture0;
};
typedef std::list<LLVertexBufferData> buffer_data_list_t;
class LLVertexBuffer final : public LLRefCount
{
public:

View File

@ -59,7 +59,9 @@ protected:
: mColor(color),
mStyle(style),
mText(text),
mFont(font)
mFont(font),
mFontBufferLabel(false),
mFontBufferText(false)
{}
F32 getWidth(const LLFontGL* font);
const LLWString& getText() const { return mText; }