MAINT-2371 Last round of optimizations.

Reviewed by Graham
master
Dave Parks 2013-03-08 17:11:30 -06:00
parent a65c275865
commit f8e059deee
7 changed files with 729 additions and 1152 deletions

View File

@ -107,15 +107,14 @@ public:
inline void rotate(const LLVector4a& v, LLVector4a& res)
{
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
res.mul(mMatrix[0]);
LLVector4a y;
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
y.mul(mMatrix[1]);
LLVector4a y,z;
LLVector4a z;
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
res.mul(mMatrix[0]);
y.mul(mMatrix[1]);
z.mul(mMatrix[2]);
res.add(y);

View File

@ -460,16 +460,13 @@ inline void LLVector4a::setMax(const LLVector4a& lhs, const LLVector4a& rhs)
mQ = _mm_max_ps(lhs.mQ, rhs.mQ);
}
// Set this to (c * lhs) + rhs * ( 1 - c)
// Set this to lhs + (rhs-lhs)*c
inline void LLVector4a::setLerp(const LLVector4a& lhs, const LLVector4a& rhs, F32 c)
{
LLVector4a a = lhs;
a.mul(c);
LLVector4a b = rhs;
b.mul(1.f-c);
setAdd(a, b);
LLVector4a t;
t.setSub(rhs,lhs);
t.mul(c);
setAdd(lhs, t);
}
inline LLBool32 LLVector4a::isFinite3() const

File diff suppressed because it is too large Load Diff

View File

@ -37,7 +37,6 @@ class LLPath;
template <class T> class LLOctreeNode;
class LLVector4a;
class LLVolumeFace;
class LLVolume;
class LLVolumeTriangle;
@ -50,6 +49,8 @@ class LLVolumeTriangle;
#include "v3math.h"
#include "v3dmath.h"
#include "v4math.h"
#include "llvector4a.h"
#include "llmatrix4a.h"
#include "llquaternion.h"
#include "llstrider.h"
#include "v4coloru.h"
@ -194,6 +195,26 @@ const U8 LL_SCULPT_FLAG_MIRROR = 128;
const S32 LL_SCULPT_MESH_MAX_FACES = 8;
// Growable array of T with a std::vector-like surface (push_back, size,
// resize, operator[]) plus a bulk append(). Only the declarations are visible
// in this header; the member function definitions live elsewhere.
// NOTE(review): the `alignment` template parameter is presumably the byte
// alignment requested for the backing storage -- confirm against the
// implementation.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment operator. If the destructor releases mArray, a compiler-generated
// copy would leave two instances pointing at the same buffer -- confirm that
// instances are never copied.
template <class T, U32 alignment>
class LLAlignedArray
{
public:
// Pointer to the element storage (publicly accessible).
T* mArray;
// Element count; this is the value size() returns.
U32 mElementCount;
// Presumably the number of elements mArray currently has room for -- TODO confirm.
U32 mCapacity;
LLAlignedArray();
~LLAlignedArray();
// Presumably appends a copy of elem at the end, growing storage when needed -- TODO confirm.
void push_back(const T& elem);
// Returns mElementCount.
U32 size() const { return mElementCount; }
// Presumably changes the element count to `size` -- TODO confirm whether new elements are initialized.
void resize(U32 size);
// NOTE(review): N is signed while the counts are U32. Presumably reserves N
// additional elements and returns a pointer to the first of them -- confirm.
T* append(S32 N);
// NOTE(review): the index is a signed int while size() is U32; no bounds
// checking is visible from this declaration.
T& operator[](int idx);
const T& operator[](int idx) const;
};
class LLProfileParams
{
public:
@ -708,16 +729,16 @@ public:
LLFaceID mFaceID;
};
std::vector<LLVector3> mProfile;
std::vector<LLVector2> mNormals;
LLAlignedArray<LLVector4a, 64> mProfile;
//LLAlignedArray<LLVector4a, 64> mNormals;
std::vector<Face> mFaces;
std::vector<LLVector3> mEdgeNormals;
std::vector<LLVector3> mEdgeCenters;
//LLAlignedArray<LLVector4a, 64> mEdgeNormals;
//LLAlignedArray<LLVector4a, 64> mEdgeCenters;
friend std::ostream& operator<<(std::ostream &s, const LLProfile &profile);
protected:
void genNormals(const LLProfileParams& params);
static S32 getNumNGonPoints(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
void genNGon(const LLProfileParams& params, S32 sides, F32 offset=0.0f, F32 bevel = 0.0f, F32 ang_scale = 1.f, S32 split = 0);
@ -741,13 +762,29 @@ protected:
class LLPath
{
public:
struct PathPt
class PathPt
{
LLVector3 mPos;
LLVector2 mScale;
LLQuaternion mRot;
public:
LLMatrix4a mRot;
LLVector4a mPos;
LLVector4a mScale;
F32 mTexT;
PathPt() { mPos.setVec(0,0,0); mTexT = 0; mScale.setVec(0,0); mRot.loadIdentity(); }
F32 pad[3]; //for alignment
PathPt()
{
mPos.clear();
mTexT = 0;
mScale.clear();
mRot.setRows(LLVector4a(1,0,0,0),
LLVector4a(0,1,0,0),
LLVector4a(0,0,1,0));
//distinguished data in the pad for debugging
pad[0] = 3.14159f;
pad[1] = -3.14159f;
pad[2] = 0.585f;
}
};
public:
@ -779,7 +816,7 @@ public:
friend std::ostream& operator<<(std::ostream &s, const LLPath &path);
public:
std::vector<PathPt> mPath;
LLAlignedArray<PathPt, 64> mPath;
protected:
BOOL mOpen;
@ -951,11 +988,7 @@ protected:
~LLVolume(); // use unref
public:
struct Point
{
LLVector3 mPos;
};
struct FaceParams
{
LLFaceID mFaceID;
@ -978,8 +1011,8 @@ public:
const LLProfile& getProfile() const { return *mProfilep; }
LLPath& getPath() const { return *mPathp; }
void resizePath(S32 length);
const std::vector<Point>& getMesh() const { return mMesh; }
const LLVector3& getMeshPt(const U32 i) const { return mMesh[i].mPos; }
const LLAlignedArray<LLVector4a,64>& getMesh() const { return mMesh; }
const LLVector4a& getMeshPt(const U32 i) const { return mMesh[i]; }
void setDirty() { mPathp->setDirty(); mProfilep->setDirty(); }
@ -994,10 +1027,7 @@ public:
S32 getSculptLevel() const { return mSculptLevel; }
void setSculptLevel(S32 level) { mSculptLevel = level; }
S32 *getTriangleIndices(U32 &num_indices) const;
// returns number of triangle indices required for path/profile mesh
S32 getNumTriangleIndices() const;
static void getLoDTriangleCounts(const LLVolumeParams& params, S32* counts);
S32 getNumTriangles(S32* vcount = NULL) const;
@ -1070,7 +1100,8 @@ public:
LLVolumeParams mParams;
LLPath *mPathp;
LLProfile *mProfilep;
std::vector<Point> mMesh;
LLAlignedArray<LLVector4a,64> mMesh;
BOOL mGenerateSingleFace;
typedef std::vector<LLVolumeFace> face_list_t;

View File

@ -683,30 +683,36 @@ void LLVolumeImplFlexible::doFlexibleUpdate()
LLVector4(z_axis, 0.f),
LLVector4(delta_pos, 1.f));
LL_CHECK_MEMORY
for (i=0; i<=num_render_sections; ++i)
{
new_point = &path->mPath[i];
LLVector3 pos = newSection[i].mPosition * rel_xform;
LLQuaternion rot = mSection[i].mAxisRotation * newSection[i].mRotation * delta_rot;
if (!mUpdated || (new_point->mPos-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f)
LLVector3 np(new_point->mPos.getF32ptr());
if (!mUpdated || (np-pos).magVec()/mVO->mDrawable->mDistanceWRTCamera > 0.001f)
{
new_point->mPos = newSection[i].mPosition * rel_xform;
new_point->mPos.load3((newSection[i].mPosition * rel_xform).mV);
mUpdated = FALSE;
}
new_point->mRot = rot;
new_point->mScale = newSection[i].mScale;
new_point->mRot.loadu(LLMatrix3(rot));
new_point->mScale.set(newSection[i].mScale.mV[0], newSection[i].mScale.mV[1], 0,1);
new_point->mTexT = ((F32)i)/(num_render_sections);
}
LL_CHECK_MEMORY
mLastSegmentRotation = parentSegmentRotation;
}
static LLFastTimer::DeclareTimer FTM_FLEXI_PREBUILD("Flexi Prebuild");
// Runs doFlexibleRebuild() when mUpdated is not set, timing the work under
// the FTM_FLEXI_PREBUILD fast timer. Does nothing when mUpdated is set.
void LLVolumeImplFlexible::preRebuild()
{
    if (mUpdated)
    {
        // flag already set -- no rebuild (and no timer) for this call
        return;
    }

    // the timer object lives until the rebuild call below has returned
    LLFastTimer prebuild_timer(FTM_FLEXI_PREBUILD);
    doFlexibleRebuild();
}

View File

@ -739,7 +739,7 @@ class LLVolumeGeometryManager: public LLGeometryManager
virtual void rebuildGeom(LLSpatialGroup* group);
virtual void rebuildMesh(LLSpatialGroup* group);
virtual void getGeometry(LLSpatialGroup* group);
void genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector<LLFace*>& faces, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE);
void genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort = FALSE, BOOL batch_textures = FALSE);
void registerFace(LLSpatialGroup* group, LLFace* facep, U32 type);
};

View File

@ -1051,8 +1051,7 @@ BOOL LLVOVolume::setVolume(const LLVolumeParams &params_in, const S32 detail, bo
}
}
}
static LLCachedControl<bool> use_transform_feedback(gSavedSettings, "RenderUseTransformFeedback");
bool cache_in_vram = use_transform_feedback && gTransformPositionProgram.mProgramObject &&
@ -4242,11 +4241,20 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
mFaceList.clear();
std::vector<LLFace*> fullbright_faces;
std::vector<LLFace*> bump_faces;
std::vector<LLFace*> simple_faces;
const U32 MAX_FACE_COUNT = 4096;
static LLFace** fullbright_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
static LLFace** bump_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
static LLFace** simple_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
static LLFace** alpha_faces = (LLFace**) ll_aligned_malloc(MAX_FACE_COUNT*sizeof(LLFace*),64);
U32 fullbright_count = 0;
U32 bump_count = 0;
U32 simple_count = 0;
U32 alpha_count = 0;
std::vector<LLFace*> alpha_faces;
U32 useage = group->mSpatialPartition->mBufferUsage;
U32 max_vertices = (gSavedSettings.getS32("RenderMaxVBOSize")*1024)/LLVertexBuffer::calcVertexSize(group->mSpatialPartition->mVertexDataMask);
@ -4257,6 +4265,8 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
bool emissive = false;
{
LLFastTimer t(FTM_REBUILD_VOLUME_FACE_LIST);
@ -4558,7 +4568,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
{
if (facep->canRenderAsMask())
{ //can be treated as alpha mask
simple_faces.push_back(facep);
if (simple_count < MAX_FACE_COUNT)
{
simple_faces[simple_count++] = facep;
}
}
else
{
@ -4566,7 +4579,10 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
{ //only treat as alpha in the pipeline if < 100% transparent
drawablep->setState(LLDrawable::HAS_ALPHA);
}
alpha_faces.push_back(facep);
if (alpha_count < MAX_FACE_COUNT)
{
alpha_faces[alpha_count++] = facep;
}
}
}
else
@ -4581,33 +4597,51 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
{
if (te->getBumpmap())
{ //needs normal + binormal
bump_faces.push_back(facep);
if (bump_count < MAX_FACE_COUNT)
{
bump_faces[bump_count++] = facep;
}
}
else if (te->getShiny() || !te->getFullbright())
{ //needs normal
simple_faces.push_back(facep);
if (simple_count < MAX_FACE_COUNT)
{
simple_faces[simple_count++] = facep;
}
}
else
{ //doesn't need normal
facep->setState(LLFace::FULLBRIGHT);
fullbright_faces.push_back(facep);
if (fullbright_count < MAX_FACE_COUNT)
{
fullbright_faces[fullbright_count++] = facep;
}
}
}
else
{
if (te->getBumpmap() && LLPipeline::sRenderBump)
{ //needs normal + binormal
bump_faces.push_back(facep);
if (bump_count < MAX_FACE_COUNT)
{
bump_faces[bump_count++] = facep;
}
}
else if ((te->getShiny() && LLPipeline::sRenderBump) ||
!(te->getFullbright() || bake_sunlight))
{ //needs normal
simple_faces.push_back(facep);
if (simple_count < MAX_FACE_COUNT)
{
simple_faces[simple_count++] = facep;
}
}
else
{ //doesn't need normal
facep->setState(LLFace::FULLBRIGHT);
fullbright_faces.push_back(facep);
if (fullbright_count < MAX_FACE_COUNT)
{
fullbright_faces[fullbright_count++] = facep;
}
}
}
}
@ -4657,17 +4691,17 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
if (batch_textures)
{
bump_mask |= LLVertexBuffer::MAP_BINORMAL;
genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, FALSE, TRUE);
genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, FALSE, TRUE);
genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, FALSE, FALSE);
genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, TRUE, TRUE);
genDrawInfo(group, simple_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, simple_faces, simple_count, FALSE, TRUE);
genDrawInfo(group, fullbright_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, fullbright_faces, fullbright_count, FALSE, TRUE);
genDrawInfo(group, bump_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, bump_faces, bump_count, FALSE, FALSE);
genDrawInfo(group, alpha_mask | LLVertexBuffer::MAP_TEXTURE_INDEX, alpha_faces, alpha_count, TRUE, TRUE);
}
else
{
genDrawInfo(group, simple_mask, simple_faces);
genDrawInfo(group, fullbright_mask, fullbright_faces);
genDrawInfo(group, bump_mask, bump_faces, FALSE, TRUE);
genDrawInfo(group, alpha_mask, alpha_faces, TRUE);
genDrawInfo(group, simple_mask, simple_faces, simple_count);
genDrawInfo(group, fullbright_mask, fullbright_faces, fullbright_count);
genDrawInfo(group, bump_mask, bump_faces, bump_count, FALSE, FALSE);
genDrawInfo(group, alpha_mask, alpha_faces, alpha_count, TRUE);
}
@ -4699,6 +4733,7 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
}
}
static LLFastTimer::DeclareTimer FTM_REBUILD_MESH_FLUSH("Flush Mesh");
void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
{
@ -4708,11 +4743,14 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
LLFastTimer ftm(FTM_REBUILD_VOLUME_VB);
LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO); //make sure getgeometryvolume shows up in the right place in timers
S32 num_mapped_veretx_buffer = LLVertexBuffer::sMappedCount ;
group->mBuilt = 1.f;
std::set<LLVertexBuffer*> mapped_buffers;
S32 num_mapped_vertex_buffer = LLVertexBuffer::sMappedCount ;
const U32 MAX_BUFFER_COUNT = 4096;
LLVertexBuffer* locked_buffer[MAX_BUFFER_COUNT];
U32 buffer_count = 0;
for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
{
@ -4722,7 +4760,7 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
{
LLVOVolume* vobj = drawablep->getVOVolume();
vobj->preRebuild();
if (drawablep->isState(LLDrawable::ANIMATED_CHILD))
{
vobj->updateRelativeXform(true);
@ -4747,9 +4785,9 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
}
if (buff->isLocked())
if (buff->isLocked() && buffer_count < MAX_BUFFER_COUNT)
{
mapped_buffers.insert(buff);
locked_buffer[buffer_count++] = buff;
}
}
}
@ -4765,21 +4803,24 @@ void LLVolumeGeometryManager::rebuildMesh(LLSpatialGroup* group)
}
}
for (std::set<LLVertexBuffer*>::iterator iter = mapped_buffers.begin(); iter != mapped_buffers.end(); ++iter)
{
(*iter)->flush();
}
// don't forget alpha
if(group != NULL &&
!group->mVertexBuffer.isNull() &&
group->mVertexBuffer->isLocked())
{
group->mVertexBuffer->flush();
LLFastTimer t(FTM_REBUILD_MESH_FLUSH);
for (LLVertexBuffer** iter = locked_buffer, ** end_iter = locked_buffer+buffer_count; iter != end_iter; ++iter)
{
(*iter)->flush();
}
// don't forget alpha
if(group != NULL &&
!group->mVertexBuffer.isNull() &&
group->mVertexBuffer->isLocked())
{
group->mVertexBuffer->flush();
}
}
//if not all buffers are unmapped
if(num_mapped_veretx_buffer != LLVertexBuffer::sMappedCount)
if(num_mapped_vertex_buffer != LLVertexBuffer::sMappedCount)
{
llwarns << "Not all mapped vertex buffers are unmapped!" << llendl ;
for (LLSpatialGroup::element_iter drawable_iter = group->getDataBegin(); drawable_iter != group->getDataEnd(); ++drawable_iter)
@ -4839,7 +4880,7 @@ static LLFastTimer::DeclareTimer FTM_GEN_DRAW_INFO_RESIZE_VB("Resize VB");
void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::vector<LLFace*>& faces, BOOL distance_sort, BOOL batch_textures)
void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, LLFace** faces, U32 face_count, BOOL distance_sort, BOOL batch_textures)
{
LLFastTimer t(FTM_REBUILD_VOLUME_GEN_DRAW_INFO);
@ -4875,17 +4916,18 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
if (!distance_sort)
{
//sort faces by things that break batches
std::sort(faces.begin(), faces.end(), CompareBatchBreakerModified());
std::sort(faces, faces+face_count, CompareBatchBreakerModified());
}
else
{
//sort faces by distance
std::sort(faces.begin(), faces.end(), LLFace::CompareDistanceGreater());
std::sort(faces, faces+face_count, LLFace::CompareDistanceGreater());
}
}
bool hud_group = group->isHUDGroup() ;
std::vector<LLFace*>::iterator face_iter = faces.begin();
LLFace** face_iter = faces;
LLFace** end_faces = faces+face_count;
LLSpatialGroup::buffer_map_t buffer_map;
@ -4916,7 +4958,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
bool flexi = false;
while (face_iter != faces.end())
while (face_iter != end_faces)
{
//pull off next face
LLFace* facep = *face_iter;
@ -4945,10 +4987,13 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
flexi = flexi || facep->getViewerObject()->getVolume()->isUnique();
//sum up vertices needed for this render batch
std::vector<LLFace*>::iterator i = face_iter;
LLFace** i = face_iter;
++i;
std::vector<LLViewerTexture*> texture_list;
const U32 MAX_TEXTURE_COUNT = 32;
LLViewerTexture* texture_list[MAX_TEXTURE_COUNT];
U32 texture_count = 0;
{
LLFastTimer t(FTM_GEN_DRAW_INFO_FACE_SIZE);
@ -4956,12 +5001,15 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
{
U8 cur_tex = 0;
facep->setTextureIndex(cur_tex);
texture_list.push_back(tex);
if (texture_count < MAX_TEXTURE_COUNT)
{
texture_list[texture_count++] = tex;
}
if (can_batch_texture(facep))
{ //populate texture_list with any textures that can be batched
//move i to the next unbatchable face
while (i != faces.end())
while (i != end_faces)
{
facep = *i;
@ -4976,7 +5024,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
if (distance_sort)
{ //textures might be out of order, see if texture exists in current batch
bool found = false;
for (U32 tex_idx = 0; tex_idx < texture_list.size(); ++tex_idx)
for (U32 tex_idx = 0; tex_idx < texture_count; ++tex_idx)
{
if (facep->getTexture() == texture_list[tex_idx])
{
@ -4988,7 +5036,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
if (!found)
{
cur_tex = texture_list.size();
cur_tex = texture_count;
}
}
else
@ -5003,7 +5051,10 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
tex = facep->getTexture();
texture_list.push_back(tex);
if (texture_count < MAX_TEXTURE_COUNT)
{
texture_list[texture_count++] = tex;
}
}
if (geom_count + facep->getGeomCount() > max_vertices)
@ -5026,7 +5077,7 @@ void LLVolumeGeometryManager::genDrawInfo(LLSpatialGroup* group, U32 mask, std::
}
else
{
while (i != faces.end() &&
while (i != end_faces &&
(LLPipeline::sTextureBindTest || (distance_sort || (*i)->getTexture() == tex)))
{
facep = *i;