Fix shutdown crash in teardown of joint hierarchy.

Ifdef'd code for a potential skinning speed-up that avoids lots of int<->float conversions (they are expensive, and their results are static, so caching them costs only a minimal space investment),
as updating rigged VBs shows up as a profiling bottleneck for Low rendering (where we actually use CPU skinning).
master
Graham Linden 2019-08-06 14:41:55 -07:00
parent 76128c4357
commit 71af0a2a9e
8 changed files with 268 additions and 71 deletions

View File

@ -303,16 +303,17 @@ void LLJoint::removeChild(LLJoint* joint)
//--------------------------------------------------------------------
void LLJoint::removeAllChildren()
{
for (joints_t::iterator iter = mChildren.begin();
iter != mChildren.end();)
for (LLJoint* joint : mChildren)
{
joints_t::iterator curiter = iter++;
LLJoint* joint = *curiter;
mChildren.erase(curiter);
joint->mXform.setParent(NULL);
joint->mParent = NULL;
joint->touch();
if (joint)
{
joint->mXform.setParent(NULL);
joint->mParent = NULL;
joint->touch();
//delete joint;
}
}
mChildren.clear();
}

View File

@ -2526,7 +2526,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
if (mdl[i].has("Weights"))
{
face.allocateWeights(num_verts);
face.allocateJointIndices(num_verts);
LLSD::Binary weights = mdl[i]["Weights"];
@ -2567,13 +2566,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
{
wght = LLVector4(0.999f,0.f,0.f,0.f);
}
if (face.mJointIndices)
{
for (U32 k=0; k<4; k++)
{
face.mJointIndices[cur_vertex * 4 + k] = llclamp((U8)joints[k], (U8)0, (U8)110);
}
}
for (U32 k=0; k<4; k++)
{
F32 f_combined = (F32) joints[k] + wght[k];
@ -4664,7 +4656,10 @@ LLVolumeFace::LLVolumeFace() :
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
mJustWeights(NULL),
mJointIndices(NULL),
#endif
mWeightsScrubbed(FALSE),
mOctree(NULL),
mOptimized(FALSE)
@ -4691,7 +4686,10 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
mTexCoords(NULL),
mIndices(NULL),
mWeights(NULL),
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
mJustWeights(NULL),
mJointIndices(NULL),
#endif
mWeightsScrubbed(FALSE),
mOctree(NULL)
{
@ -4768,19 +4766,22 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
mWeightsScrubbed = FALSE;
}
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
if (src.mJointIndices)
{
llassert(!mJointIndices); // don't orphan an old alloc here accidentally
allocateJointIndices(src.mNumVertices);
LLVector4a::memcpyNonAliased16((F32*) mJointIndices, (F32*) src.mJointIndices, src.mNumVertices * sizeof(U8) * 4);
}
else
else*/
{
ll_aligned_free_16(mJointIndices);
mJointIndices = NULL;
}
}
#endif
}
if (mNumIndices)
{
S32 idx_size = (mNumIndices*sizeof(U16)+0xF) & ~0xF;
@ -4823,8 +4824,13 @@ void LLVolumeFace::freeData()
mTangents = NULL;
ll_aligned_free_16(mWeights);
mWeights = NULL;
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
ll_aligned_free_16(mJointIndices);
mJointIndices = NULL;
ll_aligned_free_16(mJustWeights);
mJustWeights = NULL;
#endif
delete mOctree;
mOctree = NULL;
@ -5479,13 +5485,17 @@ bool LLVolumeFace::cacheOptimize()
// DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
ll_aligned_free_16(mWeights);
ll_aligned_free_16(mTangents);
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
ll_aligned_free_16(mJointIndices);
ll_aligned_free_16(mJustWeights);
mJustWeights = NULL;
mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration
#endif
mPositions = pos;
mNormals = norm;
mTexCoords = tc;
mWeights = wght;
mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration
mWeights = wght;
mTangents = binorm;
//std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
@ -6401,8 +6411,13 @@ void LLVolumeFace::allocateWeights(S32 num_verts)
// (Re)allocates the two per-vertex skinning buffers, mJointIndices and
// mJustWeights, sized for num_verts vertices. Any previous allocations are
// released first. The new memory is not initialized here.
// Compiles to an empty function unless USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
// is enabled.
void LLVolumeFace::allocateJointIndices(S32 num_verts)
{
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
    // Drop the old buffers before replacing them.
    ll_aligned_free_16(mJustWeights);
    ll_aligned_free_16(mJointIndices);

    // Four U8 joint indices and one LLVector4a of weights per vertex.
    const size_t index_bytes = sizeof(U8) * 4 * num_verts;
    const size_t weight_bytes = sizeof(LLVector4a) * num_verts;

    mJointIndices = static_cast<U8*>(ll_aligned_malloc_16(index_bytes));
    mJustWeights = static_cast<LLVector4a*>(ll_aligned_malloc_16(weight_bytes));
#endif
}
void LLVolumeFace::resizeIndices(S32 num_indices)

View File

@ -956,7 +956,11 @@ public:
// format is mWeights[vertex_index].mV[influence] = <joint_index>.<weight>
// mWeights.size() should be empty or match mVertices.size()
LLVector4a* mWeights;
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
LLVector4a* mJustWeights;
U8* mJointIndices;
#endif
mutable BOOL mWeightsScrubbed;

View File

@ -38,6 +38,7 @@
#include "lldrawable.h"
#include "lldrawpoolbump.h"
#include "llface.h"
#include "llvolume.h"
#include "llmeshrepository.h"
#include "llsky.h"
#include "llviewercamera.h"
@ -1833,15 +1834,13 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
LLFace* face,
const LLMeshSkinInfo* skin,
LLVolume* volume,
const LLVolumeFace& vol_face)
LLVolumeFace& vol_face)
{
LLVector4a* weights = vol_face.mWeights;
if (!weights)
{
return;
}
// FIXME ugly const cast
LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer();
LLDrawable* drawable = face->getDrawable();
@ -1851,6 +1850,48 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
return;
}
const U32 max_joints = LLSkinningUtil::getMaxJointCount();
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
#define CONDITION_WEIGHT(f) ((U8)llclamp((S32)f, (S32)0, (S32)max_joints-1))
LLVector4a* just_weights = vol_face.mJustWeights;
// we need to calculate the separated indices and store just the matrix weights for this vol...
if (!vol_face.mJointIndices)
{
// not very consty after all...
vol_face.allocateJointIndices(vol_face.mNumVertices);
just_weights = vol_face.mJustWeights;
U8* joint_indices_cursor = vol_face.mJointIndices;
for (int i = 0; i < vol_face.mNumVertices; i++)
{
F32* w = weights[i].getF32ptr();
F32* w_ = just_weights[i].getF32ptr();
F32 w0 = floorf(w[0]);
F32 w1 = floorf(w[1]);
F32 w2 = floorf(w[2]);
F32 w3 = floorf(w[3]);
joint_indices_cursor[0] = CONDITION_WEIGHT(w0);
joint_indices_cursor[1] = CONDITION_WEIGHT(w1);
joint_indices_cursor[2] = CONDITION_WEIGHT(w2);
joint_indices_cursor[3] = CONDITION_WEIGHT(w3);
// remove joint portion of combined weight
w_[0] = w[0] - w0;
w_[1] = w[1] - w1;
w_[2] = w[2] - w2;
w_[3] = w[3] - w3;
joint_indices_cursor += 4;
}
}
#endif
// FIXME ugly const cast
LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
U32 data_mask = face->getRiggedVertexBufferDataMask();
if (!vol_face.mWeightsScrubbed)
@ -1927,29 +1968,67 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
LLMatrix4a bind_shape_matrix;
bind_shape_matrix.loadu(skin->mBindShapeMatrix);
const U32 max_joints = LLSkinningUtil::getMaxJointCount();
for (U32 j = 0; j < buffer->getNumVerts(); ++j)
{
LLMatrix4a final_mat;
LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
LLVector4a& v = vol_face.mPositions[j];
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
U8* joint_indices_cursor = vol_face.mJointIndices;
// fast path with joint indices separate from weights
if (joint_indices_cursor)
{
LLMatrix4a src[4];
for (U32 j = 0; j < buffer->getNumVerts(); ++j)
{
LLMatrix4a final_mat;
//LLMatrix4a final_mat_correct;
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
F32* jw = just_weights[j].getF32ptr();
if (norm)
{
LLVector4a& n = vol_face.mNormals[j];
bind_shape_matrix.rotate(n, t);
final_mat.rotate(t, dst);
dst.normalize3fast();
norm[j] = dst;
}
}
LLSkinningUtil::getPerVertexSkinMatrixWithIndices(jw, joint_indices_cursor, mat, final_mat, src);
joint_indices_cursor += 4;
LLVector4a& v = vol_face.mPositions[j];
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
if (norm)
{
LLVector4a& n = vol_face.mNormals[j];
bind_shape_matrix.rotate(n, t);
final_mat.rotate(t, dst);
dst.normalize3fast();
norm[j] = dst;
}
}
}
// slow path with joint indices calculated from weights
else
#endif
{
for (U32 j = 0; j < buffer->getNumVerts(); ++j)
{
LLMatrix4a final_mat;
LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
LLVector4a& v = vol_face.mPositions[j];
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
if (norm)
{
LLVector4a& n = vol_face.mNormals[j];
bind_shape_matrix.rotate(n, t);
final_mat.rotate(t, dst);
//dst.normalize3fast();
norm[j] = dst;
}
}
}
}
}
@ -2301,7 +2380,7 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)
stop_glerror();
const LLVolumeFace& vol_face = volume->getVolumeFace(te);
LLVolumeFace& vol_face = volume->getVolumeFace(te);
updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
}
}

View File

@ -257,7 +257,7 @@ typedef enum
LLFace* facep,
const LLMeshSkinInfo* skin,
LLVolume* volume,
const LLVolumeFace& vol_face);
LLVolumeFace& vol_face);
void updateRiggedVertexBuffers(LLVOAvatar* avatar);
void renderRigged(LLVOAvatar* avatar, U32 type, bool glow = false);

View File

@ -34,8 +34,12 @@
#include "llvolume.h"
#include "llrigginginfo.h"
#define DEBUG_SKINNING LL_DEBUG
#define MAT_USE_SSE 1
void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, const LLMeshSkinInfo *skin)
{
#if DEBUG_SKINNING
static S32 dump_count = 0;
const S32 max_dump = 10;
@ -81,16 +85,16 @@ void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, c
dump_count++;
}
#endif
}
// Currently a no-op: the body is empty.
void LLSkinningUtil::initClass()
{
}
U32 LLSkinningUtil::getMaxJointCount()
S32 LLSkinningUtil::getMaxJointCount()
{
U32 result = LL_MAX_JOINTS_PER_MESH_OBJECT;
return result;
return (S32)LL_MAX_JOINTS_PER_MESH_OBJECT;
}
U32 LLSkinningUtil::getMeshJointCount(const LLMeshSkinInfo *skin)
@ -120,6 +124,8 @@ void LLSkinningUtil::scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin
skin->mInvalidJointsScrubbed = true;
}
#define MAT_USE_SSE 1
void LLSkinningUtil::initSkinningMatrixPalette(
LLMatrix4* mat,
S32 count,
@ -130,9 +136,9 @@ void LLSkinningUtil::initSkinningMatrixPalette(
for (U32 j = 0; j < count; ++j)
{
LLJoint *joint = avatar->getJoint(skin->mJointNums[j]);
llassert(joint);
if (joint)
{
#define MAT_USE_SSE
#ifdef MAT_USE_SSE
LLMatrix4a bind, world, res;
bind.loadu(skin->mInvBindMatrix[j]);
@ -147,6 +153,7 @@ void LLSkinningUtil::initSkinningMatrixPalette(
else
{
mat[j] = skin->mInvBindMatrix[j];
#if DEBUG_SKINNING
// This shouldn't happen - in mesh upload, skinned
// rendering should be disabled unless all joints are
// valid. In other cases of skinned rendering, invalid
@ -157,16 +164,15 @@ void LLSkinningUtil::initSkinningMatrixPalette(
LL_WARNS_ONCE("Avatar") << avatar->getFullname()
<< " avatar build state: isBuilt() " << avatar->isBuilt()
<< " mInitFlags " << avatar->mInitFlags << LL_ENDL;
#if 0
dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
#endif
dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
}
}
}
void LLSkinningUtil::checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin)
{
#ifdef SHOW_ASSERT // same condition that controls llassert()
#if DEBUG_SKINNING
const S32 max_joints = skin->mJointNames.size();
for (U32 j=0; j<num_vertices; j++)
{
@ -265,6 +271,7 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
{
for (U32 j = 0; j < skin->mJointNames.size(); ++j)
{
#if DEBUG_SKINNING
LLJoint *joint = NULL;
if (skin->mJointNums[j] == -1)
{
@ -282,11 +289,16 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
{
LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " unable to find joint " << skin->mJointNames[j] << LL_ENDL;
LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " avatar build state: isBuilt() " << avatar->isBuilt() << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
#if 0
dump_avatar_and_skin_state("initJointNums joint not found", avatar, skin);
#endif
skin->mJointNums[j] = 0;
}
}
#else
LLJoint *joint = (skin->mJointNums[j] == -1) ? avatar->getJoint(skin->mJointNames[j]) : avatar->getJoint(skin->mJointNums[j]);
skin->mJointNums[j] = joint ? joint->getJointNum() : 0;
#endif
// insure we have *a* valid joint to reference
llassert(skin->mJointNums[j] >= 0);
}
skin->mJointNumsInitialized = true;
}
@ -344,14 +356,17 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
// FIXME could precompute these matMuls.
LLMatrix4a bind_shape;
bind_shape.loadu(skin->mBindShapeMatrix);
LLMatrix4a inv_bind;
inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
LLMatrix4a mat;
matMul(bind_shape, inv_bind, mat);
LLVector4a pos_joint_space;
bind_shape.loadu(skin->mBindShapeMatrix);
inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
matMul(bind_shape, inv_bind, mat);
mat.affineTransform(pos, pos_joint_space);
pos_joint_space.mul(wght[k]);
LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
update_min_max(extents[0], extents[1], pos_joint_space);
}
@ -366,6 +381,8 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
vol_face.mJointRiggingInfoTab.setNeedsUpdate(false);
}
}
#if DEBUG_SKINNING
if (vol_face.mJointRiggingInfoTab.size()!=0)
{
LL_DEBUGS("RigSpammish") << "we have rigging info for vf " << &vol_face
@ -376,10 +393,40 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
LL_DEBUGS("RigSpammish") << "no rigging info for vf " << &vol_face
<< " num_verts " << vol_face.mNumVertices << LL_ENDL;
}
#endif
}
}
// Updates rig_info_tab from a run of vertices whose joint indices are supplied
// in a separate buffer.
//
// For each of the num_verts vertices and each of its 4 influences, the joint
// referenced by the influence is flagged as rigged-to and its rigged extents
// are grown to include the vertex position transformed by
// (bind shape * inverse bind) and then scaled by the influence's weight.
//
//   skin          - supplies mJointNums, mBindShapeMatrix and mInvBindMatrix
//   avatar        - not used by this function
//   num_verts     - number of entries in weights / positions
//   weights       - one LLVector4a per vertex; component k scales influence k
//   positions     - one LLVector4a per vertex
//   joint_indices - 4 consecutive U8 entries per vertex (the layout
//                   LLVolumeFace::allocateJointIndices sizes its buffer for);
//                   each entry indexes skin->mJointNums / skin->mInvBindMatrix
//   rig_info_tab  - table updated in place, indexed by avatar joint number
//
// Joint numbers are only range-checked via llassert.
void LLSkinningUtil::updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab)
{
    LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO);

    // The bind shape matrix does not depend on the vertex or the influence,
    // so load it once instead of once per influence.
    LLMatrix4a bind_shape;
    bind_shape.loadu(skin->mBindShapeMatrix);

    for (S32 i = 0; i < num_verts; i++)
    {
        LLVector4a& pos = positions[i];
        LLVector4a& wght = weights[i];

        // Each vertex owns its own group of 4 indices. Reading joint_indices[k]
        // directly would reuse vertex 0's joints for every vertex.
        const U8* vert_joint_indices = joint_indices + (i * 4);

        for (U32 k = 0; k < 4; ++k)
        {
            const U8 joint_index = vert_joint_indices[k];
            S32 joint_num = skin->mJointNums[joint_index];
            llassert(joint_num >= 0 && joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS);

            rig_info_tab[joint_num].setIsRiggedTo(true);

            LLMatrix4a inv_bind;
            inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
            LLMatrix4a mat;
            matMul(bind_shape, inv_bind, mat);

            LLVector4a pos_joint_space;
            mat.affineTransform(pos, pos_joint_space);
            pos_joint_space.mul(wght[k]);

            LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
            update_min_max(extents[0], extents[1], pos_joint_space);
        }
    }
}
// This is used for extracting rotation from a bind shape matrix that
// already has scales baked in
LLQuaternion LLSkinningUtil::getUnscaledQuaternion(const LLMatrix4& mat4)

View File

@ -27,23 +27,48 @@
#ifndef LLSKINNINGUTIL_H
#define LLSKINNINGUTIL_H
#include "v2math.h"
#include "v4math.h"
#include "llvector4a.h"
#include "llmatrix4a.h"
class LLVOAvatar;
class LLMeshSkinInfo;
class LLMatrix4a;
class LLVolumeFace;
class LLJointRiggingInfoTab;
namespace LLSkinningUtil
{
void initClass();
U32 getMaxJointCount();
S32 getMaxJointCount();
U32 getMeshJointCount(const LLMeshSkinInfo *skin);
void scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin);
void initSkinningMatrixPalette(LLMatrix4* mat, S32 count, const LLMeshSkinInfo* skin, LLVOAvatar *avatar);
void checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
void scrubSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
void getPerVertexSkinMatrix(F32* weights, LLMatrix4a* mat, bool handle_bad_scale, LLMatrix4a& final_mat, U32 max_joints);
// Builds the skinning matrix for one vertex from joint indices that are
// supplied separately from the weights.
//
//   weights   - 4 floats; weights[k] is applied to influence k
//   idx       - 4 entries; idx[k] selects the palette matrix mat[idx[k]]
//   mat       - matrix palette indexed by idx[k]
//   final_mat - output; cleared, then the four src[k] matrices are added to it
//   src       - scratch array of at least 4 matrices, overwritten on each call
//
// No bounds checking is done on idx here; callers must pass indices that are
// valid for mat.
LL_FORCE_INLINE void getPerVertexSkinMatrixWithIndices(
F32* weights,
U8* idx,
LLMatrix4a* mat,
LLMatrix4a& final_mat,
LLMatrix4a* src)
{
final_mat.clear();
// src[k] = setMul(mat[idx[k]], weights[k]) -- presumably the palette matrix
// scaled by the scalar weight; confirm against LLMatrix4a::setMul.
// The multiplies and adds are issued in pairs rather than in a loop.
// NOTE(review): this ordering looks deliberate for scheduling -- confirm
// before restructuring.
src[0].setMul(mat[idx[0]], weights[0]);
src[1].setMul(mat[idx[1]], weights[1]);
final_mat.add(src[0]);
final_mat.add(src[1]);
src[2].setMul(mat[idx[2]], weights[2]);
src[3].setMul(mat[idx[3]], weights[3]);
final_mat.add(src[2]);
final_mat.add(src[3]);
}
void initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar);
void updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face);
void updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab);
LLQuaternion getUnscaledQuaternion(const LLMatrix4& mat4);
};

View File

@ -4787,18 +4787,44 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons
U32 max_joints = LLSkinningUtil::getMaxJointCount();
rigged_vert_count += dst_face.mNumVertices;
rigged_face_count++;
for (U32 j = 0; j < dst_face.mNumVertices; ++j)
{
LLMatrix4a final_mat;
LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
if (vol_face.mJointIndices) // fast path with preconditioned joint indices
{
LLMatrix4a src[4];
U8* joint_indices_cursor = vol_face.mJointIndices;
LLVector4a* just_weights = vol_face.mJustWeights;
for (U32 j = 0; j < dst_face.mNumVertices; ++j)
{
LLMatrix4a final_mat;
F32* w = just_weights[j].getF32ptr();
LLSkinningUtil::getPerVertexSkinMatrixWithIndices(w, joint_indices_cursor, mat, final_mat, src);
joint_indices_cursor += 4;
LLVector4a& v = vol_face.mPositions[j];
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
}
}
else
#endif
{
for (U32 j = 0; j < dst_face.mNumVertices; ++j)
{
LLMatrix4a final_mat;
LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
LLVector4a& v = vol_face.mPositions[j];
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
}
LLVector4a& v = vol_face.mPositions[j];
LLVector4a t;
LLVector4a dst;
bind_shape_matrix.affineTransform(v, t);
final_mat.affineTransform(t, dst);
pos[j] = dst;
}
}
//update bounding box
// VFExtents change