Fix shutdown crash in teardown of joint hierarchy.

Add ifdef'd code for a potential skinning speed-up that avoids lots of int<->float conversions (the conversions are expensive and their results are static, so they can be cached for a minimal space investment), since updating rigged VBs shows up as a profiling bottleneck for Low rendering (where we actually use CPU skinning).
2019-08-06 14:41:55 -07:00 · 2019-08-06 14:41:55 -07:00 · 71af0a2a9e
parent 76128c4357
commit 71af0a2a9e
8 changed files with 268 additions and 71 deletions
--- a/indra/llcharacter/lljoint.cpp
+++ b/indra/llcharacter/lljoint.cpp
@ -303,16 +303,17 @@ void LLJoint::removeChild(LLJoint* joint)
 //--------------------------------------------------------------------
 void LLJoint::removeAllChildren()
 {
-	for (joints_t::iterator iter = mChildren.begin();
-		 iter != mChildren.end();)
+	for (LLJoint* joint : mChildren)
 	{
-		joints_t::iterator curiter = iter++;
-		LLJoint* joint = *curiter;
-		mChildren.erase(curiter);
-		joint->mXform.setParent(NULL);
-		joint->mParent = NULL;
-		joint->touch();
+		if (joint)
+        {
+		    joint->mXform.setParent(NULL);
+		    joint->mParent = NULL;
+		    joint->touch();
+            //delete joint;
+        }
 	}
+    mChildren.clear();
 }


--- a/indra/llmath/llvolume.cpp
+++ b/indra/llmath/llvolume.cpp
@ -2526,7 +2526,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
 			if (mdl[i].has("Weights"))
 			{
 				face.allocateWeights(num_verts);
-                face.allocateJointIndices(num_verts);

 				LLSD::Binary weights = mdl[i]["Weights"];

@ -2567,13 +2566,6 @@ bool LLVolume::unpackVolumeFaces(std::istream& is, S32 size)
                    {
                        wght = LLVector4(0.999f,0.f,0.f,0.f);
                    }
-                    if (face.mJointIndices)
-                    {
-                        for (U32 k=0; k<4; k++)
-                        {
-                            face.mJointIndices[cur_vertex * 4 + k] = llclamp((U8)joints[k], (U8)0, (U8)110);
-                        }
-                    }
                    for (U32 k=0; k<4; k++)
                    {
                        F32 f_combined = (F32) joints[k] + wght[k];
@ -4664,7 +4656,10 @@ LLVolumeFace::LLVolumeFace() :
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+    mJustWeights(NULL),
    mJointIndices(NULL),
+#endif
    mWeightsScrubbed(FALSE),
 	mOctree(NULL),
 	mOptimized(FALSE)
@ -4691,7 +4686,10 @@ LLVolumeFace::LLVolumeFace(const LLVolumeFace& src)
 	mTexCoords(NULL),
 	mIndices(NULL),
 	mWeights(NULL),
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+    mJustWeights(NULL),
    mJointIndices(NULL),
+#endif
    mWeightsScrubbed(FALSE),
 	mOctree(NULL)
 { 
@ -4768,19 +4766,22 @@ LLVolumeFace& LLVolumeFace::operator=(const LLVolumeFace& src)
            mWeightsScrubbed = FALSE;
 		}   

+    #if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
        if (src.mJointIndices)
        {
            llassert(!mJointIndices); // don't orphan an old alloc here accidentally
            allocateJointIndices(src.mNumVertices);
            LLVector4a::memcpyNonAliased16((F32*) mJointIndices, (F32*) src.mJointIndices, src.mNumVertices * sizeof(U8) * 4);
        }
-        else
+        else*/
        {
            ll_aligned_free_16(mJointIndices);
            mJointIndices = NULL;
        }     
-	}
+    #endif

+	}
+    
 	if (mNumIndices)
 	{
 		S32 idx_size = (mNumIndices*sizeof(U16)+0xF) & ~0xF;
@ -4823,8 +4824,13 @@ void LLVolumeFace::freeData()
 	mTangents = NULL;
 	ll_aligned_free_16(mWeights);
 	mWeights = NULL;
+
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
    ll_aligned_free_16(mJointIndices);
 	mJointIndices = NULL;
+    ll_aligned_free_16(mJustWeights);
+	mJustWeights = NULL;
+#endif

 	delete mOctree;
 	mOctree = NULL;
@ -5479,13 +5485,17 @@ bool LLVolumeFace::cacheOptimize()
 	// DO NOT free mNormals and mTexCoords as they are part of mPositions buffer
 	ll_aligned_free_16(mWeights);
 	ll_aligned_free_16(mTangents);
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
    ll_aligned_free_16(mJointIndices);
+    ll_aligned_free_16(mJustWeights);
+    mJustWeights = NULL;
+    mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration
+#endif

 	mPositions = pos;
 	mNormals = norm;
 	mTexCoords = tc;
-	mWeights = wght;
-    mJointIndices = NULL; // filled in later as necessary by skinning code for acceleration
+	mWeights = wght;    
 	mTangents = binorm;

 	//std::string result = llformat("ACMR pre/post: %.3f/%.3f  --  %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
@ -6401,8 +6411,13 @@ void LLVolumeFace::allocateWeights(S32 num_verts)

 void LLVolumeFace::allocateJointIndices(S32 num_verts)
 {
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
    ll_aligned_free_16(mJointIndices);
+    ll_aligned_free_16(mJustWeights);
+
    mJointIndices = (U8*)ll_aligned_malloc_16(sizeof(U8) * 4 * num_verts);    
+    mJustWeights = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a) * num_verts);    
+#endif
 }

 void LLVolumeFace::resizeIndices(S32 num_indices)
--- a/indra/llmath/llvolume.h
+++ b/indra/llmath/llvolume.h
@ -956,7 +956,11 @@ public:
 	// format is mWeights[vertex_index].mV[influence] = <joint_index>.<weight>
 	// mWeights.size() should be empty or match mVertices.size()  
 	LLVector4a* mWeights;
+
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+    LLVector4a* mJustWeights;
    U8* mJointIndices;
+#endif

    mutable BOOL mWeightsScrubbed;

--- a/indra/newview/lldrawpoolavatar.cpp
+++ b/indra/newview/lldrawpoolavatar.cpp
@ -38,6 +38,7 @@
 #include "lldrawable.h"
 #include "lldrawpoolbump.h"
 #include "llface.h"
+#include "llvolume.h"
 #include "llmeshrepository.h"
 #include "llsky.h"
 #include "llviewercamera.h"
@ -1833,15 +1834,13 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
    LLFace* face,
    const LLMeshSkinInfo* skin,
    LLVolume* volume,
-    const LLVolumeFace& vol_face)
+    LLVolumeFace& vol_face)
 {
 	LLVector4a* weights = vol_face.mWeights;
 	if (!weights)
 	{
 		return;
 	}
-    // FIXME ugly const cast
-    LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));

 	LLPointer<LLVertexBuffer> buffer = face->getVertexBuffer();
 	LLDrawable* drawable = face->getDrawable();
@ -1851,6 +1850,48 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		return;
 	}

+    const U32 max_joints = LLSkinningUtil::getMaxJointCount();
+
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+    #define CONDITION_WEIGHT(f) ((U8)llclamp((S32)f, (S32)0, (S32)max_joints-1))
+    LLVector4a* just_weights = vol_face.mJustWeights;
+    // we need to calculate the separated indices and store just the matrix weights for this vol...
+    if (!vol_face.mJointIndices)
+    {
+        // not very consty after all...
+        vol_face.allocateJointIndices(vol_face.mNumVertices);
+        just_weights = vol_face.mJustWeights;
+
+        U8* joint_indices_cursor = vol_face.mJointIndices;
+        for (int i = 0; i < vol_face.mNumVertices; i++)
+        {
+            F32* w = weights[i].getF32ptr();
+            F32* w_ = just_weights[i].getF32ptr();
+
+            F32 w0 = floorf(w[0]);
+            F32 w1 = floorf(w[1]);
+            F32 w2 = floorf(w[2]);
+            F32 w3 = floorf(w[3]);
+
+            joint_indices_cursor[0] = CONDITION_WEIGHT(w0);
+            joint_indices_cursor[1] = CONDITION_WEIGHT(w1);
+            joint_indices_cursor[2] = CONDITION_WEIGHT(w2);
+            joint_indices_cursor[3] = CONDITION_WEIGHT(w3);
+
+            // remove joint portion of combined weight
+            w_[0] = w[0] - w0;
+            w_[1] = w[1] - w1;
+            w_[2] = w[2] - w2;
+            w_[3] = w[3] - w3;
+
+            joint_indices_cursor += 4;
+        }
+    }
+#endif
+
+    // FIXME ugly const cast
+    LLSkinningUtil::scrubInvalidJoints(avatar, const_cast<LLMeshSkinInfo*>(skin));
+
 	U32 data_mask = face->getRiggedVertexBufferDataMask();

    if (!vol_face.mWeightsScrubbed)
@ -1927,29 +1968,67 @@ void LLDrawPoolAvatar::updateRiggedFaceVertexBuffer(
 		LLMatrix4a bind_shape_matrix;
 		bind_shape_matrix.loadu(skin->mBindShapeMatrix);

-        const U32 max_joints = LLSkinningUtil::getMaxJointCount();
-		for (U32 j = 0; j < buffer->getNumVerts(); ++j)
-		{
-			LLMatrix4a final_mat;
-            LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
-			
-			LLVector4a& v = vol_face.mPositions[j];
+#if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+        U8* joint_indices_cursor = vol_face.mJointIndices;
+        // fast path with joint indices separate from weights
+        if (joint_indices_cursor)
+        {
+            LLMatrix4a src[4];
+		    for (U32 j = 0; j < buffer->getNumVerts(); ++j)
+		    {
+			    LLMatrix4a final_mat;
+                //LLMatrix4a final_mat_correct;

-			LLVector4a t;
-			LLVector4a dst;
-			bind_shape_matrix.affineTransform(v, t);
-			final_mat.affineTransform(t, dst);
-			pos[j] = dst;
+                F32* jw = just_weights[j].getF32ptr();

-			if (norm)
-			{
-				LLVector4a& n = vol_face.mNormals[j];
-				bind_shape_matrix.rotate(n, t);
-				final_mat.rotate(t, dst);
-				dst.normalize3fast();
-				norm[j] = dst;
-			}
-		}
+                LLSkinningUtil::getPerVertexSkinMatrixWithIndices(jw, joint_indices_cursor, mat, final_mat, src);                
+
+                joint_indices_cursor += 4;
+
+			    LLVector4a& v = vol_face.mPositions[j];
+
+			    LLVector4a t;
+			    LLVector4a dst;
+			    bind_shape_matrix.affineTransform(v, t);
+			    final_mat.affineTransform(t, dst);
+			    pos[j] = dst;
+
+			    if (norm)
+			    {
+				    LLVector4a& n = vol_face.mNormals[j];
+				    bind_shape_matrix.rotate(n, t);
+				    final_mat.rotate(t, dst);
+				    dst.normalize3fast();
+				    norm[j] = dst;
+			    }
+		    }
+        }
+        // slow path with joint indices calculated from weights
+        else
+#endif
+        {
+            for (U32 j = 0; j < buffer->getNumVerts(); ++j)
+		    {
+			    LLMatrix4a final_mat;
+                LLSkinningUtil::getPerVertexSkinMatrix(weights[j].getF32ptr(), mat, false, final_mat, max_joints);
+
+			    LLVector4a& v = vol_face.mPositions[j];
+			    LLVector4a t;
+			    LLVector4a dst;
+			    bind_shape_matrix.affineTransform(v, t);
+			    final_mat.affineTransform(t, dst);
+			    pos[j] = dst;
+
+			    if (norm)
+			    {
+				    LLVector4a& n = vol_face.mNormals[j];
+				    bind_shape_matrix.rotate(n, t);
+				    final_mat.rotate(t, dst);
+				    //dst.normalize3fast();
+				    norm[j] = dst;
+			    }
+		    }
+        }
 	}
 }

@ -2301,7 +2380,7 @@ void LLDrawPoolAvatar::updateRiggedVertexBuffers(LLVOAvatar* avatar)

 			stop_glerror();

-			const LLVolumeFace& vol_face = volume->getVolumeFace(te);
+			LLVolumeFace& vol_face = volume->getVolumeFace(te);
 			updateRiggedFaceVertexBuffer(avatar, face, skin, volume, vol_face);
 		}
 	}
--- a/indra/newview/lldrawpoolavatar.h
+++ b/indra/newview/lldrawpoolavatar.h
@ -257,7 +257,7 @@ typedef enum
 									  LLFace* facep, 
 									  const LLMeshSkinInfo* skin, 
 									  LLVolume* volume,
-									  const LLVolumeFace& vol_face);
+									  LLVolumeFace& vol_face);
 	void updateRiggedVertexBuffers(LLVOAvatar* avatar);

 	void renderRigged(LLVOAvatar* avatar, U32 type, bool glow = false);
--- a/indra/newview/llskinningutil.cpp
+++ b/indra/newview/llskinningutil.cpp
@ -34,8 +34,12 @@
 #include "llvolume.h"
 #include "llrigginginfo.h"

+#define DEBUG_SKINNING  LL_DEBUG
+#define MAT_USE_SSE     1
+
 void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, const LLMeshSkinInfo *skin)
 {
+#if DEBUG_SKINNING
    static S32 dump_count = 0;
    const S32 max_dump = 10;

@ -81,16 +85,16 @@ void dump_avatar_and_skin_state(const std::string& reason, LLVOAvatar *avatar, c

        dump_count++;
    }
+#endif
 }

 void LLSkinningUtil::initClass()
 {
 }

-U32 LLSkinningUtil::getMaxJointCount()
+S32 LLSkinningUtil::getMaxJointCount()
 {
-    U32 result = LL_MAX_JOINTS_PER_MESH_OBJECT;
-	return result;
+    return (S32)LL_MAX_JOINTS_PER_MESH_OBJECT;
 }

 U32 LLSkinningUtil::getMeshJointCount(const LLMeshSkinInfo *skin)
@ -120,6 +124,8 @@ void LLSkinningUtil::scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin
    skin->mInvalidJointsScrubbed = true;
 }

+#define MAT_USE_SSE 1
+
 void LLSkinningUtil::initSkinningMatrixPalette(
    LLMatrix4* mat,
    S32 count, 
@ -130,9 +136,9 @@ void LLSkinningUtil::initSkinningMatrixPalette(
    for (U32 j = 0; j < count; ++j)
    {
        LLJoint *joint = avatar->getJoint(skin->mJointNums[j]);
+        llassert(joint);
        if (joint)
        {
-#define MAT_USE_SSE
 #ifdef MAT_USE_SSE
            LLMatrix4a bind, world, res;
            bind.loadu(skin->mInvBindMatrix[j]);
@ -147,6 +153,7 @@ void LLSkinningUtil::initSkinningMatrixPalette(
        else
        {
            mat[j] = skin->mInvBindMatrix[j];
+#if DEBUG_SKINNING
            // This  shouldn't  happen   -  in  mesh  upload,  skinned
            // rendering  should  be disabled  unless  all joints  are
            // valid.  In other  cases of  skinned  rendering, invalid
@ -157,16 +164,15 @@ void LLSkinningUtil::initSkinningMatrixPalette(
            LL_WARNS_ONCE("Avatar") << avatar->getFullname() 
                                    << " avatar build state: isBuilt() " << avatar->isBuilt() 
                                    << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
-#if 0
-            dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
 #endif
+            dump_avatar_and_skin_state("initSkinningMatrixPalette joint not found", avatar, skin);
        }
    }
 }

 void LLSkinningUtil::checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin)
 {
-#ifdef SHOW_ASSERT                  // same condition that controls llassert()
+#if DEBUG_SKINNING
 	const S32 max_joints = skin->mJointNames.size();
    for (U32 j=0; j<num_vertices; j++)
    {
@ -265,6 +271,7 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
    {
        for (U32 j = 0; j < skin->mJointNames.size(); ++j)
        {
+    #if DEBUG_SKINNING     
            LLJoint *joint = NULL;
            if (skin->mJointNums[j] == -1)
            {
@ -282,11 +289,16 @@ void LLSkinningUtil::initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar)
                {
                    LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " unable to find joint " << skin->mJointNames[j] << LL_ENDL;
                    LL_WARNS_ONCE("Avatar") << avatar->getFullname() << " avatar build state: isBuilt() " << avatar->isBuilt() << " mInitFlags " << avatar->mInitFlags << LL_ENDL;
-#if 0
                    dump_avatar_and_skin_state("initJointNums joint not found", avatar, skin);
-#endif
+                    skin->mJointNums[j] = 0;
                }
            }
+    #else
+            LLJoint *joint = (skin->mJointNums[j] == -1) ? avatar->getJoint(skin->mJointNames[j]) : avatar->getJoint(skin->mJointNums[j]);
+            skin->mJointNums[j] = joint ? joint->getJointNum() : 0;            
+    #endif
+            // insure we have *a* valid joint to reference
+            llassert(skin->mJointNums[j] >= 0);
        }
        skin->mJointNumsInitialized = true;
    }
@ -344,14 +356,17 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a

                                // FIXME could precompute these matMuls.
                                LLMatrix4a bind_shape;
-                                bind_shape.loadu(skin->mBindShapeMatrix);
                                LLMatrix4a inv_bind;
-                                inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
                                LLMatrix4a mat;
-                                matMul(bind_shape, inv_bind, mat);
                                LLVector4a pos_joint_space;
+
+                                bind_shape.loadu(skin->mBindShapeMatrix);
+                                inv_bind.loadu(skin->mInvBindMatrix[joint_index]);
+                                matMul(bind_shape, inv_bind, mat);
+
                                mat.affineTransform(pos, pos_joint_space);
                                pos_joint_space.mul(wght[k]);
+
                                LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
                                update_min_max(extents[0], extents[1], pos_joint_space);
                            }
@ -366,6 +381,8 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
                vol_face.mJointRiggingInfoTab.setNeedsUpdate(false);
            }
        }
+
+#if DEBUG_SKINNING
        if (vol_face.mJointRiggingInfoTab.size()!=0)
        {
            LL_DEBUGS("RigSpammish") << "we have rigging info for vf " << &vol_face 
@ -376,10 +393,40 @@ void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *a
            LL_DEBUGS("RigSpammish") << "no rigging info for vf " << &vol_face 
                                     << " num_verts " << vol_face.mNumVertices << LL_ENDL; 
        }
+#endif

    }
 }

+void LLSkinningUtil::updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab)
+{
+    LL_RECORD_BLOCK_TIME(FTM_FACE_RIGGING_INFO);
+    for (S32 i=0; i < num_verts; i++)
+    {
+        LLVector4a& pos  = positions[i];
+        LLVector4a& wght = weights[i];
+        for (U32 k=0; k<4; ++k)
+        {
+            S32 joint_num = skin->mJointNums[joint_indices[k]];
+            llassert(joint_num >= 0 && joint_num < LL_CHARACTER_MAX_ANIMATED_JOINTS);
+            {
+                rig_info_tab[joint_num].setIsRiggedTo(true);
+                LLMatrix4a bind_shape;
+                bind_shape.loadu(skin->mBindShapeMatrix);
+                LLMatrix4a inv_bind;
+                inv_bind.loadu(skin->mInvBindMatrix[joint_indices[k]]);
+                LLMatrix4a mat;
+                matMul(bind_shape, inv_bind, mat);
+                LLVector4a pos_joint_space;
+                mat.affineTransform(pos, pos_joint_space);
+                pos_joint_space.mul(wght[k]);
+                LLVector4a *extents = rig_info_tab[joint_num].getRiggedExtents();
+                update_min_max(extents[0], extents[1], pos_joint_space);
+            }
+        }
+    }
+}
+
 // This is used for extracting rotation from a bind shape matrix that
 // already has scales baked in
 LLQuaternion LLSkinningUtil::getUnscaledQuaternion(const LLMatrix4& mat4)
--- a/indra/newview/llskinningutil.h
+++ b/indra/newview/llskinningutil.h
@ -27,23 +27,48 @@
 #ifndef LLSKINNINGUTIL_H
 #define LLSKINNINGUTIL_H

+#include "v2math.h"
+#include "v4math.h"
+#include "llvector4a.h"
+#include "llmatrix4a.h"
+
 class LLVOAvatar;
 class LLMeshSkinInfo;
-class LLMatrix4a;
 class LLVolumeFace;
+class LLJointRiggingInfoTab;

 namespace LLSkinningUtil
 {
    void initClass();
-    U32 getMaxJointCount();
+    S32 getMaxJointCount();
    U32 getMeshJointCount(const LLMeshSkinInfo *skin);
    void scrubInvalidJoints(LLVOAvatar *avatar, LLMeshSkinInfo* skin);
    void initSkinningMatrixPalette(LLMatrix4* mat, S32 count, const LLMeshSkinInfo* skin, LLVOAvatar *avatar);
    void checkSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
    void scrubSkinWeights(LLVector4a* weights, U32 num_vertices, const LLMeshSkinInfo* skin);
    void getPerVertexSkinMatrix(F32* weights, LLMatrix4a* mat, bool handle_bad_scale, LLMatrix4a& final_mat, U32 max_joints);
+
+    LL_FORCE_INLINE void getPerVertexSkinMatrixWithIndices(
+        F32*        weights,
+        U8*         idx,
+        LLMatrix4a* mat,
+        LLMatrix4a& final_mat,
+        LLMatrix4a* src)
+    {    
+        final_mat.clear();
+        src[0].setMul(mat[idx[0]], weights[0]);
+        src[1].setMul(mat[idx[1]], weights[1]);
+        final_mat.add(src[0]);
+        final_mat.add(src[1]);
+        src[2].setMul(mat[idx[2]], weights[2]);        
+        src[3].setMul(mat[idx[3]], weights[3]);
+        final_mat.add(src[2]);
+        final_mat.add(src[3]);
+    }
+
    void initJointNums(LLMeshSkinInfo* skin, LLVOAvatar *avatar);
    void updateRiggingInfo(const LLMeshSkinInfo* skin, LLVOAvatar *avatar, LLVolumeFace& vol_face);
+    void updateRiggingInfo_(LLMeshSkinInfo* skin, LLVOAvatar *avatar, S32 num_verts, LLVector4a* weights, LLVector4a* positions, U8* joint_indices, LLJointRiggingInfoTab &rig_info_tab);
 	LLQuaternion getUnscaledQuaternion(const LLMatrix4& mat4);
 };

--- a/indra/newview/llvovolume.cpp
+++ b/indra/newview/llvovolume.cpp
@ -4787,18 +4787,44 @@ void LLRiggedVolume::update(const LLMeshSkinInfo* skin, LLVOAvatar* avatar, cons
                U32 max_joints = LLSkinningUtil::getMaxJointCount();
                rigged_vert_count += dst_face.mNumVertices;
                rigged_face_count++;
-				for (U32 j = 0; j < dst_face.mNumVertices; ++j)
-				{
-					LLMatrix4a final_mat;
-                    LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
+
+            #if USE_SEPARATE_JOINT_INDICES_AND_WEIGHTS
+                if (vol_face.mJointIndices) // fast path with preconditioned joint indices
+                {
+                    LLMatrix4a src[4];
+                    U8* joint_indices_cursor = vol_face.mJointIndices;
+                    LLVector4a* just_weights = vol_face.mJustWeights;
+                    for (U32 j = 0; j < dst_face.mNumVertices; ++j)
+				    {
+					    LLMatrix4a final_mat;
+                        F32* w = just_weights[j].getF32ptr();
+                        LLSkinningUtil::getPerVertexSkinMatrixWithIndices(w, joint_indices_cursor, mat, final_mat, src);
+                        joint_indices_cursor += 4;
+
+					    LLVector4a& v = vol_face.mPositions[j];
+					    LLVector4a t;
+					    LLVector4a dst;
+					    bind_shape_matrix.affineTransform(v, t);
+					    final_mat.affineTransform(t, dst);
+					    pos[j] = dst;
+				    }
+                }
+                else
+            #endif
+                {
+				    for (U32 j = 0; j < dst_face.mNumVertices; ++j)
+				    {
+					    LLMatrix4a final_mat;
+                        LLSkinningUtil::getPerVertexSkinMatrix(weight[j].getF32ptr(), mat, false, final_mat, max_joints);
 				
-					LLVector4a& v = vol_face.mPositions[j];
-					LLVector4a t;
-					LLVector4a dst;
-					bind_shape_matrix.affineTransform(v, t);
-					final_mat.affineTransform(t, dst);
-					pos[j] = dst;
-				}
+					    LLVector4a& v = vol_face.mPositions[j];
+					    LLVector4a t;
+					    LLVector4a dst;
+					    bind_shape_matrix.affineTransform(v, t);
+					    final_mat.affineTransform(t, dst);
+					    pos[j] = dst;
+				    }
+                }

 				//update bounding box
 				// VFExtents change