SL-451 - SSE-optimized matrix multiply, used in initSkinningMatrixPalette()

master
Brad Payne (Vir Linden) 2016-09-28 15:43:07 -04:00
parent e6297ab3d6
commit 8d6bb9ee4e
5 changed files with 56 additions and 7 deletions

View File

@ -1493,3 +1493,20 @@ namespace LLError
}
}
bool debugLoggingEnabled(const std::string& tag)
{
const char* tags[] = {tag.c_str()};
::size_t tag_count = 1;
LLError::CallSite _site(LLError::LEVEL_DEBUG, __FILE__, __LINE__,
typeid(_LL_CLASS_TO_LOG), __FUNCTION__, false, tags, tag_count);
if (LL_UNLIKELY(_site.shouldLog()))
{
return true;
}
else
{
return false;
}
}

View File

@ -363,4 +363,7 @@ typedef LLError::NoClassInfo _LL_CLASS_TO_LOG;
#define LL_INFOS_ONCE(...) lllog(LLError::LEVEL_INFO, true, ##__VA_ARGS__)
#define LL_WARNS_ONCE(...) lllog(LLError::LEVEL_WARN, true, ##__VA_ARGS__)
// Check at run-time whether logging is enabled, without generating output.
// Returns true when a LEVEL_DEBUG message carrying the given tag would be
// logged, false otherwise. Useful for guarding work that is only needed to
// produce debug output.
bool debugLoggingEnabled(const std::string& tag);
#endif // LL_LLERROR_H

View File

@ -153,4 +153,27 @@ public:
}
};
// Multiply a 4-component row vector by a 4x4 matrix using SSE:
//
//   result = row[0] * mat.mMatrix[0] + row[1] * mat.mMatrix[1]
//          + row[2] * mat.mMatrix[2] + row[3] * mat.mMatrix[3]
//
// Each component of `row` is broadcast to all four lanes with
// _mm_shuffle_ps, scaled against the matching matrix row, and the four
// products are summed left to right.
inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
{
    // Broadcast lanes 0..3 of `row`.
    const __m128 lane0 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0));
    const __m128 lane1 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 lane2 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 lane3 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3));

    // Accumulate in a fixed order so the rounding behaviour stays stable.
    __m128 sum = _mm_mul_ps(lane0, mat.mMatrix[0]);
    sum = _mm_add_ps(sum, _mm_mul_ps(lane1, mat.mMatrix[1]));
    sum = _mm_add_ps(sum, _mm_mul_ps(lane2, mat.mMatrix[2]));
    sum = _mm_add_ps(sum, _mm_mul_ps(lane3, mat.mMatrix[3]));

    LLVector4a result;
    result = sum;
    return result;
}
inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
{
LLVector4a row0 = rowMul(a.mMatrix[0], b);
LLVector4a row1 = rowMul(a.mMatrix[1], b);
LLVector4a row2 = rowMul(a.mMatrix[2], b);
LLVector4a row3 = rowMul(a.mMatrix[3], b);
res.mMatrix[0] = row0;
res.mMatrix[1] = row1;
res.mMatrix[2] = row2;
res.mMatrix[3] = row3;
}
#endif

View File

@ -243,8 +243,6 @@ void LLSkinningUtil::initSkinningMatrixPalette(
const LLMeshSkinInfo* skin,
LLVOAvatar *avatar)
{
// BENTO - switching to use Matrix4a and SSE might speed this up.
// Note that we are mostly passing Matrix4a's to this routine anyway, just dubiously casted.
for (U32 j = 0; j < count; ++j)
{
LLJoint *joint = NULL;
@ -260,13 +258,23 @@ void LLSkinningUtil::initSkinningMatrixPalette(
{
joint = avatar->getJoint(skin->mJointNums[j]);
}
mat[j] = skin->mInvBindMatrix[j];
if (joint)
{
#define MAT_USE_SSE
#ifdef MAT_USE_SSE
LLMatrix4a bind, world, res;
bind.loadu(skin->mInvBindMatrix[j]);
world.loadu(joint->getWorldMatrix());
matMul(bind,world,res);
memcpy(mat[j].mMatrix,res.mMatrix,16*sizeof(float));
#else
mat[j] = skin->mInvBindMatrix[j];
mat[j] *= joint->getWorldMatrix();
#endif
}
else
{
mat[j] = skin->mInvBindMatrix[j];
// This shouldn't happen - in mesh upload, skinned
// rendering should be disabled unless all joints are
// valid. In other cases of skinned rendering, invalid

View File

@ -4779,13 +4779,11 @@ void LLVolumeGeometryManager::rebuildGeom(LLSpatialGroup* group)
if (rigged && pAvatarVO)
{
pAvatarVO->addAttachmentOverridesForObject(vobj);
#if 0
if (pAvatarVO->isSelf())
{
if (debugLoggingEnabled("Avatar") && pAvatarVO->isSelf())
{
bool verbose = true;
pAvatarVO->showAttachmentOverrides(verbose);
}
#endif
}
//for each face