244 lines
6.9 KiB
C++
244 lines
6.9 KiB
C++
/**
 * @file llmatrix4a.h
 * @brief LLMatrix4a class header file - memory aligned and vectorized 4x4 matrix
 *
 * $LicenseInfo:firstyear=2007&license=viewerlgpl$
 * Second Life Viewer Source Code
 * Copyright (C) 2010, Linden Research, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License only.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
 * $/LicenseInfo$
 */
|
|
|
|
#ifndef LL_LLMATRIX4A_H
|
|
#define LL_LLMATRIX4A_H
|
|
|
|
#include "llvector4a.h"
|
|
#include "m4math.h"
|
|
#include "m3math.h"
|
|
|
|
class LLMatrix4a
|
|
{
|
|
public:
|
|
LL_ALIGN_16(LLVector4a mMatrix[4]);
|
|
|
|
LLMatrix4a()
|
|
{
|
|
|
|
}
|
|
|
|
explicit LLMatrix4a(const LLMatrix4& val)
|
|
{
|
|
loadu(val);
|
|
}
|
|
|
|
inline F32* getF32ptr()
|
|
{
|
|
return (F32*) &mMatrix;
|
|
}
|
|
|
|
inline const F32* getF32ptr() const
|
|
{
|
|
return (F32*)&mMatrix;
|
|
}
|
|
|
|
inline LLMatrix4& asMatrix4()
|
|
{
|
|
return *(LLMatrix4*)this;
|
|
}
|
|
|
|
inline const LLMatrix4& asMatrix4() const
|
|
{
|
|
return *(LLMatrix4*)this;
|
|
}
|
|
|
|
inline void clear()
|
|
{
|
|
mMatrix[0].clear();
|
|
mMatrix[1].clear();
|
|
mMatrix[2].clear();
|
|
mMatrix[3].clear();
|
|
}
|
|
|
|
inline void setIdentity()
|
|
{
|
|
mMatrix[0].set(1.f, 0.f, 0.f, 0.f);
|
|
mMatrix[1].set(0.f, 1.f, 0.f, 0.f);
|
|
mMatrix[2].set(0.f, 0.f, 1.f, 0.f);
|
|
mMatrix[3].set(0.f, 0.f, 0.f, 1.f);
|
|
}
|
|
|
|
inline void loadu(const LLMatrix4& src)
|
|
{
|
|
mMatrix[0] = _mm_loadu_ps(src.mMatrix[0]);
|
|
mMatrix[1] = _mm_loadu_ps(src.mMatrix[1]);
|
|
mMatrix[2] = _mm_loadu_ps(src.mMatrix[2]);
|
|
mMatrix[3] = _mm_loadu_ps(src.mMatrix[3]);
|
|
}
|
|
|
|
inline void loadu(const F32* src)
|
|
{
|
|
mMatrix[0] = _mm_loadu_ps(src);
|
|
mMatrix[1] = _mm_loadu_ps(src+4);
|
|
mMatrix[2] = _mm_loadu_ps(src+8);
|
|
mMatrix[3] = _mm_loadu_ps(src+12);
|
|
}
|
|
|
|
inline void loadu(const LLMatrix3& src)
|
|
{
|
|
mMatrix[0].load3(src.mMatrix[0]);
|
|
mMatrix[1].load3(src.mMatrix[1]);
|
|
mMatrix[2].load3(src.mMatrix[2]);
|
|
mMatrix[3].set(0,0,0,1.f);
|
|
}
|
|
|
|
inline void add(const LLMatrix4a& rhs)
|
|
{
|
|
mMatrix[0].add(rhs.mMatrix[0]);
|
|
mMatrix[1].add(rhs.mMatrix[1]);
|
|
mMatrix[2].add(rhs.mMatrix[2]);
|
|
mMatrix[3].add(rhs.mMatrix[3]);
|
|
}
|
|
|
|
inline void setRows(const LLVector4a& r0, const LLVector4a& r1, const LLVector4a& r2)
|
|
{
|
|
mMatrix[0] = r0;
|
|
mMatrix[1] = r1;
|
|
mMatrix[2] = r2;
|
|
}
|
|
|
|
inline void setMul(const LLMatrix4a& m, const F32 s)
|
|
{
|
|
mMatrix[0].setMul(m.mMatrix[0], s);
|
|
mMatrix[1].setMul(m.mMatrix[1], s);
|
|
mMatrix[2].setMul(m.mMatrix[2], s);
|
|
mMatrix[3].setMul(m.mMatrix[3], s);
|
|
}
|
|
|
|
inline void setLerp(const LLMatrix4a& a, const LLMatrix4a& b, F32 w)
|
|
{
|
|
LLVector4a d0,d1,d2,d3;
|
|
d0.setSub(b.mMatrix[0], a.mMatrix[0]);
|
|
d1.setSub(b.mMatrix[1], a.mMatrix[1]);
|
|
d2.setSub(b.mMatrix[2], a.mMatrix[2]);
|
|
d3.setSub(b.mMatrix[3], a.mMatrix[3]);
|
|
|
|
// this = a + d*w
|
|
|
|
d0.mul(w);
|
|
d1.mul(w);
|
|
d2.mul(w);
|
|
d3.mul(w);
|
|
|
|
mMatrix[0].setAdd(a.mMatrix[0],d0);
|
|
mMatrix[1].setAdd(a.mMatrix[1],d1);
|
|
mMatrix[2].setAdd(a.mMatrix[2],d2);
|
|
mMatrix[3].setAdd(a.mMatrix[3],d3);
|
|
}
|
|
|
|
inline void rotate(const LLVector4a& v, LLVector4a& res) const
|
|
{
|
|
LLVector4a y,z;
|
|
|
|
res = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
|
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
|
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
|
res.mul(mMatrix[0]);
|
|
y.mul(mMatrix[1]);
|
|
z.mul(mMatrix[2]);
|
|
|
|
res.add(y);
|
|
res.add(z);
|
|
}
|
|
|
|
inline void affineTransformSSE(const LLVector4a& v, LLVector4a& res) const
|
|
{
|
|
LLVector4a x,y,z;
|
|
|
|
x = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, 0));
|
|
y = _mm_shuffle_ps(v, v, _MM_SHUFFLE(1, 1, 1, 1));
|
|
z = _mm_shuffle_ps(v, v, _MM_SHUFFLE(2, 2, 2, 2));
|
|
|
|
x.mul(mMatrix[0]);
|
|
y.mul(mMatrix[1]);
|
|
z.mul(mMatrix[2]);
|
|
|
|
x.add(y);
|
|
z.add(mMatrix[3]);
|
|
res.setAdd(x,z);
|
|
}
|
|
|
|
inline void affineTransformNonSSE(const LLVector4a& v, LLVector4a& res) const
|
|
{
|
|
F32 x = v[0] * mMatrix[0][0] + v[1] * mMatrix[1][0] + v[2] * mMatrix[2][0] + mMatrix[3][0];
|
|
F32 y = v[0] * mMatrix[0][1] + v[1] * mMatrix[1][1] + v[2] * mMatrix[2][1] + mMatrix[3][1];
|
|
F32 z = v[0] * mMatrix[0][2] + v[1] * mMatrix[1][2] + v[2] * mMatrix[2][2] + mMatrix[3][2];
|
|
F32 w = 1.0f;
|
|
res.set(x,y,z,w);
|
|
}
|
|
|
|
inline void affineTransform(const LLVector4a& v, LLVector4a& res) const
|
|
{
|
|
affineTransformSSE(v,res);
|
|
}
|
|
|
|
const LLVector4a& getTranslation() const { return mMatrix[3]; }
|
|
};
|
|
|
|
/**
 * Multiplies a row vector by a matrix:
 *   row[0]*mat.row0 + row[1]*mat.row1 + row[2]*mat.row2 + row[3]*mat.row3
 *
 * @param row  the four-element row vector
 * @param mat  the matrix whose rows are combined
 * @return     the resulting four-element vector
 */
inline LLVector4a rowMul(const LLVector4a &row, const LLMatrix4a &mat)
{
    // Broadcast each element of row across a full register.
    const __m128 e0 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(0, 0, 0, 0));
    const __m128 e1 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(1, 1, 1, 1));
    const __m128 e2 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 e3 = _mm_shuffle_ps(row, row, _MM_SHUFFLE(3, 3, 3, 3));

    // Accumulate the scaled matrix rows in order: ((r0 + r1) + r2) + r3.
    __m128 acc = _mm_mul_ps(e0, mat.mMatrix[0]);
    acc = _mm_add_ps(acc, _mm_mul_ps(e1, mat.mMatrix[1]));
    acc = _mm_add_ps(acc, _mm_mul_ps(e2, mat.mMatrix[2]));
    acc = _mm_add_ps(acc, _mm_mul_ps(e3, mat.mMatrix[3]));

    LLVector4a result;
    result = acc;
    return result;
}
|
|
|
|
inline void matMul(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
|
|
{
|
|
LLVector4a row0 = rowMul(a.mMatrix[0], b);
|
|
LLVector4a row1 = rowMul(a.mMatrix[1], b);
|
|
LLVector4a row2 = rowMul(a.mMatrix[2], b);
|
|
LLVector4a row3 = rowMul(a.mMatrix[3], b);
|
|
|
|
res.mMatrix[0] = row0;
|
|
res.mMatrix[1] = row1;
|
|
res.mMatrix[2] = row2;
|
|
res.mMatrix[3] = row3;
|
|
}
|
|
|
|
//Faster version of matMul wehere res must not be a or b
|
|
inline void matMulUnsafe(const LLMatrix4a &a, const LLMatrix4a &b, LLMatrix4a &res)
|
|
{
|
|
res.mMatrix[0] = rowMul(a.mMatrix[0], b);
|
|
res.mMatrix[1] = rowMul(a.mMatrix[1], b);
|
|
res.mMatrix[2] = rowMul(a.mMatrix[2], b);
|
|
res.mMatrix[3] = rowMul(a.mMatrix[3], b);
|
|
}
|
|
|
|
inline std::ostream& operator<<(std::ostream& s, const LLMatrix4a& m)
|
|
{
|
|
s << "[" << m.mMatrix[0] << ", " << m.mMatrix[1] << ", " << m.mMatrix[2] << ", " << m.mMatrix[3] << "]";
|
|
return s;
|
|
}
|
|
|
|
// Declared here; the definition is not in this header.
// NOTE(review): from the name and signature this presumably transforms the
// bounding box described by in_extents by matrix a and writes the resulting
// extents to out_extents -- confirm the array sizes and semantics against
// the definition.
void matMulBoundBox(const LLMatrix4a &a, const LLVector4a *in_extents, LLVector4a *out_extents);
|
|
|
|
#endif
|