SH-2789 WIP - various fixes to force 16-byte alignment

master
Brad Payne (Vir Linden) 2011-12-19 18:17:18 -05:00
parent e62ad2bf13
commit 9c2e0d84f8
14 changed files with 151 additions and 54 deletions

View File

@ -61,6 +61,15 @@ BOOL LLMemory::sEnableMemoryFailurePrevention = FALSE;
LLPrivateMemoryPoolManager::mem_allocation_info_t LLPrivateMemoryPoolManager::sMemAllocationTracker;
#endif
// Out-of-line alignment check: verifies that the address value `ptr` is an
// exact multiple of `alignment`.
//
//   ptr        address to check, already converted to an integer
//   alignment  required alignment in bytes
//
// An aligned address returns immediately. A misaligned one is logged through
// llwarns before llassert is evaluated on the same condition.
// NOTE(review): alignment == 0 makes the modulo undefined; callers are assumed
// to pass a non-zero value -- confirm.
void ll_assert_aligned_func(uintptr_t ptr,U32 alignment)
{
	// Nothing to report for an aligned address: neither the warning nor the
	// assertion below would fire.
	if (ptr%alignment==0)
	{
		return;
	}

	// Emit the warning first so there is a log record even if llassert does
	// nothing in this build configuration (presumably possible -- confirm).
	llwarns << "alignment check failed" << llendl;
	llassert(ptr%alignment==0);
}
//static
void LLMemory::initClass()
{

View File

@ -27,7 +27,7 @@
#define LLMEMORY_H
#include "llmemtype.h"
#if LL_DEBUG
#if !LL_USE_TCMALLOC
inline void* ll_aligned_malloc( size_t size, int align )
{
void* mem = malloc( size + (align - 1) + sizeof(void*) );
@ -94,7 +94,8 @@ inline void ll_aligned_free_32(void *p)
free(p); // posix_memalign() is compatible with heap deallocator
#endif
}
#else // LL_DEBUG
#else // USE_TCMALLOC
// ll_aligned_foo are noops now that we use tcmalloc everywhere (tcmalloc aligns automatically at appropriate intervals)
#define ll_aligned_malloc( size, align ) malloc(size)
#define ll_aligned_free( ptr ) free(ptr)
@ -514,8 +515,10 @@ void LLPrivateMemoryPoolTester::operator delete[](void* addr)
#define CHECK_ALIGNMENT
LL_COMMON_API void ll_assert_aligned_func(uintptr_t ptr,U32 alignment);
#ifdef CHECK_ALIGNMENT
#define ll_assert_aligned(ptr,alignment) llassert((reinterpret_cast<uintptr_t>(ptr))%(alignment) == 0)
#define ll_assert_aligned(ptr,alignment) ll_assert_aligned_func(reinterpret_cast<uintptr_t>(ptr),((U32)alignment))
#else
#define ll_assert_aligned(ptr,alignment)
#endif

View File

@ -88,7 +88,7 @@ public:
typedef LLOctreeNode<T> oct_node;
typedef LLOctreeListener<T> oct_listener;
/*void* operator new(size_t size)
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
@ -96,7 +96,7 @@ public:
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}*/
}
LLOctreeNode( const LLVector4a& center,
const LLVector4a& size,

7
indra/llmath/llvector4a.cpp Normal file → Executable file
View File

@ -41,11 +41,15 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
/*static */void LLVector4a::memcpyNonAliased16(F32* __restrict dst, const F32* __restrict src, size_t bytes)
{
memcpy((void*)dst,(const void*)src,bytes);
#if 0
assert(src != NULL);
assert(dst != NULL);
assert(bytes > 0);
assert((bytes % sizeof(F32))== 0);
ll_assert_aligned(src,16);
ll_assert_aligned(dst,16);
F32* end = dst + (bytes / sizeof(F32) );
if (bytes > 64)
@ -87,6 +91,7 @@ extern const LLVector4a LL_V4A_EPSILON = reinterpret_cast<const LLVector4a&> ( F
dst += 4;
src += 4;
}
#endif
}
void LLVector4a::setRotated( const LLRotation& rot, const LLVector4a& vec )

14
indra/llmath/llvolumeoctree.h Normal file → Executable file
View File

@ -73,6 +73,16 @@ class LLVolumeOctreeListener : public LLOctreeListener<LLVolumeTriangle>
{
public:
// Class-specific allocation function: storage for heap instances of this class
// is obtained from ll_aligned_malloc_16 instead of the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): presumably needed so 16-byte aligned members land on a 16-byte
// boundary -- confirm against the class layout.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: hands ptr to ll_aligned_free_16, the
// counterpart of the ll_aligned_malloc_16 call made by this class's operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLVolumeOctreeListener(LLOctreeNode<LLVolumeTriangle>* node);
~LLVolumeOctreeListener();
@ -99,8 +109,8 @@ public:
public:
LLVector4a mBounds[2]; // bounding box (center, size) of this node and all its children (tight fit to objects)
LLVector4a mExtents[2]; // extents (min, max) of this node and all its children
LL_ALIGN_16(LLVector4a mBounds[2]); // bounding box (center, size) of this node and all its children (tight fit to objects)
LL_ALIGN_16(LLVector4a mExtents[2]); // extents (min, max) of this node and all its children
};
class LLOctreeTriangleRayIntersect : public LLOctreeTraveler<LLVolumeTriangle>

View File

@ -34,10 +34,21 @@
#include "../llsimdmath.h"
#include "../llvector4a.h"
// File-scope definition of operator new for the alignment tests: every plain
// new-expression is served by ll_aligned_malloc_16.
// size is the number of bytes requested.
// NOTE(review): a replacement global operator new is expected to throw
// std::bad_alloc on failure rather than return NULL; this forwards the
// allocator's result unchecked -- confirm that is acceptable for test code.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// File-scope definition of operator delete matching the operator new defined
// in this file: p is released through ll_aligned_free_16.
// NOTE(review): delete on a null pointer reaches this function with p == NULL;
// confirm ll_aligned_free_16 tolerates that.
void operator delete(void *p)
{
ll_aligned_free_16(p);
}
namespace tut
{
// is_aligned: true when the address held by ptr, converted to uintptr_t, is an
// exact multiple of alignment.
#define is_aligned(ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr))%(alignment)==0)
// is_aligned_relative: true when the byte distance from base_ptr to ptr
// (computed on uintptr_t values) is an exact multiple of alignment.
#define is_aligned_relative(ptr,base_ptr,alignment) ((reinterpret_cast<uintptr_t>(ptr)-reinterpret_cast<uintptr_t>(base_ptr))%(alignment)==0)
struct alignment_test {};
@ -51,38 +62,40 @@ class MyVector4a
LLQuad mQ;
} LL_ALIGN_POSTFIX(16);
LL_ALIGN_PREFIX(64)
class MyBigBlob
// Verify that aligned allocators perform as advertised.
template<> template<>
void alignment_test_object_t::test<1>()
{
public:
~MyBigBlob() {}
private:
LLQuad mQ[4];
} LL_ALIGN_POSTFIX(64);
const int num_tests = 7;
void *align_ptr;
for (int i=0; i<num_tests; i++)
{
align_ptr = ll_aligned_malloc_16(sizeof(MyVector4a));
ensure("ll_aligned_malloc_16 failed", is_aligned(align_ptr,16));
ll_aligned_free_16(align_ptr);
align_ptr = ll_aligned_malloc_32(sizeof(MyVector4a));
ensure("ll_aligned_malloc_32 failed", is_aligned(align_ptr,32));
ll_aligned_free_32(align_ptr);
}
}
// In-place allocation of objects and arrays.
template<> template<>
void alignment_test_object_t::test<1>()
void alignment_test_object_t::test<2>()
{
ensure("LLAlignment reality is broken: ", (1==1));
MyVector4a vec1;
ensure("LLAlignment vec1 unaligned", is_aligned(&vec1,16));
MyBigBlob bb1;
ensure("LLAlignment bb1 unaligned", is_aligned(&bb1,64));
MyVector4a veca[12];
ensure("LLAlignment veca unaligned", is_aligned(veca,16));
MyBigBlob bba[12];
ensure("LLAlignment bba unaligned", is_aligned(bba,64));
}
// Heap allocation of objects and arrays.
template<> template<>
void alignment_test_object_t::test<2>()
void alignment_test_object_t::test<3>()
{
const int ARR_SIZE = 7;
for(int i=0; i<ARR_SIZE; i++)
@ -93,35 +106,12 @@ void alignment_test_object_t::test<2>()
}
MyVector4a *veca = new MyVector4a[ARR_SIZE];
ensure("LLAligment veca base", is_aligned(veca,16));
for(int i=0; i<ARR_SIZE; i++)
{
ensure("LLAlignment veca unaligned", is_aligned(&veca[i],16));
std::cout << "veca[" << i << "]" << std::endl;
ensure("LLAlignment veca member unaligned", is_aligned(&veca[i],16));
}
for(int i=0; i<ARR_SIZE; i++)
{
void *aligned_addr = _aligned_malloc(sizeof(MyBigBlob),64);
MyBigBlob *bbp = new(aligned_addr) MyBigBlob;
ensure("LLAlignment bbp unaligned", is_aligned(bbp,64));
bbp->~MyBigBlob();
_aligned_free(aligned_addr);
}
ensure("LLAlignment big blob size",sizeof(MyBigBlob)==64);
void *aligned_addr = _aligned_malloc(ARR_SIZE*sizeof(MyBigBlob),64);
MyBigBlob *bba = new(aligned_addr) MyBigBlob[ARR_SIZE];
std::cout << "aligned_addr " << aligned_addr << std::endl;
std::cout << "bba " << bba << std::endl;
for(int i=0; i<ARR_SIZE; i++)
{
std::cout << "bba test " << i << std::endl;
ensure("LLAlignment bba unaligned", is_aligned(&bba[i],64));
}
for(int i=0; i<ARR_SIZE; i++)
{
bba[i].~MyBigBlob();
}
_aligned_free(aligned_addr);
}
}

View File

@ -76,6 +76,16 @@ public:
static void initClass();
// Class-specific allocation function: heap instances get their storage from
// ll_aligned_malloc_16 rather than the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: releases ptr with ll_aligned_free_16,
// pairing with the ll_aligned_malloc_16 call in this class's operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLDrawable() { init(); }
MEM_TYPE_NEW(LLMemType::MTYPE_DRAWABLE);

12
indra/newview/lldynamictexture.h Normal file → Executable file
View File

@ -36,6 +36,16 @@
class LLViewerDynamicTexture : public LLViewerTexture
{
public:
// Class-specific allocation function: storage for heap instances comes from
// ll_aligned_malloc_16 instead of the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): presumably required by the LL_ALIGN_16 member of this class --
// confirm.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: ptr is returned through
// ll_aligned_free_16, the counterpart of the allocator used by operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
enum
{
LL_VIEWER_DYNAMIC_TEXTURE = LLViewerTexture::DYNAMIC_TEXTURE,
@ -85,7 +95,7 @@ protected:
protected:
BOOL mClamp;
LLCoordGL mOrigin;
LLCamera mCamera;
LL_ALIGN_16(LLCamera mCamera);
typedef std::set<LLViewerDynamicTexture*> instance_list_t;
static instance_list_t sInstances[ LLViewerDynamicTexture::ORDER_COUNT ];

11
indra/newview/llface.h Normal file → Executable file
View File

@ -59,6 +59,17 @@ class LLFace
{
public:
// Class-specific allocation function: LLFace objects created with new are
// backed by ll_aligned_malloc_16 rather than the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: frees ptr with ll_aligned_free_16 to
// match the ll_aligned_malloc_16 allocation made by operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLFace(const LLFace& rhs)
{
*this = rhs;

View File

@ -1196,7 +1196,7 @@ LLSpatialGroup::LLSpatialGroup(OctreeNode* node, LLSpatialPartition* part) :
mCurUpdatingSlotp(NULL),
mCurUpdatingTexture (NULL)
{
ll_assert_aligned(this,64);
ll_assert_aligned(this,16);
sNodeCount++;
LLMemType mt(LLMemType::MTYPE_SPACE_PARTITION);

View File

@ -68,6 +68,16 @@ protected:
~LLDrawInfo();
public:
// Class-specific allocation function: heap storage is requested from
// ll_aligned_malloc_16 instead of the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: passes ptr to ll_aligned_free_16,
// mirroring the allocator used by this class's operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLDrawInfo(const LLDrawInfo& rhs)
{
@ -106,7 +116,7 @@ public:
F32 mPartSize;
F32 mVSize;
LLSpatialGroup* mGroup;
LLFace* mFace; //associated face
LL_ALIGN_16(LLFace* mFace); //associated face
F32 mDistance;
U32 mDrawMode;
@ -181,7 +191,7 @@ public:
};
};
LL_ALIGN_PREFIX(64)
LL_ALIGN_PREFIX(16)
class LLSpatialGroup : public LLOctreeListener<LLDrawable>
{
friend class LLSpatialPartition;
@ -193,6 +203,16 @@ public:
*this = rhs;
}
// Class-specific allocation function: heap instances are allocated through
// ll_aligned_malloc_16 rather than the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): presumably what lets the constructor's
// ll_assert_aligned(this,16) check hold for heap objects -- confirm.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: releases ptr via ll_aligned_free_16,
// the counterpart of the allocator called from operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
const LLSpatialGroup& operator=(const LLSpatialGroup& rhs)
{
llerrs << "Illegal operation!" << llendl;

View File

@ -55,6 +55,15 @@ LL_ALIGN_PREFIX(16)
class LLViewerCamera : public LLCamera, public LLSingleton<LLViewerCamera>
{
public:
// Class-specific allocation function: storage for heap instances is taken
// from ll_aligned_malloc_16 instead of the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: ptr goes back through
// ll_aligned_free_16, matching the allocation path in operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
typedef enum
{

View File

@ -90,6 +90,16 @@ protected:
**/
public:
// Class-specific allocation function: heap instances are served by
// ll_aligned_malloc_16 rather than the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: frees ptr through ll_aligned_free_16
// so that release matches the allocator used by operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLVOAvatar(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
virtual void markDead();
static void initClass(); // Initialize data that's only init'd once per class.

10
indra/newview/llvoavatarself.h Normal file → Executable file
View File

@ -48,6 +48,16 @@ class LLVOAvatarSelf :
**/
public:
// Class-specific allocation function: storage is obtained from
// ll_aligned_malloc_16 instead of the default operator new.
// size is the byte count requested by the new-expression.
// NOTE(review): the allocator's result is returned unchecked; confirm it
// cannot be NULL here.
void* operator new(size_t size)
{
return ll_aligned_malloc_16(size);
}
// Class-specific deallocation function: returns ptr via ll_aligned_free_16,
// the counterpart of the allocator called by this class's operator new.
void operator delete(void* ptr)
{
ll_aligned_free_16(ptr);
}
LLVOAvatarSelf(const LLUUID &id, const LLPCode pcode, LLViewerRegion *regionp);
virtual ~LLVOAvatarSelf();
virtual void markDead();