Linux: Pull in Alchemy's vertex cache optimizer fix to smooth out triangle soup in certain hair/tails - Thank you!
parent baa9c6d731
commit 79c9873cd0
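Background for reviewers, not part of the diff: on Linux this change swaps the viewer's Forsyth-style optimizer for the meshoptimizer library, which reorders triangles for the post-transform vertex cache and then reorders vertices for near-sequential fetching. A minimal, self-contained sketch of that two-step pipeline on a single interleaved vertex stream (illustrative names; LLVolumeFace keeps split position/normal/texcoord streams, so the real code below is more involved):

#include <vector>
#include "meshoptimizer.h"

struct Vertex { float px, py, pz, nx, ny, nz, u, v; };

// Illustrative two-step optimization: triangle order for the vertex cache,
// then vertex order for sequential memory fetches.
void optimizeMesh(std::vector<unsigned int>& indices, std::vector<Vertex>& vertices)
{
    // Step 1: reorder triangles so recently transformed vertices are reused quickly.
    meshopt_optimizeVertexCache(indices.data(), indices.data(),
                                indices.size(), vertices.size());

    // Step 2: compute a vertex reorder that makes fetches near-sequential,
    // then apply it to both the index and the vertex buffer.
    std::vector<unsigned int> remap(vertices.size());
    meshopt_optimizeVertexFetchRemap(remap.data(), indices.data(),
                                     indices.size(), vertices.size());
    meshopt_remapIndexBuffer(indices.data(), indices.data(), indices.size(), remap.data());

    std::vector<Vertex> reordered(vertices.size());
    meshopt_remapVertexBuffer(reordered.data(), vertices.data(), vertices.size(),
                              sizeof(Vertex), remap.data());
    vertices.swap(reordered);
}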
@@ -52,6 +52,12 @@
#include "llmeshoptimizer.h"
#include "lltimer.h"

// <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
#ifdef LL_LINUX
#include "meshoptimizer/meshoptimizer.h"
#endif
// </FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!

#define DEBUG_SILHOUETTE_BINORMALS 0
#define DEBUG_SILHOUETTE_NORMALS 0 // TomY: Use this to display normals using the silhouette
#define DEBUG_SILHOUETTE_EDGE_MAP 0 // DaveP: Use this to display edge map using the silhouette
@@ -5432,6 +5438,204 @@ public:
    }
};

// <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
#ifdef LL_LINUX

// Reallocate the face's packed vertex block: positions, normals and texcoords
// share one 64-byte-aligned allocation, with normals placed directly after
// positions and 16-byte-padded texcoords after normals. When copy is set,
// existing vertex data is carried over into the new block.
bool allocateVertices(LLVolumeFace* self, S32 num_verts, bool copy = false)
{
    if (!copy || !num_verts)
    {
        ll_aligned_free<64>(self->mPositions);
        self->mPositions = nullptr;
        self->mNormals = nullptr;
        self->mTexCoords = nullptr;
    }

    if (num_verts)
    {
        const U32 new_vsize = num_verts * sizeof(LLVector4a);
        const U32 new_nsize = new_vsize;
        const U32 new_tcsize = (num_verts * sizeof(LLVector2) + 0xF) & ~0xF;
        const U32 new_size = new_vsize + new_nsize + new_tcsize;

        // allocate new buffer space
        LLVector4a* old_buf = self->mPositions;
        self->mPositions = (LLVector4a*)ll_aligned_malloc<64>(new_size);
        if (!self->mPositions)
        {
            LL_WARNS("LLVOLUME") << "Allocation of positions vector[" << new_size << "] failed." << LL_ENDL;
            return false;
        }
        self->mNormals = self->mPositions + num_verts;
        self->mTexCoords = (LLVector2*)(self->mNormals + num_verts);

        if (copy && old_buf)
        {
            U32 verts_to_copy = std::min(self->mNumVertices, num_verts);
            if (verts_to_copy)
            {
                const U32 old_vsize = verts_to_copy * sizeof(LLVector4a);
                const U32 old_nsize = old_vsize;
                const U32 old_tcsize = (verts_to_copy * sizeof(LLVector2) + 0xF) & ~0xF;

                LLVector4a::memcpyNonAliased16((F32*)self->mPositions, (F32*)old_buf, old_vsize);
                LLVector4a::memcpyNonAliased16((F32*)self->mNormals, (F32*)(old_buf + self->mNumVertices), old_nsize);
                LLVector4a::memcpyNonAliased16((F32*)self->mTexCoords, (F32*)(old_buf + self->mNumVertices * 2), old_tcsize);
            }
            ll_aligned_free<64>(old_buf);
        }
    }

    self->mNumAllocatedVertices = num_verts;
    return true;
}
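cacheOptimize() below feeds each live LLVolumeFace attribute array to meshopt_generateVertexRemapMulti, which merges only those vertices that are identical across every stream. For context, a minimal standalone use of the same call with two separate streams (illustrative names, not the viewer's code):

#include <vector>
#include "meshoptimizer.h"

// Deduplicate vertices that match across both attribute streams, then
// compact the index and attribute buffers accordingly.
size_t remapTwoStreams(std::vector<unsigned int>& indices,
                       std::vector<float>& positions,   // 3 floats per vertex
                       std::vector<float>& texcoords)   // 2 floats per vertex
{
    const size_t vertex_count = positions.size() / 3;
    const meshopt_Stream streams[] = {
        { positions.data(), sizeof(float) * 3, sizeof(float) * 3 },
        { texcoords.data(), sizeof(float) * 2, sizeof(float) * 2 },
    };

    std::vector<unsigned int> remap(vertex_count);
    size_t unique = meshopt_generateVertexRemapMulti(remap.data(), indices.data(),
                                                     indices.size(), vertex_count,
                                                     streams, 2);

    meshopt_remapIndexBuffer(indices.data(), indices.data(), indices.size(), remap.data());

    std::vector<float> pos_out(unique * 3), tc_out(unique * 2);
    meshopt_remapVertexBuffer(pos_out.data(), positions.data(), vertex_count,
                              sizeof(float) * 3, remap.data());
    meshopt_remapVertexBuffer(tc_out.data(), texcoords.data(), vertex_count,
                              sizeof(float) * 2, remap.data());
    positions.swap(pos_out);
    texcoords.swap(tc_out);
    return unique; // new vertex count, analogous to total_vertices below
}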

bool LLVolumeFace::cacheOptimize()
{
    llassert(!mOptimized);
    mOptimized = TRUE;

    if (mNumVertices < 3 || mNumIndices < 3)
    { //nothing to do
        return true;
    }

    struct buffer_data_t {
        void** dst;     // Double pointer to volume attribute data. Avoids fixup after reallocating buffers on resize.
        void* scratch;  // Scratch buffer. Allocated with vert count from meshopt_generateVertexRemapMulti
        size_t stride;  // Stride between contiguous attributes
    };
    std::vector<meshopt_Stream> streams;  // Contains data necessary for meshopt_generateVertexRemapMulti call
    std::vector<buffer_data_t> buffers;   // Contains data necessary for meshopt_remapVertexBuffer calls.

    {
        static struct { size_t offs; size_t size; size_t stride; } ref_streams[] = {
            { (U64) &mPositions - (U64) this, sizeof(float) * 3, sizeof(mPositions[0]) },
            { (U64) &mNormals - (U64) this, sizeof(float) * 3, sizeof(mNormals[0]) },     // Subsection of mPositions allocation
            { (U64) &mTexCoords - (U64) this, sizeof(float) * 2, sizeof(mTexCoords[0]) }, // Subsection of mPositions allocation
            { (U64) &mTangents - (U64) this, sizeof(float) * 3, sizeof(mTangents[0]) },
            { (U64) &mWeights - (U64) this, sizeof(float) * 3, sizeof(mWeights[0]) },
        };

        for (size_t i = 0; i < sizeof(ref_streams) / sizeof(ref_streams[0]); ++i)
        {
            void** ptr = reinterpret_cast<void**>((char*)this + ref_streams[i].offs);
            if (*ptr)
            {
                streams.push_back({ *ptr, ref_streams[i].size, ref_streams[i].stride });
                buffers.push_back({ ptr, nullptr, ref_streams[i].stride });
            }
        }
    }
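    // Note for readers: the streams built above give meshopt_generateVertexRemapMulti
    // a view of every live attribute array, while the double pointers collected in
    // buffers let the remap steps below keep writing to the right place even after
    // allocateVertices() swaps out the underlying allocations.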

    std::vector<unsigned int> remap;
    try
    {
        remap.resize(mNumIndices);
    }
    catch (const std::bad_alloc&)
    {
        return false;
    }

    size_t total_vertices = meshopt_generateVertexRemapMulti(remap.data(), mIndices, mNumIndices, mNumVertices, streams.data(), streams.size());
    meshopt_remapIndexBuffer(mIndices, mIndices, mNumIndices, remap.data());
    bool failed = false;
    for (auto& entry : buffers)
    {
        // Create scratch buffer for attribute data. Avoids extra allocs in meshopt_remapVertexBuffer calls
        void* buf_tmp = ll_aligned_malloc_16(entry.stride * total_vertices);
        if (!buf_tmp)
        {
            failed = true;
            break;
        }
        entry.scratch = buf_tmp;
        // Write to scratch buffer
        meshopt_remapVertexBuffer(entry.scratch, *entry.dst, mNumVertices, entry.stride, remap.data());
    }

    if (failed)
    {
        for (auto& entry : buffers)
        {
            // Release scratch buffer
            ll_aligned_free_16(entry.scratch);
        }
        return false;
    }

    if (mNumAllocatedVertices != total_vertices)
    {
        // New allocations remain transparently accessible through the double pointers in buffers.
        if (!allocateVertices(this, total_vertices))
        {
            for (auto& entry : buffers)
            {
                // Release scratch buffer
                ll_aligned_free_16(entry.scratch);
            }
            allocateVertices(this, 0);
            allocateWeights(0);
            allocateTangents(0);
            return false;
        }

        if (mWeights)
        {
            allocateWeights(total_vertices);
            if (!mWeights)
            {
                for (auto& entry : buffers)
                {
                    // Release scratch buffer
                    ll_aligned_free_16(entry.scratch);
                }
                allocateVertices(this, 0);
                allocateWeights(0);
                allocateTangents(0);
                return false;
            }
        }

        if (mTangents)
        {
            allocateTangents(total_vertices);
            if (!mTangents)
            {
                for (auto& entry : buffers)
                {
                    // Release scratch buffer
                    ll_aligned_free_16(entry.scratch);
                }
                allocateVertices(this, 0);
                allocateWeights(0);
                allocateTangents(0);
                return false;
            }
        }
    }

    meshopt_optimizeVertexCache(mIndices, mIndices, mNumIndices, total_vertices);
    //meshopt_optimizeOverdraw(mIndices, mIndices, mNumIndices, (float*)buffers[0].scratch, total_vertices, buffers[0].stride, 1.05f);
    meshopt_optimizeVertexFetchRemap(remap.data(), mIndices, mNumIndices, total_vertices);
    meshopt_remapIndexBuffer(mIndices, mIndices, mNumIndices, remap.data());
    for (auto& entry : buffers)
    {
        // Write to llvolume attribute buffer
        meshopt_remapVertexBuffer(*entry.dst, entry.scratch, total_vertices, entry.stride, remap.data());
        // Release scratch buffer
        ll_aligned_free_16(entry.scratch);
    }
    mNumVertices = total_vertices;

    return true;
}
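The commented-out logging in the Forsyth branch further down reports the average cache miss ratio (ACMR). For reference, a hedged sketch of how the same metric can be recomputed with meshoptimizer's built-in analyzer (the function name here is illustrative):

#include <cstdio>
#include "meshoptimizer.h"

// ACMR = vertices transformed per triangle; ~3.0 is the worst case for
// unoptimized triangle soup, ~0.5 is near-ideal for regular grids.
// 16/0/0 models a generic 16-entry FIFO post-transform cache.
void reportAcmr(const unsigned int* indices_before, const unsigned int* indices_after,
                size_t index_count, size_t vertex_count)
{
    meshopt_VertexCacheStatistics pre =
        meshopt_analyzeVertexCache(indices_before, index_count, vertex_count, 16, 0, 0);
    meshopt_VertexCacheStatistics post =
        meshopt_analyzeVertexCache(indices_after, index_count, vertex_count, 16, 0, 0);
    printf("ACMR pre/post: %.3f/%.3f -- %zu triangles\n",
           pre.acmr, post.acmr, index_count / 3);
}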
#else
// </FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!

bool LLVolumeFace::cacheOptimize()
{ //optimize for vertex cache according to Forsyth method:
@@ -5447,7 +5651,7 @@ bool LLVolumeFace::cacheOptimize()
    // windows version.
    //

#ifndef LL_LINUX
// #ifndef LL_LINUX // <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
    LLVCacheLRU cache;

    if (mNumVertices < 3 || mNumIndices < 3)
@@ -5691,10 +5895,11 @@ bool LLVolumeFace::cacheOptimize()
    //std::string result = llformat("ACMR pre/post: %.3f/%.3f -- %d triangles %d breaks", pre_acmr, post_acmr, mNumIndices/3, breaks);
    //LL_INFOS() << result << LL_ENDL;
#endif
// #endif // <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!

    return true;
}
#endif // <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
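For contrast, the non-Linux branch above keeps the viewer's Forsyth-style optimizer (the LLVCacheLRU path): it simulates an LRU vertex cache and repeatedly emits the triangle whose vertices score highest. A rough sketch of the classic scoring heuristic, using the commonly published Forsyth constants rather than necessarily the viewer's exact values:

#include <cmath>

// Score one vertex: warm cache positions score high (the last triangle's
// three slots get a flat, slightly lower score to discourage strip-like
// zigzags), and a valence boost favors vertices with few remaining
// triangles so isolated corners retire early.
float forsythVertexScore(int cache_position, int remaining_triangles)
{
    if (remaining_triangles <= 0)
        return -1.0f;                  // never worth transforming again

    float score = 0.0f;
    if (cache_position >= 0)           // currently in the simulated cache
    {
        if (cache_position < 3)
            score = 0.75f;             // used by the most recent triangle
        else
            score = powf(1.0f - (cache_position - 3) / 29.0f, 1.5f); // 32-entry cache decay
    }
    score += 2.0f * powf((float)remaining_triangles, -0.5f);
    return score;
}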

void LLVolumeFace::createOctree(F32 scaler, const LLVector4a& center, const LLVector4a& size)
{
@@ -6641,15 +6846,48 @@ void LLVolumeFace::pushVertex(const LLVector4a& pos, const LLVector4a& norm, con

void LLVolumeFace::allocateTangents(S32 num_verts)
{
// <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
#ifdef LL_LINUX
    ll_aligned_free_16(mTangents);
    mTangents = nullptr;
    if (num_verts)
    {
        mTangents = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
        if (!mTangents)
        {
            LL_WARNS("LLVOLUME") << "Allocation of tangents[" << sizeof(LLVector4a)*num_verts << "] failed" << LL_ENDL;
            return;
        }
    }
    return;
#else
// </FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
    ll_aligned_free_16(mTangents);
    mTangents = (LLVector4a*) ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
#endif // <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
}

void LLVolumeFace::allocateWeights(S32 num_verts)
{
// <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
#ifdef LL_LINUX
    ll_aligned_free_16(mWeights);
    mWeights = nullptr;
    if (num_verts)
    {
        mWeights = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
        if (!mWeights)
        {
            LL_WARNS("LLVOLUME") << "Allocation of weights[" << sizeof(LLVector4a) * num_verts << "] failed" << LL_ENDL;
            return;
        }
    }
    return;
#else
// </FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
    ll_aligned_free_16(mWeights);
    mWeights = (LLVector4a*)ll_aligned_malloc_16(sizeof(LLVector4a)*num_verts);
#endif // <FS:Zi> Use Alchemy's vertex cache optimizer for Linux. Thank you!
}
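allocateTangents() and allocateWeights() now share the same free-then-reallocate shape on Linux. A hypothetical helper folding that pattern, sketched against the viewer's ll_aligned_* utilities (not part of this commit):

// Hypothetical: free buf, then allocate num_verts elements 16-byte aligned,
// leaving buf null and logging a warning if the allocation fails.
template <typename T>
static void reallocAligned16(T*& buf, S32 num_verts, const char* label)
{
    ll_aligned_free_16(buf);
    buf = nullptr;
    if (num_verts)
    {
        buf = (T*)ll_aligned_malloc_16(sizeof(T) * num_verts);
        if (!buf)
        {
            LL_WARNS("LLVOLUME") << "Allocation of " << label << "["
                                 << sizeof(T) * num_verts << "] failed" << LL_ENDL;
        }
    }
}

// Usage: reallocAligned16(mTangents, num_verts, "tangents");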

void LLVolumeFace::allocateJointIndices(S32 num_verts)