Dave Parks 2014-06-19 13:14:42 -05:00
commit d0428575fb
7 changed files with 159 additions and 38 deletions

View File

@ -87,6 +87,7 @@ LLShaderFeatures::LLShaderFeatures()
, mIndexedTextureChannels(0)
, disableTextureIndex(false)
, hasAlphaMask(false)
, attachNothing(false)
{
}
@ -119,28 +120,31 @@ struct LLGLSLShaderCompareTimeElapsed
};
//static
void LLGLSLShader::finishProfile()
void LLGLSLShader::finishProfile(bool emit_report)
{
sProfileEnabled = false;
std::vector<LLGLSLShader*> sorted;
for (std::set<LLGLSLShader*>::iterator iter = sInstances.begin(); iter != sInstances.end(); ++iter)
if (emit_report)
{
sorted.push_back(*iter);
}
std::vector<LLGLSLShader*> sorted;
std::sort(sorted.begin(), sorted.end(), LLGLSLShaderCompareTimeElapsed());
for (std::set<LLGLSLShader*>::iterator iter = sInstances.begin(); iter != sInstances.end(); ++iter)
{
sorted.push_back(*iter);
}
for (std::vector<LLGLSLShader*>::iterator iter = sorted.begin(); iter != sorted.end(); ++iter)
{
(*iter)->dumpStats();
}
std::sort(sorted.begin(), sorted.end(), LLGLSLShaderCompareTimeElapsed());
for (std::vector<LLGLSLShader*>::iterator iter = sorted.begin(); iter != sorted.end(); ++iter)
{
(*iter)->dumpStats();
}
LL_INFOS() << "-----------------------------------" << LL_ENDL;
LL_INFOS() << "Total rendering time: " << llformat("%.4f ms", sTotalTimeElapsed/1000000.f) << LL_ENDL;
LL_INFOS() << "Total samples drawn: " << llformat("%.4f million", sTotalSamplesDrawn/1000000.f) << LL_ENDL;
LL_INFOS() << "Total triangles drawn: " << llformat("%.3f million", sTotalTrianglesDrawn/1000000.f) << LL_ENDL;
}
}
void LLGLSLShader::clearStats()
@ -175,7 +179,7 @@ void LLGLSLShader::dumpStats()
}
}
LL_INFOS() << "=============================================" << LL_ENDL;
F32 ms = mTimeElapsed/1000000.f;
F32 seconds = ms/1000.f;
@ -221,6 +225,7 @@ void LLGLSLShader::placeProfileQuery()
#if !LL_DARWIN
if (mTimerQuery == 0)
{
glGenQueriesARB(1, &mSamplesQuery);
glGenQueriesARB(1, &mTimerQuery);
}
@ -257,7 +262,7 @@ void LLGLSLShader::placeProfileQuery()
}
glBeginQueryARB(GL_SAMPLES_PASSED, 1);
glBeginQueryARB(GL_SAMPLES_PASSED, mSamplesQuery);
glBeginQueryARB(GL_TIME_ELAPSED, mTimerQuery);
#endif
}
@ -272,7 +277,7 @@ void LLGLSLShader::readProfileQuery(U32 count, U32 mode)
glGetQueryObjectui64v(mTimerQuery, GL_QUERY_RESULT, &time_elapsed);
U64 samples_passed = 0;
glGetQueryObjectui64v(1, GL_QUERY_RESULT, &samples_passed);
glGetQueryObjectui64v(mSamplesQuery, GL_QUERY_RESULT, &samples_passed);
sTotalTimeElapsed += time_elapsed;
mTimeElapsed += time_elapsed;
@ -307,14 +312,15 @@ LLGLSLShader::LLGLSLShader()
mShaderLevel(0),
mShaderGroup(SG_DEFAULT),
mUniformsDirty(FALSE),
mTimerQuery(0)
mTimerQuery(0),
mSamplesQuery(0)
{
}
// Destructor: the body is empty. The timer and sample query objects are
// deleted in unload(), not here.
LLGLSLShader::~LLGLSLShader()
{
}
void LLGLSLShader::unload()
@ -349,6 +355,18 @@ void LLGLSLShader::unload()
mProgramObject = 0;
}
if (mTimerQuery)
{
glDeleteQueriesARB(1, &mTimerQuery);
mTimerQuery = 0;
}
if (mSamplesQuery)
{
glDeleteQueriesARB(1, &mSamplesQuery);
mSamplesQuery = 0;
}
//hack to make apple not complain
glGetError();

View File

@ -51,6 +51,7 @@ public:
S32 mIndexedTextureChannels;
bool disableTextureIndex;
bool hasAlphaMask;
bool attachNothing;
// char numLights;
@ -80,7 +81,7 @@ public:
static bool sNoFixedFunction;
static void initProfile();
static void finishProfile();
static void finishProfile(bool emit_report = true);
static void startProfile();
static void stopProfile(U32 count, U32 mode);
@ -184,6 +185,7 @@ public:
//statistics for profiling shader performance
U32 mTimerQuery;
U32 mSamplesQuery;
U64 mTimeElapsed;
static U64 sTotalTimeElapsed;
U32 mTrianglesDrawn;

View File

@ -388,6 +388,7 @@ void LLRenderTarget::release()
//
if (mFBO && (mTex.size() > 1))
{
glBindFramebuffer(GL_FRAMEBUFFER, mFBO);
S32 z;
for (z = mTex.size() - 1; z >= 1; z--)
{

View File

@ -73,7 +73,11 @@ BOOL LLShaderMgr::attachShaderFeatures(LLGLSLShader * shader)
{
llassert_always(shader != NULL);
LLShaderFeatures *features = & shader->mFeatures;
if (features->attachNothing)
{
return TRUE;
}
//////////////////////////////////////
// Attach Vertex Shader Features First
//////////////////////////////////////

View File

@ -417,13 +417,67 @@ bool LLFeatureManager::parseFeatureTable(std::string filename)
return parse_ok;
}
F32 gpu_benchmark();
bool LLFeatureManager::loadGPUClass()
{
// defaults
mGPUClass = GPU_CLASS_UNKNOWN;
mGPUString = gGLManager.getRawGLString();
mGPUSupported = FALSE;
//get memory bandwidth from benchmark
F32 gbps = gpu_benchmark();
if (gbps < 0.f)
{ //couldn't bench, use GLVersion
if (gGLManager.mGLVersion < 2.f)
{
mGPUClass = GPU_CLASS_0;
}
else if (gGLManager.mGLVersion < 3.f)
{
mGPUClass = GPU_CLASS_1;
}
else if (gGLManager.mGLVersion < 3.3f)
{
mGPUClass = GPU_CLASS_2;
}
else if (gGLManager.mGLVersion < 4.f)
{
mGPUClass = GPU_CLASS_3;
}
else
{
mGPUClass = GPU_CLASS_4;
}
}
else if (gbps < 5.f)
{
mGPUClass = GPU_CLASS_0;
}
else if (gbps < 10.f)
{
mGPUClass = GPU_CLASS_1;
}
else if (gbps < 20.f)
{
mGPUClass = GPU_CLASS_2;
}
else if (gbps < 40.f)
{
mGPUClass = GPU_CLASS_3;
}
else if (gbps < 80.f)
{
mGPUClass = GPU_CLASS_4;
}
else
{
mGPUClass = GPU_CLASS_5;
}
// defaults
mGPUString = gGLManager.getRawGLString();
mGPUSupported = TRUE;
#if 0
// first table is in the app dir
std::string app_path = gDirUtilp->getAppRODataDir();
app_path += gDirUtilp->getDirDelimiter();
@ -451,8 +505,8 @@ bool LLFeatureManager::loadGPUClass()
{
parse_ok = parseGPUTable(app_path);
}
return parse_ok; // indicates that the file parsed correctly, not that the gpu was recognized
#endif
return true; // indicates that the file parsed correctly, not that the gpu was recognized
}
@ -730,6 +784,7 @@ void LLFeatureManager::init()
void LLFeatureManager::applyRecommendedSettings()
{
loadGPUClass();
// apply saved settings
// cap the level at 2 (high)
U32 level = llmax(GPU_CLASS_0, llmin(mGPUClass, GPU_CLASS_5));

View File

@ -879,13 +879,32 @@ void LLViewerObjectList::renderObjectBeacons()
}
void gpu_benchmark()
F32 gpu_benchmark()
{
if (!LLGLSLShader::sNoFixedFunction)
if (!gGLManager.mHasShaderObjects)
{ //don't bother benchmarking the fixed function
return;
return -1.f;
}
if (gBenchmarkProgram.mProgramObject == 0)
{
LLViewerShaderMgr::instance()->initAttribsAndUniforms();
gBenchmarkProgram.mName = "Benchmark Shader";
gBenchmarkProgram.mFeatures.attachNothing = true;
gBenchmarkProgram.mShaderFiles.clear();
gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkV.glsl", GL_VERTEX_SHADER_ARB));
gBenchmarkProgram.mShaderFiles.push_back(std::make_pair("interface/benchmarkF.glsl", GL_FRAGMENT_SHADER_ARB));
gBenchmarkProgram.mShaderLevel = 1;
if (!gBenchmarkProgram.createShader(NULL, NULL))
{
return -1.f;
}
}
LLGLDisable blend(GL_BLEND);
//measure memory bandwidth by:
// - allocating a batch of textures and render targets
// - rendering those textures to those render targets
@ -951,6 +970,8 @@ void gpu_benchmark()
//wait for any previous GL commands to finish
glFinish();
bool busted_finish = false;
for (S32 c = -1; c < samples; ++c)
{
LLTimer timer;
@ -965,7 +986,18 @@ void gpu_benchmark()
}
//wait for current batch of copies to finish
glFinish();
if (busted_finish)
{
//read a pixel off the last target since some drivers seem to ignore glFinish
dest[count-1].bindTarget();
U32 pixel = 0;
glReadPixels(0,0,1,1,GL_RGBA, GL_UNSIGNED_BYTE, &pixel);
dest[count-1].flush();
}
else
{
glFinish();
}
F32 time = timer.getElapsedTimeF32();
@ -976,13 +1008,20 @@ void gpu_benchmark()
F32 gbps = gb/time;
results.push_back(gbps);
if (!gGLManager.mHasTimerQuery && !busted_finish && gbps > 128.f)
{ //unrealistically high bandwidth for a card without timer queries, glFinish is probably ignored
busted_finish = true;
}
else
{
results.push_back(gbps);
}
}
}
gBenchmarkProgram.unbind();
LLGLSLShader::finishProfile();
LLGLSLShader::finishProfile(false);
LLImageGL::deleteTextures(count, source);
@ -993,20 +1032,22 @@ void gpu_benchmark()
LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to CPU timers" << LL_ENDL;
F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
F32 seconds = ms/1000.f;
F64 samples_drawn = res*res*count*samples;
F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
gbps = samples_sec*8;
if (gGLManager.mHasTimerQuery)
{
F32 ms = gBenchmarkProgram.mTimeElapsed/1000000.f;
F32 seconds = ms/1000.f;
F64 samples_drawn = res*res*count*samples;
F32 samples_sec = (samples_drawn/1000000000.0)/seconds;
gbps = samples_sec*8;
LL_INFOS() << "Memory bandwidth is " << llformat("%.3f", gbps) << "GB/sec according to ARB_timer_query" << LL_ENDL;
}
else
{
LL_INFOS() << "ARB_timer_query unavailable." << LL_ENDL;
}
return gbps;
}

View File

@ -7187,7 +7187,7 @@ class LLAdvancedClickRenderProfile: public view_listener_t
}
};
void gpu_benchmark();
F32 gpu_benchmark();
class LLAdvancedClickRenderBenchmark: public view_listener_t
{