/*-------------------------------------------------------------------------
* drawElements Quality Program OpenGL ES 3.0 Module
* -------------------------------------------------
*
* Copyright 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Flush and finish tests.
*//*--------------------------------------------------------------------*/
#include "es3fFlushFinishTests.hpp"
#include "gluRenderContext.hpp"
#include "gluObjectWrapper.hpp"
#include "gluShaderProgram.hpp"
#include "gluDrawUtil.hpp"
#include "glsCalibration.hpp"
#include "tcuTestLog.hpp"
#include "tcuRenderTarget.hpp"
#include "tcuCPUWarmup.hpp"
#include "glwEnums.hpp"
#include "glwFunctions.hpp"
#include "deRandom.hpp"
#include "deStringUtil.hpp"
#include "deClock.h"
#include "deThread.h"
#include "deMath.h"
#include <algorithm>
namespace deqp
{
namespace gles3
{
namespace Functional
{
using std::vector;
using std::string;
using tcu::TestLog;
using tcu::Vec2;
using deqp::gls::theilSenLinearRegression;
using deqp::gls::LineParameters;
namespace
{
enum
{
MAX_VIEWPORT_SIZE = 256,
MAX_SAMPLE_DURATION_US = 150*1000,
WAIT_TIME_MS = 200,
MIN_DRAW_CALL_COUNT = 10,
MAX_DRAW_CALL_COUNT = 1<<20,
MAX_SHADER_ITER_COUNT = 1<<10,
NUM_SAMPLES = 50,
NUM_VERIFICATION_SAMPLES = 3,
MAX_CALIBRATION_ATTEMPTS = 5
};
DE_STATIC_ASSERT(MAX_SAMPLE_DURATION_US < 1000*WAIT_TIME_MS);
const float NO_CORR_COEF_THRESHOLD = 0.1f;
const float FLUSH_COEF_THRESHOLD = 0.2f;
const float CORRELATED_COEF_THRESHOLD = 0.5f;
const float CALIBRATION_VERIFICATION_THRESHOLD = 0.10f; // Rendering time needs to be within 10% of MAX_SAMPLE_DURATION_US
static void busyWait (int milliseconds)
{
const deUint64 startTime = deGetMicroseconds();
float v = 2.0f;
for (;;)
{
for (int i = 0; i < 10; i++)
v = deFloatSin(v);
if (deGetMicroseconds()-startTime >= deUint64(1000*milliseconds))
break;
}
}
class CalibrationFailedException : public std::runtime_error
{
public:
CalibrationFailedException (const std::string& reason) : std::runtime_error(reason) {}
};
class FlushFinishCase : public TestCase
{
public:
enum ExpectedBehavior
{
EXPECT_COEF_LESS_THAN = 0,
EXPECT_COEF_GREATER_THAN,
};
FlushFinishCase (Context& context,
const char* name,
const char* description,
ExpectedBehavior waitBehavior,
float waitThreshold,
ExpectedBehavior readBehavior,
float readThreshold);
~FlushFinishCase (void);
void init (void);
void deinit (void);
IterateResult iterate (void);
struct Sample
{
int numDrawCalls;
deUint64 submitTime;
deUint64 waitTime;
deUint64 readPixelsTime;
};
struct CalibrationParams
{
int numItersInShader;
int maxDrawCalls;
};
protected:
virtual void waitForGL (void) = 0;
private:
FlushFinishCase (const FlushFinishCase&);
FlushFinishCase& operator= (const FlushFinishCase&);
CalibrationParams calibrate (void);
void verifyCalibration (const CalibrationParams& params);
void analyzeResults (const std::vector<Sample>& samples, const CalibrationParams& calibrationParams);
void setupRenderState (void);
void setShaderIterCount (int numIters);
void render (int numDrawCalls);
void readPixels (void);
const ExpectedBehavior m_waitBehavior;
const float m_waitThreshold;
const ExpectedBehavior m_readBehavior;
const float m_readThreshold;
glu::ShaderProgram* m_program;
int m_iterCountLoc;
};
FlushFinishCase::FlushFinishCase (Context& context, const char* name, const char* description, ExpectedBehavior waitBehavior, float waitThreshold, ExpectedBehavior readBehavior, float readThreshold)
: TestCase (context, name, description)
, m_waitBehavior (waitBehavior)
, m_waitThreshold (waitThreshold)
, m_readBehavior (readBehavior)
, m_readThreshold (readThreshold)
, m_program (DE_NULL)
, m_iterCountLoc (0)
{
}
FlushFinishCase::~FlushFinishCase (void)
{
FlushFinishCase::deinit();
}
void FlushFinishCase::init (void)
{
DE_ASSERT(!m_program);
m_program = new glu::ShaderProgram(m_context.getRenderContext(),
glu::ProgramSources()
<< glu::VertexSource(
"#version 300 es\n"
"in highp vec4 a_position;\n"
"out highp vec4 v_coord;\n"
"void main (void)\n"
"{\n"
" gl_Position = a_position;\n"
" v_coord = a_position;\n"
"}\n")
<< glu::FragmentSource(
"#version 300 es\n"
"uniform highp int u_numIters;\n"
"in highp vec4 v_coord;\n"
"out mediump vec4 o_color;\n"
"void main (void)\n"
"{\n"
" highp vec4 color = v_coord;\n"
" for (int i = 0; i < u_numIters; i++)\n"
" color = sin(color);\n"
" o_color = color;\n"
"}\n"));
if (!m_program->isOk())
{
m_testCtx.getLog() << *m_program;
delete m_program;
m_program = DE_NULL;
TCU_FAIL("Compile failed");
}
m_iterCountLoc = m_context.getRenderContext().getFunctions().getUniformLocation(m_program->getProgram(), "u_numIters");
TCU_CHECK(m_iterCountLoc >= 0);
}
void FlushFinishCase::deinit (void)
{
delete m_program;
m_program = DE_NULL;
}
tcu::TestLog& operator<< (tcu::TestLog& log, const FlushFinishCase::Sample& sample)
{
log << TestLog::Message << sample.numDrawCalls << " calls:\t" << sample.submitTime << " us submit,\t" << sample.waitTime << " us wait,\t" << sample.readPixelsTime << " us read" << TestLog::EndMessage;
return log;
}
void FlushFinishCase::setupRenderState (void)
{
const glw::Functions& gl = m_context.getRenderContext().getFunctions();
const int posLoc = gl.getAttribLocation(m_program->getProgram(), "a_position");
const int viewportW = de::min<int>(m_context.getRenderTarget().getWidth(), MAX_VIEWPORT_SIZE);
const int viewportH = de::min<int>(m_context.getRenderTarget().getHeight(), MAX_VIEWPORT_SIZE);
static const float s_positions[] =
{
-1.0f, -1.0f,
+1.0f, -1.0f,
-1.0f, +1.0f,
+1.0f, +1.0f
};
TCU_CHECK(posLoc >= 0);
gl.viewport(0, 0, viewportW, viewportH);
gl.useProgram(m_program->getProgram());
gl.enableVertexAttribArray(posLoc);
gl.vertexAttribPointer(posLoc, 2, GL_FLOAT, GL_FALSE, 0, &s_positions[0]);
gl.enable(GL_BLEND);
gl.blendFunc(GL_ONE, GL_ONE);
gl.blendEquation(GL_FUNC_ADD);
GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to set up render state");
}
void FlushFinishCase::setShaderIterCount (int numIters)
{
const glw::Functions& gl = m_context.getRenderContext().getFunctions();
gl.uniform1i(m_iterCountLoc, numIters);
}
void FlushFinishCase::render (int numDrawCalls)
{
const glw::Functions& gl = m_context.getRenderContext().getFunctions();
const deUint8 indices[] = { 0, 1, 2, 2, 1, 3 };
gl.clear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT|GL_STENCIL_BUFFER_BIT);
for (int ndx = 0; ndx < numDrawCalls; ndx++)
gl.drawElements(GL_TRIANGLES, DE_LENGTH_OF_ARRAY(indices), GL_UNSIGNED_BYTE, &indices[0]);
}
void FlushFinishCase::readPixels (void)
{
const glw::Functions& gl = m_context.getRenderContext().getFunctions();
deUint8 tmp[4];
gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, &tmp);
}
FlushFinishCase::CalibrationParams FlushFinishCase::calibrate (void)
{
tcu::ScopedLogSection section (m_testCtx.getLog(), "CalibrationInfo", "Calibration info");
CalibrationParams params;
// Step 1: find iteration count that results in rougly 1/10th of target maximum sample duration.
{
const deUint64 targetDurationUs = MAX_SAMPLE_DURATION_US/100;
deUint64 prevDuration = 0;
int prevIterCount = 1;
int curIterCount = 1;
m_testCtx.getLog() << TestLog::Message << "Calibrating shader iteration count, target duration = " << targetDurationUs << " us" << TestLog::EndMessage;
for (;;)
{
deUint64 curDuration;
setShaderIterCount(curIterCount);
render(1); // \note Submit time is ignored
{
const deUint64 startTime = deGetMicroseconds();
readPixels();
curDuration = deGetMicroseconds()-startTime;
}
m_testCtx.getLog() << TestLog::Message << "Duration with " << curIterCount << " iterations = " << curDuration << " us" << TestLog::EndMessage;
if (curDuration > targetDurationUs)
{
if (curIterCount > 1)
{
// Compute final count by using linear estimation.
const float a = float(curDuration - prevDuration) / float(curIterCount - prevIterCount);
const float b = float(prevDuration) - a*float(prevIterCount);
const float est = (float(targetDurationUs) - b) / a;
curIterCount = de::clamp(deFloorFloatToInt32(est), 1, int(MAX_SHADER_ITER_COUNT));
}
// else: Settle on 1.
break;
}
else if (curIterCount >= MAX_SHADER_ITER_COUNT)
break; // Settle on maximum.
else
{
prevIterCount = curIterCount;
prevDuration = curDuration;
curIterCount = curIterCount*2;
}
}
params.numItersInShader = curIterCount;
m_testCtx.getLog() << TestLog::Integer("ShaderIterCount", "Shader iteration count", "", QP_KEY_TAG_NONE, params.numItersInShader);
}
// Step 2: Find draw call count that results in desired maximum time.
{
deUint64 prevDuration = 0;
int prevDrawCount = 1;
int curDrawCount = 1;
m_testCtx.getLog() << TestLog::Message << "Calibrating maximum draw call count, target duration = " << int(MAX_SAMPLE_DURATION_US) << " us" << TestLog::EndMessage;
setShaderIterCount(params.numItersInShader);
for (;;)
{
deUint64 curDuration;
render(curDrawCount); // \note Submit time is ignored
{
const deUint64 startTime = deGetMicroseconds();
readPixels();
curDuration = deGetMicroseconds()-startTime;
}
m_testCtx.getLog() << TestLog::Message << "Duration with " << curDrawCount << " draw calls = " << curDuration << " us" << TestLog::EndMessage;
if (curDuration > MAX_SAMPLE_DURATION_US)
{
if (curDrawCount > 1)
{
// Compute final count by using linear estimation.
const float a = float(curDuration - prevDuration) / float(curDrawCount - prevDrawCount);
const float b = float(prevDuration) - a*float(prevDrawCount);
const float est = (float(MAX_SAMPLE_DURATION_US) - b) / a;
curDrawCount = de::clamp(deFloorFloatToInt32(est), 1, int(MAX_DRAW_CALL_COUNT));
}
// else: Settle on 1.
break;
}
else if (curDrawCount >= MAX_DRAW_CALL_COUNT)
break; // Settle on maximum.
else
{
prevDrawCount = curDrawCount;
prevDuration = curDuration;
curDrawCount = curDrawCount*2;
}
}
params.maxDrawCalls = curDrawCount;
m_testCtx.getLog() << TestLog::Integer("MaxDrawCalls", "Maximum number of draw calls", "", QP_KEY_TAG_NONE, params.maxDrawCalls);
}
// Sanity check.
if (params.maxDrawCalls < MIN_DRAW_CALL_COUNT)
throw CalibrationFailedException("Calibration failed, maximum draw call count is too low");
return params;
}
void FlushFinishCase::verifyCalibration (const CalibrationParams& params)
{
setShaderIterCount(params.numItersInShader);
for (int sampleNdx = 0; sampleNdx < NUM_VERIFICATION_SAMPLES; sampleNdx++)
{
deUint64 readStartTime;
render(params.maxDrawCalls);
readStartTime = deGetMicroseconds();
readPixels();
{
const deUint64 renderDuration = deGetMicroseconds()-readStartTime;
const float relativeDelta = float(double(renderDuration) / double(MAX_SAMPLE_DURATION_US)) - 1.0f;
if (!de::inBounds(relativeDelta, -CALIBRATION_VERIFICATION_THRESHOLD, CALIBRATION_VERIFICATION_THRESHOLD))
{
std::ostringstream msg;
msg << "ERROR: Unstable performance, got " << renderDuration << " us read time, "
<< de::floatToString(relativeDelta*100.0f, 1) << "% diff to estimated " << (int)MAX_SAMPLE_DURATION_US << " us";
throw CalibrationFailedException(msg.str());
}
}
}
}
struct CompareSampleDrawCount
{
bool operator() (const FlushFinishCase::Sample& a, const FlushFinishCase::Sample& b) const { return a.numDrawCalls < b.numDrawCalls; }
};
std::vector<Vec2> getPointsFromSamples (const std::vector<FlushFinishCase::Sample>& samples, const deUint64 FlushFinishCase::Sample::*field)
{
vector<Vec2> points(samples.size());
for (size_t ndx = 0; ndx < samples.size(); ndx++)
points[ndx] = Vec2(float(samples[ndx].numDrawCalls), float(samples[ndx].*field));
return points;
}
template<typename T>
T getMaximumValue (const std::vector<FlushFinishCase::Sample>& samples, const T FlushFinishCase::Sample::*field)
{
DE_ASSERT(!samples.empty());
T maxVal = samples[0].*field;
for (size_t ndx = 1; ndx < samples.size(); ndx++)
maxVal = de::max(maxVal, samples[ndx].*field);
return maxVal;
}
void FlushFinishCase::analyzeResults (const std::vector<Sample>& samples, const CalibrationParams& calibrationParams)
{
const vector<Vec2> waitTimes = getPointsFromSamples(samples, &Sample::waitTime);
const vector<Vec2> readTimes = getPointsFromSamples(samples, &Sample::readPixelsTime);
const LineParameters waitLine = theilSenLinearRegression(waitTimes);
const LineParameters readLine = theilSenLinearRegression(readTimes);
const float normWaitCoef = waitLine.coefficient * float(calibrationParams.maxDrawCalls) / float(MAX_SAMPLE_DURATION_US);
const float normReadCoef = readLine.coefficient * float(calibrationParams.maxDrawCalls) / float(MAX_SAMPLE_DURATION_US);
bool allOk = true;
{
tcu::ScopedLogSection section (m_testCtx.getLog(), "Samples", "Samples");
vector<Sample> sortedSamples (samples.begin(), samples.end());
std::sort(sortedSamples.begin(), sortedSamples.end(), CompareSampleDrawCount());
for (vector<Sample>::const_iterator iter = sortedSamples.begin(); iter != sortedSamples.end(); ++iter)
m_testCtx.getLog() << *iter;
}
m_testCtx.getLog() << TestLog::Float("WaitCoefficient", "Wait coefficient", "", QP_KEY_TAG_NONE, waitLine.coefficient)
<< TestLog::Float("ReadCoefficient", "Read coefficient", "", QP_KEY_TAG_NONE, readLine.coefficient)
<< TestLog::Float("NormalizedWaitCoefficient", "Normalized wait coefficient", "", QP_KEY_TAG_NONE, normWaitCoef)
<< TestLog::Float("NormalizedReadCoefficient", "Normalized read coefficient", "", QP_KEY_TAG_NONE, normReadCoef);
{
const bool waitCorrelated = normWaitCoef > CORRELATED_COEF_THRESHOLD;
const bool readCorrelated = normReadCoef > CORRELATED_COEF_THRESHOLD;
const bool waitNotCorr = normWaitCoef < NO_CORR_COEF_THRESHOLD;
const bool readNotCorr = normReadCoef < NO_CORR_COEF_THRESHOLD;
if (waitCorrelated || waitNotCorr)
m_testCtx.getLog() << TestLog::Message << "Wait time is" << (waitCorrelated ? "" : " NOT") << " correlated to rendering workload size." << TestLog::EndMessage;
else
m_testCtx.getLog() << TestLog::Message << "Warning: Wait time correlation to rendering workload size is unclear." << TestLog::EndMessage;
if (readCorrelated || readNotCorr)
m_testCtx.getLog() << TestLog::Message << "Read time is" << (readCorrelated ? "" : " NOT") << " correlated to rendering workload size." << TestLog::EndMessage;
else
m_testCtx.getLog() << TestLog::Message << "Warning: Read time correlation to rendering workload size is unclear." << TestLog::EndMessage;
}
for (int ndx = 0; ndx < 2; ndx++)
{
const float coef = ndx == 0 ? normWaitCoef : normReadCoef;
const char* name = ndx == 0 ? "wait" : "read";
const ExpectedBehavior behavior = ndx == 0 ? m_waitBehavior : m_readBehavior;
const float threshold = ndx == 0 ? m_waitThreshold : m_readThreshold;
const bool isOk = behavior == EXPECT_COEF_GREATER_THAN ? coef > threshold :
behavior == EXPECT_COEF_LESS_THAN ? coef < threshold : false;
const char* cmpName = behavior == EXPECT_COEF_GREATER_THAN ? "greater than" :
behavior == EXPECT_COEF_LESS_THAN ? "less than" : DE_NULL;
if (!isOk)
{
m_testCtx.getLog() << TestLog::Message << "ERROR: Expected " << name << " coefficient to be " << cmpName << " " << threshold << TestLog::EndMessage;
allOk = false;
}
}
m_testCtx.setTestResult(allOk ? QP_TEST_RESULT_PASS : QP_TEST_RESULT_FAIL,
allOk ? "Pass" : "Suspicious performance behavior");
}
FlushFinishCase::IterateResult FlushFinishCase::iterate (void)
{
vector<Sample> samples (NUM_SAMPLES);
CalibrationParams params;
tcu::warmupCPU();
setupRenderState();
// Do one full render cycle.
{
setShaderIterCount(1);
render(1);
readPixels();
}
// Calibrate.
for (int calibrationRoundNdx = 0; /* until done */; calibrationRoundNdx++)
{
try
{
m_testCtx.touchWatchdog();
params = calibrate();
verifyCalibration(params);
break;
}
catch (const CalibrationFailedException& e)
{
m_testCtx.getLog() << e;
if (calibrationRoundNdx < MAX_CALIBRATION_ATTEMPTS)
{
m_testCtx.getLog() << TestLog::Message
<< "Retrying calibration (" << (calibrationRoundNdx+1) << " / " << (int)MAX_CALIBRATION_ATTEMPTS << ")"
<< TestLog::EndMessage;
}
else
{
m_testCtx.setTestResult(QP_TEST_RESULT_COMPATIBILITY_WARNING, e.what());
return STOP;
}
}
}
// Do measurement.
{
de::Random rnd (123);
setShaderIterCount(params.numItersInShader);
for (size_t ndx = 0; ndx < samples.size(); ndx++)
{
const int drawCallCount = rnd.getInt(1, params.maxDrawCalls);
const deUint64 submitStartTime = deGetMicroseconds();
deUint64 waitStartTime;
deUint64 readStartTime;
deUint64 readFinishTime;
render(drawCallCount);
waitStartTime = deGetMicroseconds();
waitForGL();
readStartTime = deGetMicroseconds();
readPixels();
readFinishTime = deGetMicroseconds();
samples[ndx].numDrawCalls = drawCallCount;
samples[ndx].submitTime = waitStartTime-submitStartTime;
samples[ndx].waitTime = readStartTime-waitStartTime;
samples[ndx].readPixelsTime = readFinishTime-readStartTime;
m_testCtx.touchWatchdog();
}
}
// Analyze - sets test case result.
analyzeResults(samples, params);
return STOP;
}
class WaitOnlyCase : public FlushFinishCase
{
public:
WaitOnlyCase (Context& context)
: FlushFinishCase(context, "wait", "Wait only", EXPECT_COEF_LESS_THAN, NO_CORR_COEF_THRESHOLD, EXPECT_COEF_GREATER_THAN, -1000.0f /* practically nothing is expected */)
{
}
void init (void)
{
m_testCtx.getLog() << TestLog::Message << int(WAIT_TIME_MS) << " ms busy wait" << TestLog::EndMessage;
FlushFinishCase::init();
}
protected:
void waitForGL (void)
{
busyWait(WAIT_TIME_MS);
}
};
class FlushOnlyCase : public FlushFinishCase
{
public:
FlushOnlyCase (Context& context)
: FlushFinishCase(context, "flush", "Flush only", EXPECT_COEF_LESS_THAN, FLUSH_COEF_THRESHOLD, EXPECT_COEF_GREATER_THAN, CORRELATED_COEF_THRESHOLD)
{
}
void init (void)
{
m_testCtx.getLog() << TestLog::Message << "Single call to glFlush()" << TestLog::EndMessage;
FlushFinishCase::init();
}
protected:
void waitForGL (void)
{
m_context.getRenderContext().getFunctions().flush();
}
};
class FlushWaitCase : public FlushFinishCase
{
public:
FlushWaitCase (Context& context)
: FlushFinishCase(context, "flush_wait", "Wait after flushing", EXPECT_COEF_LESS_THAN, FLUSH_COEF_THRESHOLD, EXPECT_COEF_LESS_THAN, NO_CORR_COEF_THRESHOLD)
{
}
void init (void)
{
m_testCtx.getLog() << TestLog::Message << "glFlush() followed by " << int(WAIT_TIME_MS) << " ms busy wait" << TestLog::EndMessage;
FlushFinishCase::init();
}
protected:
void waitForGL (void)
{
m_context.getRenderContext().getFunctions().flush();
busyWait(WAIT_TIME_MS);
}
};
class FinishOnlyCase : public FlushFinishCase
{
public:
FinishOnlyCase (Context& context)
: FlushFinishCase(context, "finish", "Finish only", EXPECT_COEF_GREATER_THAN, CORRELATED_COEF_THRESHOLD, EXPECT_COEF_LESS_THAN, NO_CORR_COEF_THRESHOLD)
{
}
void init (void)
{
m_testCtx.getLog() << TestLog::Message << "Single call to glFinish()" << TestLog::EndMessage;
FlushFinishCase::init();
}
protected:
void waitForGL (void)
{
m_context.getRenderContext().getFunctions().finish();
}
};
class FinishWaitCase : public FlushFinishCase
{
public:
FinishWaitCase (Context& context)
: FlushFinishCase(context, "finish_wait", "Finish and wait", EXPECT_COEF_GREATER_THAN, CORRELATED_COEF_THRESHOLD, EXPECT_COEF_LESS_THAN, NO_CORR_COEF_THRESHOLD)
{
}
void init (void)
{
m_testCtx.getLog() << TestLog::Message << "glFinish() followed by " << int(WAIT_TIME_MS) << " ms busy wait" << TestLog::EndMessage;
FlushFinishCase::init();
}
protected:
void waitForGL (void)
{
m_context.getRenderContext().getFunctions().finish();
busyWait(WAIT_TIME_MS);
}
};
} // anonymous
FlushFinishTests::FlushFinishTests (Context& context)
: TestCaseGroup(context, "flush_finish", "Flush and Finish tests")
{
}
FlushFinishTests::~FlushFinishTests (void)
{
}
void FlushFinishTests::init (void)
{
addChild(new WaitOnlyCase (m_context));
addChild(new FlushOnlyCase (m_context));
addChild(new FlushWaitCase (m_context));
addChild(new FinishOnlyCase (m_context));
addChild(new FinishWaitCase (m_context));
}
} // Functional
} // gles3
} // deqp