/*-------------------------------------------------------------------------
* drawElements Quality Program OpenGL ES 3.1 Module
* -------------------------------------------------
*
* Copyright 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Indirect compute dispatch tests.
*//*--------------------------------------------------------------------*/
#include "es31fIndirectComputeDispatchTests.hpp"
#include "gluObjectWrapper.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "tcuVector.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuTestLog.hpp"
#include "deStringUtil.hpp"
#include <vector>
#include <string>
#include <map>
namespace deqp
{
namespace gles31
{
namespace Functional
{
using tcu::UVec3;
using tcu::TestLog;
using std::vector;
using std::string;
using std::map;
// \todo [2014-02-17 pyry] Should be extended with the following:
// Negative:
// - no active shader program
// - indirect offset negative or not aligned
// - indirect offset + command size outside buffer bounds
// - no buffer bound to DISPATCH_INDIRECT_BUFFER
// - (implicit) buffer mapped
// Robustness:
// - a lot of small work group launches
// - very large work group size
// - no synchronization, touched by gpu
// - compute program overwriting buffer
namespace
{
// Byte layout of one result block (the "Result" buffer of the verification shader)
// and the byte size of one indirect dispatch command.
enum
{
	RESULT_BLOCK_BASE_SIZE = 4*(int)sizeof(deUint32), // uvec3 expectedGroupCount followed by uint numPassed
	RESULT_BLOCK_EXPECTED_COUNT_OFFSET = 0, // expectedGroupCount starts the block
	RESULT_BLOCK_NUM_PASSED_OFFSET = 3*(int)sizeof(deUint32), // numPassed follows the three group counts
	INDIRECT_COMMAND_SIZE = 3*(int)sizeof(deUint32) // three uint work group counts
};
// Selects how the contents of the indirect command buffer are produced.
enum GenBuffer
{
	GEN_BUFFER_UPLOAD = 0, // Built on the CPU and uploaded with glBufferData().
	GEN_BUFFER_COMPUTE = 1, // Written by a compute shader.
	GEN_BUFFER_LAST = 2 // Number of generation methods; not handled as a method itself.
};
//! Build the sources of the verification compute program for the given local work group size.
//! Every invocation of the shader increments result.numPassed when gl_NumWorkGroups
//! equals result.expectedGroupCount.
glu::ProgramSources genVerifySources (const UVec3& workGroupSize)
{
	static const char* const s_shaderTemplate =
		"#version 310 es\n"
		"layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
		"layout(binding = 0, std430) buffer Result\n"
		"{\n"
		" uvec3 expectedGroupCount;\n"
		" coherent uint numPassed;\n"
		"} result;\n"
		"void main (void)\n"
		"{\n"
		" if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
		" atomicAdd(result.numPassed, 1u);\n"
		"}\n";

	// Template parameter names, indexed by dimension (x, y, z).
	static const char* const s_sizeParamNames[] = { "LOCAL_SIZE_X", "LOCAL_SIZE_Y", "LOCAL_SIZE_Z" };

	map<string, string> templateParams;
	for (int dim = 0; dim < 3; dim++)
		templateParams[s_sizeParamNames[dim]] = de::toString(workGroupSize[dim]);

	const string computeSrc = tcu::StringTemplate(s_shaderTemplate).specialize(templateParams);
	return glu::ProgramSources() << glu::ComputeSource(computeSrc);
}
// Base class of the indirect dispatch cases in this file.
//
// The derived classes below assign m_bufferSize, m_workGroupSize and m_commands in
// their constructors. iterate() then creates the result and command buffers, issues
// one glDispatchComputeIndirect() per command and verifies the per-command counters.
class IndirectDispatchCase : public TestCase
{
public:
IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer);
~IndirectDispatchCase (void);
IterateResult iterate (void);
protected:
// One indirect dispatch command.
struct DispatchCommand
{
// Byte offset of the command inside the indirect command buffer.
deIntptr offset;
// Work group counts stored at that offset; also used as the expected gl_NumWorkGroups.
UVec3 numWorkGroups;
DispatchCommand (void) : offset(0) {}
DispatchCommand (deIntptr offset_, const UVec3& numWorkGroups_) : offset(offset_), numWorkGroups(numWorkGroups_) {}
};
// How the command buffer contents are produced (CPU upload or compute shader).
GenBuffer m_genBuffer;
// Size of the indirect command buffer in bytes.
deUintptr m_bufferSize;
// Local work group size of the verification shader.
UVec3 m_workGroupSize;
// Commands to dispatch, in dispatch order.
vector<DispatchCommand> m_commands;
// Fills 'buffer' with the commands using the method selected by m_genBuffer.
void createCommandBuffer (deUint32 buffer) const;
// Fills 'buffer' with one result block per command (expected counts, numPassed = 0).
void createResultBuffer (deUint32 buffer) const;
// Maps 'buffer' and compares each numPassed counter against the expected invocation count.
bool verifyResultBuffer (deUint32 buffer);
void createCmdBufferUpload (deUint32 buffer) const;
void createCmdBufferCompute (deUint32 buffer) const;
private:
// Copying is disabled: declared private here, and no definition appears in this file.
IndirectDispatchCase (const IndirectDispatchCase&);
IndirectDispatchCase& operator= (const IndirectDispatchCase&);
};
// Stores the command buffer generation method and starts with a zero-sized command
// buffer; the derived classes in this file assign m_bufferSize, m_workGroupSize and
// m_commands in their own constructors.
IndirectDispatchCase::IndirectDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer)
: TestCase (context, name, description)
, m_genBuffer (genBuffer)
, m_bufferSize (0)
{
}
// Nothing to release here: the programs and buffers used by iterate() are locals of that function.
IndirectDispatchCase::~IndirectDispatchCase (void)
{
}
//! Size in bytes of one result block, rounded up to a multiple of
//! GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT so that consecutive blocks can be bound
//! with glBindBufferRange().
static int getResultBlockAlignedSize (const glw::Functions& gl)
{
	const int unpaddedSize = RESULT_BLOCK_BASE_SIZE;
	int offsetAlignment = 0;

	gl.getIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &offsetAlignment);

	// A zero alignment (e.g. the query left the value untouched) cannot be used for padding.
	if (offsetAlignment == 0)
		return unpaddedSize;

	const int remainder = unpaddedSize % offsetAlignment;
	if (remainder == 0)
		return unpaddedSize;

	// Drop the partial unit and add one full alignment unit instead.
	return unpaddedSize - remainder + offsetAlignment;
}
//! Fill 'buffer' with the indirect commands using the generation method chosen for this case.
void IndirectDispatchCase::createCommandBuffer (deUint32 buffer) const
{
	if (m_genBuffer == GEN_BUFFER_UPLOAD)
	{
		createCmdBufferUpload(buffer);
	}
	else if (m_genBuffer == GEN_BUFFER_COMPUTE)
	{
		createCmdBufferCompute(buffer);
	}
	else
	{
		// Unknown generation method.
		DE_ASSERT(false);
	}
}
//! Build the indirect command buffer contents on the CPU and upload them to 'buffer'.
//! Bytes not covered by any command are left zero-initialized.
void IndirectDispatchCase::createCmdBufferUpload (deUint32 buffer) const
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	vector<deUint8> cmdBytes (m_bufferSize);

	for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
	{
		const DispatchCommand& cmd = m_commands[cmdNdx];

		// Each command must be uint-aligned and fit completely inside the buffer.
		DE_STATIC_ASSERT(INDIRECT_COMMAND_SIZE >= sizeof(deUint32)*3);
		DE_ASSERT(cmd.offset >= 0);
		DE_ASSERT(cmd.offset%sizeof(deUint32) == 0);
		DE_ASSERT(cmd.offset + INDIRECT_COMMAND_SIZE <= (deIntptr)m_bufferSize);

		deUint32* const groupCounts = (deUint32*)&cmdBytes[cmd.offset];
		for (int dim = 0; dim < 3; dim++)
			groupCounts[dim] = cmd.numWorkGroups[dim];
	}

	gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
	gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)cmdBytes.size(), &cmdBytes[0], GL_STATIC_DRAW);
}
//! Fill the indirect command buffer on the GPU.
//! A compute shader containing one writeCmd() call per command is generated and run
//! once with 'buffer' bound as a shader storage buffer; GL_COMMAND_BARRIER_BIT is then
//! issued so that later indirect dispatches see the written commands.
void IndirectDispatchCase::createCmdBufferCompute (deUint32 buffer) const
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	std::ostringstream shaderSrc;

	// Declarations and the writeCmd() helper; offsets are expressed in uints.
	shaderSrc << "#version 310 es\n"
			  << "layout(local_size_x = 1) in;\n"
			  << "layout(std430, binding = 1) buffer Out\n"
			  << "{\n"
			  << " highp uint data[];\n"
			  << "};\n"
			  << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
			  << "{\n"
			  << " data[offset+0u] = numWorkGroups.x;\n"
			  << " data[offset+1u] = numWorkGroups.y;\n"
			  << " data[offset+2u] = numWorkGroups.z;\n"
			  << "}\n"
			  << "void main (void)\n"
			  << "{\n";

	// One writeCmd() call per command.
	for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
	{
		const DispatchCommand& cmd = m_commands[cmdNdx];
		const deUint32 wordOffset = (deUint32)(cmd.offset/4);

		// The byte offset must be exactly representable as a uint index.
		DE_ASSERT((deIntptr)wordOffset*4 == cmd.offset);

		shaderSrc << "\twriteCmd(" << wordOffset << "u, uvec3("
				  << cmd.numWorkGroups.x() << "u, "
				  << cmd.numWorkGroups.y() << "u, "
				  << cmd.numWorkGroups.z() << "u));\n";
	}

	shaderSrc << "}\n";

	glu::ShaderProgram program (m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(shaderSrc.str()));

	m_testCtx.getLog() << program;
	if (!program.isOk())
		TCU_FAIL("Compile failed");

	gl.useProgram(program.getProgram());

	// Allocate storage without initial data and expose it to the shader at binding 1.
	gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
	gl.bufferData(GL_DISPATCH_INDIRECT_BUFFER, (glw::GLsizeiptr)m_bufferSize, DE_NULL, GL_STATIC_DRAW);
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, buffer);
	GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");

	gl.dispatchCompute(1,1,1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute() failed");

	gl.memoryBarrier(GL_COMMAND_BARRIER_BIT);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_COMMAND_BARRIER_BIT) failed");
}
//! Create the result buffer contents and upload them to 'buffer'.
//! The buffer holds one aligned result block per command: the expected work group
//! counts followed by a numPassed counter initialized to zero.
void IndirectDispatchCase::createResultBuffer (deUint32 buffer) const
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const int resultBlockSize = getResultBlockAlignedSize(gl);
	const int resultBufferSize = resultBlockSize*(int)m_commands.size();
	vector<deUint8> data (resultBufferSize);

	for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
	{
		deUint8* const dstPtr = &data[resultBlockSize*cmdNdx];

		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 0*4) = m_commands[cmdNdx].numWorkGroups[0];
		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 1*4) = m_commands[cmdNdx].numWorkGroups[1];
		*(deUint32*)(dstPtr + RESULT_BLOCK_EXPECTED_COUNT_OFFSET + 2*4) = m_commands[cmdNdx].numWorkGroups[2];
		*(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
	}

	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);

	// glBufferData() takes a GLsizeiptr size. Indexing an empty vector is undefined, so
	// pass a null data pointer when there are no commands.
	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)data.size(), data.empty() ? DE_NULL : &data[0], GL_STATIC_READ);
}
//! Total number of shader invocations launched by a dispatch:
//! (invocations per work group) * (number of work groups).
//! All arithmetic is done in deUint32, so the result wraps modulo 2^32 instead of
//! passing through signed int.
deUint32 computeInvocationCount (const UVec3& workGroupSize, const UVec3& numWorkGroups)
{
	const deUint32 numInvocationsPerGroup = workGroupSize[0]*workGroupSize[1]*workGroupSize[2];
	const deUint32 numGroups = numWorkGroups[0]*numWorkGroups[1]*numWorkGroups[2];

	return numInvocationsPerGroup*numGroups;
}
//! Map the result buffer and check the numPassed counter of every command.
//! For each command the counter must equal the total number of invocations launched,
//! i.e. every invocation saw the expected gl_NumWorkGroups. Mismatches are logged.
//! \param buffer Result buffer object filled by createResultBuffer() and written by the dispatches.
//! \return true if all counters match, false otherwise.
//! \note GL errors and a failed mapping are reported by throwing (GLU_EXPECT_NO_ERROR / TCU_CHECK).
bool IndirectDispatchCase::verifyResultBuffer (deUint32 buffer)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
	const int resultBlockSize = getResultBlockAlignedSize(gl);
	const int resultBufferSize = resultBlockSize*(int)m_commands.size();
	void* mapPtr = DE_NULL;
	bool allOk = true;

	try
	{
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
		mapPtr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, resultBufferSize, GL_MAP_READ_BIT);
		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange() failed");
		TCU_CHECK(mapPtr);

		for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
		{
			const DispatchCommand& cmd = m_commands[cmdNdx];
			const deUint8* const srcPtr = (const deUint8*)mapPtr + cmdNdx*resultBlockSize;
			const deUint32 numPassed = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
			const deUint32 expectedCount = computeInvocationCount(m_workGroupSize, cmd.numWorkGroups);

			// Verify numPassed.
			if (numPassed != expectedCount)
			{
				m_testCtx.getLog() << TestLog::Message << "ERROR: got invalid result for invocation " << cmdNdx
								   << ": got numPassed = " << numPassed << ", expected " << expectedCount
								   << TestLog::EndMessage;
				allOk = false;
			}
		}
	}
	catch (...)
	{
		// Release the mapping and propagate the original failure. Without the rethrow
		// the error would be swallowed and the buffer unmapped a second time below.
		if (mapPtr)
			gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
		throw;
	}

	gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer() failed");

	return allOk;
}
//! Run the case: build the verification program, create the result and command buffers,
//! issue one glDispatchComputeIndirect() per command (each with its own result block
//! bound at binding 0) and verify the result counters.
//! \return STOP; the verdict is stored with setTestResult(). Compile failures and GL
//!         errors are reported by throwing.
IndirectDispatchCase::IterateResult IndirectDispatchCase::iterate (void)
{
	const glu::RenderContext& renderCtx = m_context.getRenderContext();
	const glw::Functions& gl = renderCtx.getFunctions();
	const glu::ShaderProgram program (renderCtx, genVerifySources(m_workGroupSize));
	glu::Buffer cmdBuffer (renderCtx);
	glu::Buffer resultBuffer (renderCtx);

	m_testCtx.getLog() << program;
	TCU_CHECK_MSG(program.isOk(), "Compile failed");

	m_testCtx.getLog() << TestLog::Message << "GL_DISPATCH_INDIRECT_BUFFER size = " << m_bufferSize << TestLog::EndMessage;
	{
		tcu::ScopedLogSection section(m_testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_commands.size()) + " in total)");

		for (size_t cmdNdx = 0; cmdNdx < m_commands.size(); cmdNdx++)
			m_testCtx.getLog() << TestLog::Message << cmdNdx << ": " << "offset = " << m_commands[cmdNdx].offset
							   << ", numWorkGroups = " << m_commands[cmdNdx].numWorkGroups
							   << TestLog::EndMessage;
	}

	createResultBuffer(*resultBuffer);
	createCommandBuffer(*cmdBuffer);

	// createCommandBuffer() may have changed the current program and buffer bindings.
	gl.useProgram(program.getProgram());
	gl.bindBuffer(GL_DISPATCH_INDIRECT_BUFFER, *cmdBuffer);
	GLU_EXPECT_NO_ERROR(gl.getError(), "State setup failed");

	{
		const int resultBlockAlignedSize = getResultBlockAlignedSize(gl);
		deIntptr curOffset = 0;

		for (vector<DispatchCommand>::const_iterator cmdIter = m_commands.begin(); cmdIter != m_commands.end(); ++cmdIter)
		{
			// Every command writes to its own result block.
			gl.bindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, *resultBuffer, (glw::GLintptr)curOffset, resultBlockAlignedSize);
			gl.dispatchComputeIndirect((glw::GLintptr)cmdIter->offset);

			curOffset += resultBlockAlignedSize;
		}
	}

	GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchComputeIndirect() failed");

	// The shader updates numPassed with atomicAdd(); make those writes visible to the
	// glMapBufferRange() read-back done during verification.
	gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT) failed");

	if (verifyResultBuffer(*resultBuffer))
		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
	else
		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");

	return STOP;
}
// Case that dispatches exactly one indirect command located at 'offset' bytes inside a
// command buffer of 'bufferSize' bytes.
class SingleDispatchCase : public IndirectDispatchCase
{
public:
	SingleDispatchCase (Context& context, const char* name, const char* description, GenBuffer genBuffer, deUintptr bufferSize, deUintptr offset, const UVec3& workGroupSize, const UVec3& numWorkGroups)
		: IndirectDispatchCase(context, name, description, genBuffer)
	{
		const DispatchCommand onlyCommand (offset, numWorkGroups);

		m_workGroupSize = workGroupSize;
		m_bufferSize = bufferSize;
		m_commands.push_back(onlyCommand);
	}
};
// Case that dispatches several different commands stored at distinct offsets of one
// 1024-byte command buffer.
class MultiDispatchCase : public IndirectDispatchCase
{
public:
	MultiDispatchCase (Context& context, GenBuffer genBuffer)
		: IndirectDispatchCase(context, "multi_dispatch", "Dispatch multiple compute commands from single buffer", genBuffer)
	{
		// Commands in dispatch order: byte offset and work group counts.
		static const struct
		{
			deIntptr offset;
			deUint32 groupsX;
			deUint32 groupsY;
			deUint32 groupsZ;
		} s_commands[] =
		{
			{ 0, 1, 1, 1 },
			{ INDIRECT_COMMAND_SIZE, 2, 1, 1 },
			{ 104, 1, 3, 1 },
			{ 40, 1, 1, 7 },
			{ 52, 1, 1, 4 }
		};

		m_bufferSize = 1<<10;
		m_workGroupSize = UVec3(3,1,2);

		for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_commands); ndx++)
			m_commands.push_back(DispatchCommand(s_commands[ndx].offset, UVec3(s_commands[ndx].groupsX, s_commands[ndx].groupsY, s_commands[ndx].groupsZ)));
	}
};
// Case that dispatches the same stored commands several times: three commands in one
// 1024-byte buffer, each referenced by two or three dispatches.
class MultiDispatchReuseCommandCase : public IndirectDispatchCase
{
public:
	MultiDispatchReuseCommandCase (Context& context, GenBuffer genBuffer)
		: IndirectDispatchCase(context, "multi_dispatch_reuse_command", "Dispatch multiple compute commands from single buffer", genBuffer)
	{
		// Distinct commands together with the number of times each one is dispatched.
		static const struct
		{
			deIntptr offset;
			deUint32 groupsX;
			deUint32 groupsY;
			deUint32 groupsZ;
			int numDispatches;
		} s_commands[] =
		{
			{ 0, 1, 1, 1, 3 },
			{ 104, 1, 3, 1, 2 },
			{ 52, 1, 1, 4, 2 }
		};

		m_bufferSize = 1<<10;
		m_workGroupSize = UVec3(3,1,2);

		for (int cmdNdx = 0; cmdNdx < DE_LENGTH_OF_ARRAY(s_commands); cmdNdx++)
		{
			const DispatchCommand command (s_commands[cmdNdx].offset, UVec3(s_commands[cmdNdx].groupsX, s_commands[cmdNdx].groupsY, s_commands[cmdNdx].groupsZ));

			for (int repeatNdx = 0; repeatNdx < s_commands[cmdNdx].numDispatches; repeatNdx++)
				m_commands.push_back(command);
		}
	}
};
} // anonymous
// Creates the "indirect_dispatch" group; its children are added later in init().
IndirectComputeDispatchTests::IndirectComputeDispatchTests (Context& context)
: TestCaseGroup(context, "indirect_dispatch", "Indirect dispatch tests")
{
}
// Empty: this class declares no resources of its own in this file.
IndirectComputeDispatchTests::~IndirectComputeDispatchTests (void)
{
}
void IndirectComputeDispatchTests::init (void)
{
static const struct
{
const char* name;
GenBuffer gen;
} s_genBuffer[] =
{
{ "upload_buffer", GEN_BUFFER_UPLOAD },
{ "gen_in_compute", GEN_BUFFER_COMPUTE }
};
static const struct
{
const char* name;
const char* description;
deUintptr bufferSize;
deUintptr offset;
UVec3 workGroupSize;
UVec3 numWorkGroups;
} s_singleDispatchCases[] =
{
// Name Desc BufferSize Offs WorkGroupSize NumWorkGroups
{ "single_invocation", "Single invocation only from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(1,1,1) },
{ "multiple_groups", "Multiple groups dispatched from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(2,3,5) },
{ "multiple_groups_multiple_invocations", "Multiple groups of size 2x3x1 from offset 0", INDIRECT_COMMAND_SIZE, 0, UVec3(2,3,1), UVec3(1,2,3) },
{ "small_offset", "Small offset", 16+INDIRECT_COMMAND_SIZE, 16, UVec3(1,1,1), UVec3(1,1,1) },
{ "large_offset", "Large offset", (2<<20), (1<<20) + 12, UVec3(1,1,1), UVec3(1,1,1) },
{ "large_offset_multiple_invocations", "Large offset, multiple invocations", (2<<20), (1<<20) + 12, UVec3(2,3,1), UVec3(1,2,3) },
{ "empty_command", "Empty command", INDIRECT_COMMAND_SIZE, 0, UVec3(1,1,1), UVec3(0,0,0) },
};
for (int genNdx = 0; genNdx < DE_LENGTH_OF_ARRAY(s_genBuffer); genNdx++)
{
const GenBuffer genBuf = s_genBuffer[genNdx].gen;
tcu::TestCaseGroup* const genGroup = new tcu::TestCaseGroup(m_testCtx, s_genBuffer[genNdx].name, "");
addChild(genGroup);
for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_singleDispatchCases); ndx++)
genGroup->addChild(new SingleDispatchCase(m_context,
s_singleDispatchCases[ndx].name,
s_singleDispatchCases[ndx].description,
genBuf,
s_singleDispatchCases[ndx].bufferSize,
s_singleDispatchCases[ndx].offset,
s_singleDispatchCases[ndx].workGroupSize,
s_singleDispatchCases[ndx].numWorkGroups));
genGroup->addChild(new MultiDispatchCase (m_context, genBuf));
genGroup->addChild(new MultiDispatchReuseCommandCase (m_context, genBuf));
}
}
} // Functional
} // gles31
} // deqp