/*------------------------------------------------------------------------- * drawElements Quality Program OpenGL ES 3.1 Module * ------------------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Basic Compute Shader Tests. *//*--------------------------------------------------------------------*/ #include "es31fBasicComputeShaderTests.hpp" #include "gluShaderProgram.hpp" #include "gluObjectWrapper.hpp" #include "gluRenderContext.hpp" #include "gluProgramInterfaceQuery.hpp" #include "gluContextInfo.hpp" #include "glwFunctions.hpp" #include "glwEnums.hpp" #include "tcuTestLog.hpp" #include "deRandom.hpp" #include "deStringUtil.hpp" #include "deMemory.h" namespace deqp { namespace gles31 { namespace Functional { using std::string; using std::vector; using tcu::TestLog; using namespace glu; //! Utility for mapping buffers. class BufferMemMap { public: BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access) : m_gl (gl) , m_target (target) , m_ptr (DE_NULL) { m_ptr = gl.mapBufferRange(target, offset, size, access); GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); TCU_CHECK(m_ptr); } ~BufferMemMap (void) { m_gl.unmapBuffer(m_target); } void* getPtr (void) const { return m_ptr; } void* operator* (void) const { return m_ptr; } private: BufferMemMap (const BufferMemMap& other); BufferMemMap& operator= (const BufferMemMap& other); const glw::Functions& m_gl; const deUint32 m_target; void* m_ptr; }; namespace { class EmptyComputeShaderCase : public TestCase { public: EmptyComputeShaderCase (Context& context) : TestCase(context, "empty", "Empty shader") { } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = 1) in;\n" "void main (void) {}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const glw::Functions& gl = m_context.getRenderContext().getFunctions(); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); gl.useProgram(program.getProgram()); gl.dispatchCompute(1, 1, 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } }; class UBOToSSBOInvertCase : public TestCase { public: UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_numValues (numValues) , m_localSize (localSize) , m_workSize (workSize) { DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "uniform Input {\n" << " uint values[" << m_numValues << "];\n" << "} ub_in;\n" << "layout(binding = 1) buffer Output {\n" << " uint values[" << m_numValues << "];\n" << "} sb_out;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const Buffer inputBuffer (m_context.getRenderContext()); const Buffer outputBuffer (m_context.getRenderContext()); std::vector<deUint32> inputValues (m_numValues); // Compute input values. { de::Random rnd(0x111223f); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex); gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer); gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); { const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++) *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; } gl.uniformBlockBinding(program.getProgram(), blockIndex, 0); gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); const deUint32 ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class CopyInvertSSBOCase : public TestCase { public: CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_numValues (numValues) , m_localSize (localSize) , m_workSize (workSize) { DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Input {\n" << " uint values[" << m_numValues << "];\n" << "} sb_in;\n" << "layout (binding = 1) buffer Output {\n" << " uint values[" << m_numValues << "];\n" << "} sb_out;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const Buffer inputBuffer (m_context.getRenderContext()); const Buffer outputBuffer (m_context.getRenderContext()); std::vector<deUint32> inputValues (m_numValues); // Compute input values. { de::Random rnd(0x124fef); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); const deUint32 ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class InvertSSBOInPlaceCase : public TestCase { public: InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_numValues (numValues) , m_isSized (isSized) , m_localSize (localSize) , m_workSize (workSize) { DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer InOut {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_inout;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); const Buffer outputBuffer (m_context.getRenderContext()); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const deUint32 blockSize = valueInfo.arrayStride*(deUint32)m_numValues; std::vector<deUint32> inputValues (m_numValues); // Compute input values. { de::Random rnd(0x82ce7f); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); const deUint32 ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const bool m_isSized; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class WriteToMultipleSSBOCase : public TestCase { public: WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_numValues (numValues) , m_isSized (isSized) , m_localSize (localSize) , m_workSize (workSize) { DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Out0 {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_out0;\n" << "layout(binding = 1) buffer Out1 {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_out1;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n" << "\n" << " {\n" << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out0.values[offset + ndx] = offset + ndx;\n" << " }\n" << " {\n" << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n" << " }\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); const Buffer outputBuffer0 (m_context.getRenderContext()); const deUint32 value0Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values"); const InterfaceVariableInfo value0Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index); const deUint32 block0Size = value0Info.arrayStride*(deUint32)m_numValues; const Buffer outputBuffer1 (m_context.getRenderContext()); const deUint32 value1Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values"); const InterfaceVariableInfo value1Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index); const deUint32 block1Size = value1Info.arrayStride*(deUint32)m_numValues; TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0)); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT); for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx)); const deUint32 ref = ndx; if (res != ref) throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); } } gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT); for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx)); const deUint32 ref = m_numValues - ndx; if (res != ref) throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const bool m_isSized; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SSBOLocalBarrierCase : public TestCase { public: SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_localSize (localSize) , m_workSize (workSize) { } IterateResult iterate (void) { const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer (m_context.getRenderContext()); const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; const int numValues = workGroupSize*workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " coherent uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint offsets[" << workGroupSize << "];\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " sb_out.values[globalOffs + localOffs] = globalOffs;\n" << " memoryBarrierBuffer();\n" << " barrier();\n" << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" << " memoryBarrierBuffer();\n" << " barrier();\n" << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n" << "}\n"; const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) { for (int localOffs = 0; localOffs < workGroupSize; localOffs++) { const int globalOffs = groupNdx*workGroupSize; const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs))); const int offs0 = localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize); const int offs1 = localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize); const deUint32 ref = (deUint32)(globalOffs + offs0 + offs1); if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); } } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SSBOBarrierCase : public TestCase { public: SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_workSize (workSize) { } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); const char* const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion); std::ostringstream src0; src0 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(binding = 1) buffer Output {\n" " uint values[];\n" "};\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " values[offset] = u_baseVal+offset;\n" "}\n"; std::ostringstream src1; src1 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(binding = 1) buffer Input {\n" " uint values[];\n" "};\n" "layout(binding = 0) buffer Output {\n" " coherent uint sum;\n" "};\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " uint value = values[offset];\n" " atomicAdd(sum, value);\n" "}\n"; const ShaderProgram program0 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str())); const ShaderProgram program1 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str())); const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer tempBuffer (m_context.getRenderContext()); const Buffer outputBuffer (m_context.getRenderContext()); const deUint32 baseValue = 127; m_testCtx.getLog() << program0 << program1; if (!program0.isOk() || !program1.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; // Temp buffer setup { const deUint32 valueIndex = gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const deUint32 bufferSize = valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2]; gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed"); } // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT); deMemset(bufMap.getPtr(), 0, blockSize); } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.useProgram(program0.getProgram()); gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue); gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); gl.useProgram(program1.getProgram()); gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset)); deUint32 ref = 0; for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++) ref += baseValue + (deUint32)ndx; if (res != ref) { m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; throw tcu::TestError("Comparison failed"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_workSize; }; class BasicSharedVarCase : public TestCase { public: BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_localSize (localSize) , m_workSize (workSize) { } IterateResult iterate (void) { const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer (m_context.getRenderContext()); const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; const int numValues = workGroupSize*workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint offsets[" << workGroupSize << "];\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n" << " barrier();\n" << " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n" << "}\n"; const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) { for (int localOffs = 0; localOffs < workGroupSize; localOffs++) { const int globalOffs = groupNdx*workGroupSize; const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs))); const deUint32 ref = (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1)); if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); } } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SharedVarAtomicOpCase : public TestCase { public: SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_localSize (localSize) , m_workSize (workSize) { } IterateResult iterate (void) { const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer (m_context.getRenderContext()); const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; const int numValues = workGroupSize*workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint count;\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << "\n" << " count = 0u;\n" << " barrier();\n" << " uint oldVal = atomicAdd(count, 1u);\n" << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n" << "}\n"; const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) { for (int localOffs = 0; localOffs < workGroupSize; localOffs++) { const int globalOffs = groupNdx*workGroupSize; const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs))); const deUint32 ref = (deUint32)(localOffs+1); if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); } } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class CopyImageToSSBOCase : public TestCase { public: CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize) : TestCase (context, name, description) , m_localSize (localSize) , m_imageSize (imageSize) { DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" << "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n" << "} sb_out;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n" << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer (m_context.getRenderContext()); const Texture inputTexture (m_context.getRenderContext()); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const tcu::IVec2 workSize = m_imageSize / m_localSize; de::Random rnd (0xab2c7); vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input image setup gl.bindTexture(GL_TEXTURE_2D, *inputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 1 gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(workSize[0], workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx)); const deUint32 ref = inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_localSize; const tcu::IVec2 m_imageSize; }; class CopySSBOToImageCase : public TestCase { public: CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize) : TestCase (context, name, description) , m_localSize (localSize) , m_imageSize (imageSize) { DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" << "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n" << "buffer Input {\n" << " uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n" << "} sb_in;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer inputBuffer (m_context.getRenderContext()); const Texture outputTexture (m_context.getRenderContext()); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const tcu::IVec2 workSize = m_imageSize / m_localSize; de::Random rnd (0x77238ac2); vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output image setup gl.bindTexture(GL_TEXTURE_2D, *outputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 1 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Dispatch compute workload gl.dispatchCompute(workSize[0], workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { Framebuffer fbo (m_context.getRenderContext()); vector<deUint32> pixels (inputValues.size()*4); gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); // \note In ES3 we have to use GL_RGBA_INTEGER gl.readBuffer(GL_COLOR_ATTACHMENT0); gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) { const deUint32 res = pixels[ndx*4]; const deUint32 ref = inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_localSize; const tcu::IVec2 m_imageSize; }; class ImageAtomicOpCase : public TestCase { public: ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize) : TestCase (context, name, description) , m_localSize (localSize) , m_imageSize (imageSize) { } void init (void) { if (!glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2))) if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic")) throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension"); } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); const bool supportsES32 = glu::contextSupports(m_context.getRenderContext().getType(), glu::ApiType::es(3, 2)); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << (supportsES32 ? "\n" : "#extension GL_OES_shader_image_atomic : require\n") << "layout (local_size_x = " << m_localSize << ") in;\n" << "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n" << "buffer Input {\n" << " uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n" << "} sb_in;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" << "\n" << " if (gl_LocalInvocationIndex == 0u)\n" << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n" << " barrier();\n" << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n" << "}\n"; const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer inputBuffer (m_context.getRenderContext()); const Texture outputTexture (m_context.getRenderContext()); const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); de::Random rnd (0x77238ac2); vector<deUint32> inputValues (m_imageSize[0]*m_imageSize[1]*m_localSize); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++) *(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output image setup gl.bindTexture(GL_TEXTURE_2D, *outputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 1 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Dispatch compute workload gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { Framebuffer fbo (m_context.getRenderContext()); vector<deUint32> pixels (m_imageSize[0]*m_imageSize[1]*4); gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); // \note In ES3 we have to use GL_RGBA_INTEGER gl.readBuffer(GL_COLOR_ATTACHMENT0); gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++) { const deUint32 res = pixels[pixelNdx*4]; deUint32 ref = 0; for (int offs = 0; offs < m_localSize; offs++) ref += inputValues[pixelNdx*m_localSize + offs]; if (res != ref) throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_localSize; const tcu::IVec2 m_imageSize; }; class ImageBarrierCase : public TestCase { public: ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize) : TestCase (context, name, description) , m_workSize (workSize) { } IterateResult iterate (void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); const char* const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion); std::ostringstream src0; src0 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n" "}\n"; std::ostringstream src1; src1 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n" "layout(binding = 0) buffer Output {\n" " coherent uint sum;\n" "};\n" "void main (void) {\n" " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n" " atomicAdd(sum, value);\n" "}\n"; const ShaderProgram program0 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str())); const ShaderProgram program1 (m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str())); const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Texture tempTexture (m_context.getRenderContext()); const Buffer outputBuffer (m_context.getRenderContext()); const deUint32 baseValue = 127; m_testCtx.getLog() << program0 << program1; if (!program0.isOk() || !program1.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; // Temp texture setup gl.bindTexture(GL_TEXTURE_2D, *tempTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 2 gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT); deMemset(bufMap.getPtr(), 0, blockSize); } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.useProgram(program0.getProgram()); gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue); gl.dispatchCompute(m_workSize[0], m_workSize[1], 1); gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); gl.useProgram(program1.getProgram()); gl.dispatchCompute(m_workSize[0], m_workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); // Read back and compare { const deUint32 blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset)); deUint32 ref = 0; for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++) ref += baseValue + (deUint32)ndx; if (res != ref) { m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; throw tcu::TestError("Comparison failed"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_workSize; }; class AtomicCounterCase : public TestCase { public: AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize) : TestCase (context, name, description) , m_localSize (localSize) , m_workSize (workSize) { } IterateResult iterate (void) { const glw::Functions& gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer (m_context.getRenderContext()); const Buffer counterBuffer (m_context.getRenderContext()); const int workGroupSize = m_localSize[0]*m_localSize[1]*m_localSize[2]; const int workGroupCount = m_workSize[0]*m_workSize[1]*m_workSize[2]; const int numValues = workGroupSize*workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " uint oldVal = atomicCounterIncrement(u_count);\n" << " sb_out.values[globalOffs+localOffs] = oldVal;\n" << "}\n"; const ShaderProgram program (m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Atomic counter buffer setup { const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer); gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ); { const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT); deMemset(memMap.getPtr(), 0, (int)bufferSize); } gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed"); } // Output buffer setup { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare atomic counter { const deUint32 uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); const deUint32 uniformOffset = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET); const deUint32 bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); const deUint32 bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); const BufferMemMap bufMap (gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT); const deUint32 resVal = *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset)); if (resVal != (deUint32)numValues) throw tcu::TestError("Invalid atomic counter value"); } // Read back and compare SSBO { const deUint32 blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const deUint32 valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap (gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); deUint32 valSum = 0; deUint32 refSum = 0; for (int valNdx = 0; valNdx < numValues; valNdx++) { const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx)); valSum += res; refSum += (deUint32)valNdx; if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues)) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]"); } if (valSum != refSum) throw tcu::TestError("Total sum of values in Output.values doesn't match"); } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; } // anonymous BasicComputeShaderTests::BasicComputeShaderTests (Context& context) : TestCaseGroup(context, "basic", "Basic Compute Shader Tests") { } BasicComputeShaderTests::~BasicComputeShaderTests (void) { } void BasicComputeShaderTests::init (void) { addChild(new EmptyComputeShaderCase(m_context)); addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2,1,4), tcu::IVec3(1,1,1))); addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); addChild(new UBOToSSBOInvertCase (m_context, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits", 256, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_invocations", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,1,1), tcu::IVec3(2,4,1))); addChild(new CopyInvertSSBOCase (m_context, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new InvertSSBOInPlaceCase (m_context, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation", "Write to multiple SSBOs", 256, true, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs", 1024, true, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation", "Write to multiple SSBOs", 256, false, tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups", "Write to multiple SSBOs", 1024, false, tcu::IVec3(1,4,2), tcu::IVec3(2,2,4))); addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_single_group", "SSBO local barrier usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); addChild(new SSBOLocalBarrierCase (m_context, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1,1,1))); addChild(new SSBOBarrierCase (m_context, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11,5,7))); addChild(new BasicSharedVarCase (m_context, "shared_var_single_invocation", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new BasicSharedVarCase (m_context, "shared_var_single_group", "Basic shared variable usage", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_invocations", "Basic shared variable usage", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); addChild(new BasicSharedVarCase (m_context, "shared_var_multiple_groups", "Basic shared variable usage", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_invocation", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_single_group", "Atomic operation with shared var", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_invocations", "Atomic operation with shared var", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); addChild(new SharedVarAtomicOpCase (m_context, "shared_atomic_op_multiple_groups", "Atomic operation with shared var", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); addChild(new CopyImageToSSBOCase (m_context, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); addChild(new CopyImageToSSBOCase (m_context, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1,1), tcu::IVec2(64,64))); addChild(new CopySSBOToImageCase (m_context, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2,4), tcu::IVec2(512,512))); addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_1", "Atomic operation with image", 1, tcu::IVec2(64,64))); addChild(new ImageAtomicOpCase (m_context, "image_atomic_op_local_size_8", "Atomic operation with image", 8, tcu::IVec2(64,64))); addChild(new ImageBarrierCase (m_context, "image_barrier_single", "Image barrier", tcu::IVec2(1,1))); addChild(new ImageBarrierCase (m_context, "image_barrier_multiple", "Image barrier", tcu::IVec2(64,64))); addChild(new AtomicCounterCase (m_context, "atomic_counter_single_invocation", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(1,1,1))); addChild(new AtomicCounterCase (m_context, "atomic_counter_single_group", "Basic atomic counter test", tcu::IVec3(3,2,5), tcu::IVec3(1,1,1))); addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_invocations", "Basic atomic counter test", tcu::IVec3(1,1,1), tcu::IVec3(2,5,4))); addChild(new AtomicCounterCase (m_context, "atomic_counter_multiple_groups", "Basic atomic counter test", tcu::IVec3(3,4,1), tcu::IVec3(2,7,3))); } } // Functional } // gles31 } // deqp