// es31fSynchronizationTests.cpp - Android社区 - https://www.androidos.net.cn/

/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.1 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Synchronization Tests
 *//*--------------------------------------------------------------------*/

#include "es31fSynchronizationTests.hpp"
#include "tcuTestLog.hpp"
#include "tcuSurface.hpp"
#include "tcuRenderTarget.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "gluObjectWrapper.hpp"
#include "gluPixelTransfer.hpp"
#include "gluContextInfo.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deMemory.h"
#include "deRandom.hpp"

#include <map>

namespace deqp
{
namespace gles31
{
namespace Functional
{
namespace
{

static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
{
	std::vector<deUint32> chainDelta(valueChain.size());

for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];

// chainDelta contains now the actual additions applied to the value
	// check there exists an addition ramp form 1 to ...
	std::sort(chainDelta.begin(), chainDelta.end());

for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
	{
		if ((int)chainDelta[callNdx] != callNdx+1)
		{
			invalidOperationNdx = callNdx;
			errorDelta = chainDelta[callNdx];
			errorExpected = callNdx+1;

return false;
		}
	}

return true;
}

//! Read numElements 32-bit values from the start of the buffer bound to target.
//!
//! The buffer is mapped for reading, its contents are copied into result (which is
//! resized to numElements) and the buffer is unmapped again.
//!
//! \throws tcu::TestError if mapping returns NULL or unmapping returns GL_FALSE
static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
{
	const size_t	numBytes	= sizeof(deUint32) * numElements;
	const void*		mapped		= gl.mapBufferRange(target, 0, (int)numBytes, GL_MAP_READ_BIT);

	GLU_EXPECT_NO_ERROR(gl.getError(), "map");

	if (!mapped)
		throw tcu::TestError("mapBufferRange returned NULL");

	// take a private copy so that the mapping can be released right away
	result.resize(numElements);
	memcpy(&result[0], mapped, numBytes);

	if (gl.unmapBuffer(target) == GL_FALSE)
		throw tcu::TestError("unmapBuffer returned false");
}

//! Read the first 32-bit value of the buffer bound to target.
static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
{
	std::vector<deUint32> single;

	readBuffer(gl, target, 1, single);

	return single.front();
}

//! Fill ramp with the values 1..numElements and shuffle them with a fixed-seed generator
void generateShuffledRamp (int numElements, std::vector<int>& ramp)
{
	de::Random rng(0xabcd);

	ramp.resize(numElements);

	// unique, strictly positive values: 1, 2, 3, ...
	int nextValue = 1;
	for (std::vector<int>::iterator it = ramp.begin(); it != ramp.end(); ++it)
		*it = nextValue++;

	rng.shuffle(ramp.begin(), ramp.end());
}

//! Base class for cases that dispatch a single compute job accessing a storage
//! (buffer or image) and then verify that every entry of the result buffer is 1.
//! Subclasses supply the compute shader through genShaderSource().
class InterInvocationTestCase : public TestCase
{
public:
	//! Kind of storage the shader reads and writes
	enum StorageType
	{
		STORAGE_BUFFER = 0,
		STORAGE_IMAGE,

STORAGE_LAST
	};
	//! Bit flags accepted by the constructor
	enum CaseFlags
	{
		FLAG_ATOMIC				= 0x1,	//!< sets m_useAtomic
		FLAG_ALIASING_STORAGES	= 0x2,	//!< sets m_aliasingStorages; the same object is bound to bindings 1 and 2
		FLAG_IN_GROUP			= 0x4,	//!< sets m_syncWithGroup; barrier source synchronizes the whole work group
	};

InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
						~InterInvocationTestCase	(void);

private:
	void				init						(void);
	void				deinit						(void);
	IterateResult		iterate						(void);

void				runCompute					(void);
	bool				verifyResults				(void);
	//! Returns the complete compute shader source for the case
	virtual std::string	genShaderSource				(void) const = 0;

protected:
	//! Returns the GLSL statements placed between dependent storage accesses
	std::string			genBarrierSource			(void) const;

const StorageType	m_storage;
	const bool			m_useAtomic;
	const bool			m_aliasingStorages;
	const bool			m_syncWithGroup;
	const int			m_workWidth;				//!< total work width
	const int			m_workHeight;				//!<     ...    height
	const int			m_localWidth;				//!< group width
	const int			m_localHeight;				//!< group height
	const int			m_elementsPerInvocation;	//!< elements accessed by a single invocation

private:
	glw::GLuint			m_storageBuf;				//!< storage buffer object (STORAGE_BUFFER), 0 when not created
	glw::GLuint			m_storageTex;				//!< storage texture object (STORAGE_IMAGE), 0 when not created
	glw::GLuint			m_resultBuf;				//!< result buffer object, 0 when not created
	glu::ShaderProgram*	m_program;					//!< compute program, owned; released in deinit()
};

//! Decode the case flags and set up the fixed work and group dimensions.
//! No GL objects are created here; the handles start out as 0 / DE_NULL.
InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: TestCase(context, name, desc)
	, m_storage(storage)
	, m_useAtomic((flags & FLAG_ATOMIC) == FLAG_ATOMIC)
	, m_aliasingStorages((flags & FLAG_ALIASING_STORAGES) == FLAG_ALIASING_STORAGES)
	, m_syncWithGroup((flags & FLAG_IN_GROUP) == FLAG_IN_GROUP)
	, m_workWidth(256)
	, m_workHeight(256)
	, m_localWidth(16)
	, m_localHeight(8)
	, m_elementsPerInvocation(8)
	, m_storageBuf(0)
	, m_storageTex(0)
	, m_resultBuf(0)
	, m_program(DE_NULL)
{
	DE_ASSERT(m_storage < STORAGE_LAST);

	// 128 is the minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
	DE_ASSERT(m_localWidth * m_localHeight <= 128);
}

//! Release any GL objects and the program that are still alive.
InterInvocationTestCase::~InterInvocationTestCase (void)
{
	// inside the destructor the call binds to this class's deinit() in any case; spell it out
	InterInvocationTestCase::deinit();
}

//! Build the compute program and create the storage and result objects.
//!
//! The storage (buffer or R32I image, depending on m_storage) is zero-filled and sized for
//! m_workWidth * m_workHeight * m_elementsPerInvocation elements. The result buffer holds
//! one element per invocation and is filled with -1.
//!
//! \throws tcu::NotSupportedError if image atomics are needed but GL_OES_shader_image_atomic is missing
//! \throws tcu::TestError if the program does not build
void InterInvocationTestCase::init (void)
{
	const glw::Functions& gl = m_context.getRenderContext().getFunctions();

	// requirements

	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

	// program

	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
	m_testCtx.getLog() << *m_program;
	if (!m_program->isOk())
		throw tcu::TestError("could not build program");

	// source

	if (m_storage == STORAGE_BUFFER)
	{
		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_storageBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
	}
	else if (m_storage == STORAGE_IMAGE)
	{
		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		// the elements of one invocation are stacked vertically, hence the taller image
		gl.genTextures(1, &m_storageTex);
		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");

		// Zero-fill
		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;

		{
			const std::vector<deInt32> zeroBuffer(bufferElements, 0);
			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
		}
	}
	else
		DE_ASSERT(DE_FALSE);

	// destination

	{
		const int				bufferElements	= m_workWidth * m_workHeight;
		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);

		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

		gl.genBuffers(1, &m_resultBuf);
		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
		// labelled separately from the storage buffer so a failure points at the right object
		GLU_EXPECT_NO_ERROR(gl.getError(), "gen result buf");
	}
}

//! Delete the GL objects created by init() and release the program.
//! Handles are reset afterwards, so calling this again (e.g. from the destructor) is harmless.
void InterInvocationTestCase::deinit (void)
{
	if (m_storageBuf)
	{
		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
		// GL object names are integers; 0 (not a null pointer constant) marks "no object"
		m_storageBuf = 0;
	}

	if (m_storageTex)
	{
		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
		m_storageTex = 0;
	}

	if (m_resultBuf)
	{
		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
		m_resultBuf = 0;
	}

	delete m_program;
	m_program = DE_NULL;
}

//! Run the compute job once, verify the result buffer and report pass or fail.
InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
{
	// Dispatch
	runCompute();

	// Verify buffer contents
	if (!verifyResults())
	{
		const std::string storageName	= (m_storage == STORAGE_BUFFER) ? ("buffer") : ("image");
		const std::string failMessage	= storageName + " content verification failed";

		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, failMessage.c_str());
	}
	else
		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");

	return STOP;
}

void InterInvocationTestCase::runCompute (void)
{
	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
	const int				groupsX	= m_workWidth / m_localWidth;
	const int				groupsY	= m_workHeight / m_localHeight;

DE_ASSERT((m_workWidth % m_localWidth) == 0);
	DE_ASSERT((m_workHeight % m_localHeight) == 0);

m_testCtx.getLog()
		<< tcu::TestLog::Message
		<< "Dispatching compute.\n"
		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
		<< tcu::TestLog::EndMessage;

gl.useProgram(m_program->getProgram());

// source
	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
	{
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
	}
	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
	{
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");

m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
	}
	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
	{
		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
	}
	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
	{
		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);

GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
	}
	else
		DE_ASSERT(DE_FALSE);

// destination
	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

// dispatch
	gl.dispatchCompute(groupsX, groupsY, 1);
	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
}

bool InterInvocationTestCase::verifyResults (void)
{
	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
	const int				errorFloodThreshold	= 5;
	int						numErrorsLogged		= 0;
	const void*				mapped				= DE_NULL;
	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
	bool					error				= false;

gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");

// copy to properly aligned array
	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));

if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
		throw tcu::TestError("memory map store corrupted");

// check the results
	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
	{
		if (results[ndx] != 1)
		{
			error = true;

if (numErrorsLogged == 0)
				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
			if (numErrorsLogged++ < errorFloodThreshold)
				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
			else
			{
				// after N errors, no point continuing verification
				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
				break;
			}
		}
	}

if (!error)
		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
	return !error;
}

//! Generate the GLSL statements that separate dependent storage accesses.
//!
//! - group synchronization: groupMemoryBarrier() followed by barrier()
//! - own data in a buffer: nothing but an empty line
//! - own data in an image: memoryBarrierImage()
std::string InterInvocationTestCase::genBarrierSource (void) const
{
	std::string src;

	if (m_syncWithGroup)
	{
		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
		// \note memoryBarrierBuffer() or memoryBarrierImage() could replace groupMemoryBarrier(), but only
		//       intra-workgroup synchronization is required.
		src += "\n";
		src += "	groupMemoryBarrier();\n";
		src += "	barrier();\n";
		src += "\n";

		return src;
	}

	// From here on an invocation only waits for data it has written itself.
	switch (m_storage)
	{
		case STORAGE_BUFFER:
			// Buffer reads and writes of a single invocation are processed in order: nothing to wait for.
			src += "\n";
			break;

		case STORAGE_IMAGE:
			// Image operations complete in undefined order, so they have to be waited for.
			src += "\n";
			src += "	memoryBarrierImage();\n";
			src += "\n";
			break;

		default:
			DE_ASSERT(DE_FALSE);
			break;
	}

	return src;
}

//! Case whose shader consists of a common header, storage declarations and a main()
//! wrapper; subclasses only provide the statements inside main().
class InvocationBasicCase : public InterInvocationTestCase
{
public:
							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	//! Assembles the full shader around genShaderMainBlock()
	std::string				genShaderSource			(void) const;
	//! Returns the statements placed inside main(); they may update the shader variable allOk
	virtual std::string		genShaderMainBlock		(void) const = 0;
};

//! Forwards all arguments to InterInvocationTestCase; no state of its own.
InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InterInvocationTestCase(context, name, desc, storage, flags)
{
}

//! Assemble the compute shader: header, result buffer, storage declaration with its
//! addressing helper (getIndex for buffers, getCoord for images) and a main() that runs
//! the subclass block and stores 1 or 0 into the result buffer depending on allOk.
std::string InvocationBasicCase::genShaderSource (void) const
{
	std::ostringstream src;

	// header: version, optional image-atomics extension, group size, result buffer
	src << "#version 310 es\n";
	if (m_useAtomic && m_storage == STORAGE_IMAGE)
		src << "#extension GL_OES_shader_image_atomic : require\n";
	src << "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n";
	src << "layout(binding=0, std430) buffer Output\n"
		<< "{\n"
		<< "	highp int values[];\n"
		<< "} sb_result;\n";

	// storage declaration and addressing helper
	switch (m_storage)
	{
		case STORAGE_BUFFER:
			src << "layout(binding=1, std430) coherent buffer Storage\n"
				<< "{\n"
				<< "	highp int values[];\n"
				<< "} sb_store;\n"
				<< "\n"
				<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
				<< "{\n"
				<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
				<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
				<< "}\n";
			break;

		case STORAGE_IMAGE:
			src << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
				<< "\n"
				<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
				<< "{\n"
				<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
				<< "}\n";
			break;

		default:
			DE_ASSERT(DE_FALSE);
			break;
	}

	// main(): common prologue, case-specific block, result store
	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
		<< "	bool allOk      = true;\n"
		<< "\n";
	src << genShaderMainBlock();
	src << "\n"
		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

	return src.str();
}

//! Case whose shader writes the storage, runs the barrier source and then reads the values back.
class InvocationWriteReadCase : public InvocationBasicCase
{
public:
					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	//! Returns the write / barrier / read statements for main()
	std::string		genShaderMainBlock			(void) const;
};

//! Forwards all arguments to InvocationBasicCase; no state of its own.
InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}

//! Generate the body of main(): each invocation writes groupNdx to its own elements,
//! runs the barrier source and then checks that the elements read back equal groupNdx.
//! With group synchronization the read targets elements of other invocations in the group.
std::string InvocationWriteReadCase::genShaderMainBlock (void) const
{
	const bool			isBuffer	= (m_storage == STORAGE_BUFFER);
	const bool			isImage		= (m_storage == STORAGE_IMAGE);
	std::ostringstream	src;

	// write

	for (int elemNdx = 0; elemNdx < m_elementsPerInvocation; ++elemNdx)
	{
		if (isBuffer)
		{
			if (m_useAtomic)
				src << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << elemNdx << ")], groupNdx);\n";
			else
				src << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << elemNdx << ")] = groupNdx;\n";
		}
		else if (isImage)
		{
			if (m_useAtomic)
				src << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << elemNdx << "), int(groupNdx));\n";
			else
				src << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << elemNdx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
		}
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	src << genBarrierSource();

	// read

	for (int elemNdx = 0; elemNdx < m_elementsPerInvocation; ++elemNdx)
	{
		// own element, or an element of another invocation in the group when synchronizing group-wide
		std::string localID = "gl_LocalInvocationID.xy";
		if (m_syncWithGroup)
			localID = "(gl_LocalInvocationID.xy + uvec2(" + de::toString(elemNdx+1) + ", " + de::toString(2*elemNdx) + ")) % gl_WorkGroupSize.xy";

		if (isBuffer)
		{
			if (m_useAtomic)
				src << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << elemNdx << ")], 0) == groupNdx);\n";
			else
				src << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << elemNdx << ")] == groupNdx);\n";
		}
		else if (isImage)
		{
			if (m_useAtomic)
				src << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << elemNdx << "), 0) == groupNdx);\n";
			else
				src << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << elemNdx << ")).x == groupNdx);\n";
		}
		else
			DE_ASSERT(DE_FALSE);
	}

	return src.str();
}

//! Case whose shader reads the storage (expecting the initial zeros), runs the barrier
//! source and only then writes.
class InvocationReadWriteCase : public InvocationBasicCase
{
public:
					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	//! Returns the read / barrier / write statements for main()
	std::string		genShaderMainBlock			(void) const;
};

//! Forwards all arguments to InvocationBasicCase; no state of its own.
InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}

std::string InvocationReadWriteCase::genShaderMainBlock (void) const
{
	std::ostringstream buf;

// read

if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

// barrier

buf << genBarrierSource();

// write

return buf.str();
}

//! Case whose shader writes the storage, overwrites it after a barrier and, after a
//! second barrier, checks that the overwriting value is the one read back.
class InvocationOverWriteCase : public InvocationBasicCase
{
public:
					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
private:
	//! Returns the write / barrier / overwrite / barrier / read statements for main()
	std::string		genShaderMainBlock			(void) const;
};

//! Forwards all arguments to InvocationBasicCase; no state of its own.
InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
	: InvocationBasicCase(context, name, desc, storage, flags)
{
}

//! Generate the body of main() in three phases separated by the barrier source:
//! 1. each invocation writes 456 to its own elements,
//! 2. the elements are overwritten with groupNdx,
//! 3. the elements are read back and compared against groupNdx.
//! With group synchronization phases 2 and 3 address elements of other invocations in the group.
std::string InvocationOverWriteCase::genShaderMainBlock (void) const
{
	const bool			isBuffer	= (m_storage == STORAGE_BUFFER);
	const bool			isImage		= (m_storage == STORAGE_IMAGE);
	std::ostringstream	src;

	// write

	for (int elemNdx = 0; elemNdx < m_elementsPerInvocation; ++elemNdx)
	{
		if (isBuffer)
		{
			if (m_useAtomic)
				src << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << elemNdx << ")], 456);\n";
			else
				src << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << elemNdx << ")] = 456;\n";
		}
		else if (isImage)
		{
			if (m_useAtomic)
				src << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << elemNdx << "), 456);\n";
			else
				src << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << elemNdx << "), ivec4(456, 0, 0, 0));\n";
		}
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	src << genBarrierSource();

	// write over

	for (int elemNdx = 0; elemNdx < m_elementsPerInvocation; ++elemNdx)
	{
		// write another invocation's value or our own value depending on test type
		std::string localID = "gl_LocalInvocationID.xy";
		if (m_syncWithGroup)
			localID = "(gl_LocalInvocationID.xy + uvec2(" + de::toString(elemNdx+4) + ", " + de::toString(3*elemNdx) + ")) % gl_WorkGroupSize.xy";

		if (isBuffer)
		{
			if (m_useAtomic)
				src << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << elemNdx << ")], groupNdx);\n";
			else
				src << "\tsb_store.values[getIndex(" << localID << ", " << elemNdx << ")] = groupNdx;\n";
		}
		else if (isImage)
		{
			if (m_useAtomic)
				src << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << elemNdx << "), groupNdx);\n";
			else
				src << "\timageStore(u_image, getCoord(" << localID << ", " << elemNdx << "), ivec4(groupNdx, 0, 0, 0));\n";
		}
		else
			DE_ASSERT(DE_FALSE);
	}

	// barrier

	src << genBarrierSource();

	// read

	for (int elemNdx = 0; elemNdx < m_elementsPerInvocation; ++elemNdx)
	{
		// check another invocation's value or our own value depending on test type
		std::string localID = "gl_LocalInvocationID.xy";
		if (m_syncWithGroup)
			localID = "(gl_LocalInvocationID.xy + uvec2(" + de::toString(elemNdx+1) + ", " + de::toString(2*elemNdx) + ")) % gl_WorkGroupSize.xy";

		if (isBuffer)
		{
			if (m_useAtomic)
				src << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << elemNdx << ")], 123) == groupNdx);\n";
			else
				src << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << elemNdx << ")] == groupNdx);\n";
		}
		else if (isImage)
		{
			if (m_useAtomic)
				src << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << elemNdx << "), 123) == groupNdx);\n";
			else
				src << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << elemNdx << ")).x == groupNdx);\n";
		}
		else
			DE_ASSERT(DE_FALSE);
	}

	return src.str();
}

//! Case that accesses one storage object through two shader bindings: data is written
//! through the second binding and read back through the first.
class InvocationAliasWriteCase : public InterInvocationTestCase
{
public:
	//! Selects whether an initial write through the first binding precedes the aliased write
	enum TestType
	{
		TYPE_WRITE = 0,		//!< only the write through the second binding
		TYPE_OVERWRITE,		//!< write through the first binding, barrier, then the write through the second

TYPE_LAST
	};

InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
private:
	//! Returns the complete compute shader source for the case
	std::string		genShaderSource				(void) const;

const TestType	m_type;
};

//! Construct the case; FLAG_ALIASING_STORAGES is always added to the given flags.
InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
	: InterInvocationTestCase(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
	, m_type(type)
{
	DE_ASSERT(m_type < TYPE_LAST);
}

std::string InvocationAliasWriteCase::genShaderSource (void) const
{
	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
	std::ostringstream	buf;

if (m_storage == STORAGE_BUFFER)
		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store0;\n"
			<< "layout(binding=2, std430) coherent buffer Storage1\n"
			<< "{\n"
			<< "	highp int values[];\n"
			<< "} sb_store1;\n"
			<< "\n"
			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
			<< "}\n";
	else if (m_storage == STORAGE_IMAGE)
		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
			<< "\n"
			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
			<< "{\n"
			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
			<< "}\n";
	else
		DE_ASSERT(DE_FALSE);

if (m_type == TYPE_OVERWRITE)
	{
		// write

for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
		{
			if (m_storage == STORAGE_BUFFER && m_useAtomic)
				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
			else
				DE_ASSERT(DE_FALSE);
		}

// barrier

buf << genBarrierSource();
	}
	else
		DE_ASSERT(m_type == TYPE_WRITE);

// write (again)

for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");

if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
		else
			DE_ASSERT(DE_FALSE);
	}

// barrier

buf << genBarrierSource();

// read

for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
	{
		if (m_storage == STORAGE_BUFFER && m_useAtomic)
			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
		else
			DE_ASSERT(DE_FALSE);
	}

// return result

buf << "\n"
		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
		<< "}\n";

return buf.str();
}

namespace op
{

//! Parameters of a write command. Kept a plain aggregate so it can live in a union.
struct WriteData
{
	int targetHandle;
	int seed;

	//! Build a WriteData from its field values
	static WriteData Generate(int targetHandle, int seed)
	{
		const WriteData data = { targetHandle, seed };
		return data;
	}
};

//! Parameters of a read command. Kept a plain aggregate so it can live in a union.
struct ReadData
{
	int targetHandle;
	int seed;

	//! Build a ReadData from its field values
	static ReadData Generate(int targetHandle, int seed)
	{
		const ReadData data = { targetHandle, seed };
		return data;
	}
};

//! Barrier command; carries no parameters.
struct Barrier
{
};

//! Parameters of an interleaved write command. Kept a plain aggregate so it can live in a union.
struct WriteDataInterleaved
{
	int		targetHandle;
	int		seed;
	bool	evenOdd;

	//! Build a WriteDataInterleaved from its field values
	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
	{
		const WriteDataInterleaved data = { targetHandle, seed, evenOdd };
		return data;
	}
};

//! Parameters of an interleaved read command. Kept a plain aggregate so it can live in a union.
struct ReadDataInterleaved
{
	int targetHandle;
	int seed0;
	int seed1;

	//! Build a ReadDataInterleaved from its field values
	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
	{
		const ReadDataInterleaved data = { targetHandle, seed0, seed1 };
		return data;
	}
};

//! Parameters of a command reading two targets. Kept a plain aggregate so it can live in a union.
struct ReadMultipleData
{
	int targetHandle0;
	int seed0;
	int targetHandle1;
	int seed1;

	//! Build a ReadMultipleData from its field values
	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
	{
		const ReadMultipleData data = { targetHandle0, seed0, targetHandle1, seed1 };
		return data;
	}
};

//! Parameters of a read-zero command. Kept a plain aggregate so it can live in a union.
struct ReadZeroData
{
	int targetHandle;

	//! Build a ReadZeroData from its field value
	static ReadZeroData Generate(int targetHandle)
	{
		const ReadZeroData data = { targetHandle };
		return data;
	}
};

} // namespace op

class InterCallTestCase;

//! Ordered list of commands, built by streaming op:: descriptors into it with operator<<.
//! The recorded commands are accessible to InterCallTestCase only.
class InterCallOperations
{
public:
	// each overload appends one command of the matching type and returns *this for chaining
	InterCallOperations& operator<< (const op::WriteData&);
	InterCallOperations& operator<< (const op::ReadData&);
	InterCallOperations& operator<< (const op::Barrier&);
	InterCallOperations& operator<< (const op::ReadMultipleData&);
	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
	InterCallOperations& operator<< (const op::ReadZeroData&);

private:
	//! One recorded command: a type tag plus the payload of that type
	struct Command
	{
		enum CommandType
		{
			TYPE_WRITE = 0,
			TYPE_READ,
			TYPE_BARRIER,
			TYPE_READ_MULTIPLE,
			TYPE_WRITE_INTERLEAVE,
			TYPE_READ_INTERLEAVE,
			TYPE_READ_ZERO,

TYPE_LAST
		};

CommandType type;

		// operator<< fills exactly the member that matches 'type'
union CommandUnion
		{
			op::WriteData				write;
			op::ReadData				read;
			op::Barrier					barrier;
			op::ReadMultipleData		readMulti;
			op::WriteDataInterleaved	writeInterleave;
			op::ReadDataInterleaved		readInterleave;
			op::ReadZeroData			readZero;
		} u_cmd;
	};

friend class InterCallTestCase;

std::vector<Command> m_cmds;
};

//! Append a write command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type			= Command::TYPE_WRITE;
	appended.u_cmd.write	= cmd;

	return *this;
}

//! Append a read command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type		= Command::TYPE_READ;
	appended.u_cmd.read	= cmd;

	return *this;
}

//! Append a barrier command; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type			= Command::TYPE_BARRIER;
	appended.u_cmd.barrier	= cmd;

	return *this;
}

//! Append a multi-target read command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type				= Command::TYPE_READ_MULTIPLE;
	appended.u_cmd.readMulti	= cmd;

	return *this;
}

//! Append an interleaved write command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type					= Command::TYPE_WRITE_INTERLEAVE;
	appended.u_cmd.writeInterleave	= cmd;

	return *this;
}

//! Append an interleaved read command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type					= Command::TYPE_READ_INTERLEAVE;
	appended.u_cmd.readInterleave	= cmd;

	return *this;
}

//! Append a read-zero command carrying cmd; returns *this for chaining.
InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
{
	m_cmds.push_back(Command());

	Command& appended = m_cmds.back();

	appended.type			= Command::TYPE_READ_ZERO;
	appended.u_cmd.readZero	= cmd;

	return *this;
}

/*--------------------------------------------------------------------*//*!
 * \brief Test case driven by a recorded InterCallOperations command list.
 *
 * The constructor copies the command list out of the given
 * InterCallOperations object. init() then walks the list, creating a
 * storage object for each previously unseen target handle and a shader
 * program per command.
 *
 * NOTE(review): the storage kinds are presumably GL buffer objects and
 * images (judging by the StorageType names) - confirm against genStorage().
 *//*--------------------------------------------------------------------*/
class InterCallTestCase : public TestCase
{
public:
	//! Kind of storage the commands operate on; chosen at construction.
	enum StorageType
	{
		STORAGE_BUFFER = 0,
		STORAGE_IMAGE,

STORAGE_LAST
	};
	//! Bit flags for the constructor's \c flags argument; tested there with bitwise AND.
	enum Flags
	{
		FLAG_USE_ATOMIC	= 1,	//!< sets m_useAtomic
		FLAG_USE_INT	= 2,	//!< sets m_formatInteger
	};
													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
													~InterCallTestCase			(void);

private:
	void											init						(void);
	void											deinit						(void);
	IterateResult									iterate						(void);
	bool											verifyResults				(void);

	// One runCommand() overload per command payload type. The int& arguments are
	// counters owned by the caller and passed by reference.
	// NOTE(review): presumably used to number programs / result storages in the log - confirm in the definitions.
void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::Barrier&);
	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);

	// Resource and program generators. The program generators return raw pointers.
	// NOTE(review): ownership presumably passes to m_operationPrograms and is released in deinit() - confirm.
glw::GLuint										genStorage					(int friendlyName);
	glw::GLuint										genResultStorage			(void);
	glu::ShaderProgram*								genWriteProgram				(int seed);
	glu::ShaderProgram*								genReadProgram				(int seed);
	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
	glu::ShaderProgram*								genReadZeroProgram			(void);

	// Immutable configuration, set once in the constructor's initializer list.
	// Members are initialized in declaration order, so keep this order in sync with the constructor.
const StorageType								m_storage;
	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
	const std::vector<InterCallOperations::Command>	m_cmds;					// !< copy of the command list of the InterCallOperations passed to the constructor
	const bool										m_useAtomic;			// !< true when FLAG_USE_ATOMIC was set in the constructor flags
	const bool										m_formatInteger;		// !< true when FLAG_USE_INT was set in the constructor flags

	// Per-run state. init() sizes the two vectors to one slot per command
	// (DE_NULL / 0 until filled) and adds a m_storageIDs entry for each new target handle.
std::vector<glu::ShaderProgram*>				m_operationPrograms;
	std::vector<glw::GLuint>						m_operationResultStorages;
	std::map<int, glw::GLuint>						m_storageIDs;			// !< command target handle -> storage created by genStorage()
};

/*--------------------------------------------------------------------*//*!
 * \brief Set up an inter-call test case.
 *
 * \param context	Test context, forwarded to TestCase.
 * \param name		Case name, forwarded to TestCase.
 * \param desc		Case description, forwarded to TestCase.
 * \param storage	Storage kind stored in m_storage.
 * \param flags		Bitwise OR of Flags values.
 * \param ops		Command list; its commands are copied into m_cmds.
 *
 * The dispatch grid size (512) and the per-invocation element count (2)
 * are fixed here.
 *//*--------------------------------------------------------------------*/
InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
	: TestCase				(context, name, desc)
	, m_storage				(storage)
	, m_invocationGridSize	(512)
	, m_perInvocationSize	(2)
	, m_cmds				(ops.m_cmds)
	// Both flags are single bits, so comparing the masked value to the flag is a plain "bit set" test.
	, m_useAtomic			((flags & FLAG_USE_ATOMIC) == FLAG_USE_ATOMIC)
	, m_formatInteger		((flags & FLAG_USE_INT) == FLAG_USE_INT)
{
}

// Destruction goes through the same deinit() path as an explicit deinit() call.
InterCallTestCase::~InterCallTestCase (void)
{
	this->deinit();
}

void InterCallTestCase::init (void)
{
	int programFriendlyName = 0;

// requirements

// generate resources and validate command list

m_operationPrograms.resize(m_cmds.size(), DE_NULL);
	m_operationResultStorages.resize(m_cmds.size(), 0);

for (int step = 0; step < (int)m_cmds.size(); ++step)
	{
		switch (m_cmds[step].type)
		{
			case InterCallOperations::Command::TYPE_WRITE:
			{
				const op::WriteData& cmd = m_cmds[step].u_cmd.write;

// new storage handle?
				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

// program
				{
					glu::ShaderProgram* program = genWriteProgram(cmd.seed);

m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
					m_testCtx.getLog() << *program;