/*-------------------------------------------------------------------------
* drawElements C++ Base Library
* -----------------------------
*
* Copyright 2015 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*//*!
* \file
* \brief Cross-thread barrier.
*//*--------------------------------------------------------------------*/
#include "deSpinBarrier.hpp"
#include "deThread.hpp"
#include "deRandom.hpp"
#include "deInt32.h"
#include <vector>
namespace de
{
SpinBarrier::SpinBarrier (deInt32 numThreads)
: m_numCores (deGetNumAvailableLogicalCores())
, m_numThreads (numThreads)
, m_numEntered (0)
, m_numLeaving (0)
, m_numRemoved (0)
{
DE_ASSERT(numThreads > 0);
}
SpinBarrier::~SpinBarrier (void)
{
DE_ASSERT(m_numEntered == 0 && m_numLeaving == 0);
}
void SpinBarrier::reset (deUint32 numThreads)
{
// If last threads were removed, m_numEntered > 0 && m_numRemoved > 0
DE_ASSERT(m_numLeaving == 0);
DE_ASSERT(numThreads > 0);
m_numThreads = numThreads;
m_numEntered = 0;
m_numLeaving = 0;
m_numRemoved = 0;
}
inline SpinBarrier::WaitMode getWaitMode (SpinBarrier::WaitMode requested, deUint32 numCores, deInt32 numThreads)
{
if (requested == SpinBarrier::WAIT_MODE_AUTO)
return ((deUint32)numThreads <= numCores) ? SpinBarrier::WAIT_MODE_BUSY : SpinBarrier::WAIT_MODE_YIELD;
else
return requested;
}
inline void wait (SpinBarrier::WaitMode mode)
{
DE_ASSERT(mode == SpinBarrier::WAIT_MODE_YIELD || mode == SpinBarrier::WAIT_MODE_BUSY);
if (mode == SpinBarrier::WAIT_MODE_YIELD)
deYield();
}
void SpinBarrier::sync (WaitMode requestedMode)
{
const deInt32 cachedNumThreads = m_numThreads;
const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
deMemoryReadWriteFence();
// m_numEntered must not be touched until all threads have had
// a chance to observe it being 0.
if (m_numLeaving > 0)
{
for (;;)
{
if (m_numLeaving == 0)
break;
wait(waitMode);
}
}
// If m_numRemoved > 0, m_numThreads will decrease. If m_numThreads is decreased
// just after atomicOp and before comparison, the branch could be taken by multiple
// threads. Since m_numThreads only changes if all threads are inside the spinbarrier,
// cached value at snapshotted at the beginning of the function will be equal for
// all threads.
if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
{
// Release all waiting threads. Since this thread has not been removed, m_numLeaving will
// be >= 1 until m_numLeaving is decremented at the end of this function.
m_numThreads -= m_numRemoved;
m_numLeaving = m_numThreads;
m_numRemoved = 0;
deMemoryReadWriteFence();
m_numEntered = 0;
}
else
{
for (;;)
{
if (m_numEntered == 0)
break;
wait(waitMode);
}
}
deAtomicDecrement32(&m_numLeaving);
deMemoryReadWriteFence();
}
void SpinBarrier::removeThread (WaitMode requestedMode)
{
const deInt32 cachedNumThreads = m_numThreads;
const WaitMode waitMode = getWaitMode(requestedMode, m_numCores, cachedNumThreads);
// Wait for other threads exiting previous barrier
if (m_numLeaving > 0)
{
for (;;)
{
if (m_numLeaving == 0)
break;
wait(waitMode);
}
}
// Ask for last thread entering barrier to adjust thread count
deAtomicIncrement32(&m_numRemoved);
// See sync() - use cached value
if (deAtomicIncrement32(&m_numEntered) == cachedNumThreads)
{
// Release all waiting threads.
m_numThreads -= m_numRemoved;
m_numLeaving = m_numThreads;
m_numRemoved = 0;
deMemoryReadWriteFence();
m_numEntered = 0;
}
}
namespace
{
void singleThreadTest (SpinBarrier::WaitMode mode)
{
SpinBarrier barrier(1);
barrier.sync(mode);
barrier.sync(mode);
barrier.sync(mode);
}
class TestThread : public de::Thread
{
public:
TestThread (SpinBarrier& barrier, volatile deInt32* sharedVar, int numThreads, int threadNdx)
: m_barrier (barrier)
, m_sharedVar (sharedVar)
, m_numThreads (numThreads)
, m_threadNdx (threadNdx)
, m_busyOk ((deUint32)m_numThreads <= deGetNumAvailableLogicalCores())
{
}
void run (void)
{
const int numIters = 10000;
de::Random rnd (deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx));
for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
{
// Phase 1: count up
deAtomicIncrement32(m_sharedVar);
// Verify
m_barrier.sync(getWaitMode(rnd));
DE_TEST_ASSERT(*m_sharedVar == m_numThreads);
m_barrier.sync(getWaitMode(rnd));
// Phase 2: count down
deAtomicDecrement32(m_sharedVar);
// Verify
m_barrier.sync(getWaitMode(rnd));
DE_TEST_ASSERT(*m_sharedVar == 0);
m_barrier.sync(getWaitMode(rnd));
}
}
private:
SpinBarrier& m_barrier;
volatile deInt32* const m_sharedVar;
const int m_numThreads;
const int m_threadNdx;
const bool m_busyOk;
SpinBarrier::WaitMode getWaitMode (de::Random& rnd)
{
static const SpinBarrier::WaitMode s_allModes[] =
{
SpinBarrier::WAIT_MODE_YIELD,
SpinBarrier::WAIT_MODE_AUTO,
SpinBarrier::WAIT_MODE_BUSY,
};
const int numModes = DE_LENGTH_OF_ARRAY(s_allModes) - (m_busyOk ? 0 : 1);
return rnd.choose<SpinBarrier::WaitMode>(DE_ARRAY_BEGIN(s_allModes), DE_ARRAY_BEGIN(s_allModes) + numModes);
}
};
void multiThreadTest (int numThreads)
{
SpinBarrier barrier (numThreads);
volatile deInt32 sharedVar = 0;
std::vector<TestThread*> threads (numThreads, static_cast<TestThread*>(DE_NULL));
for (int ndx = 0; ndx < numThreads; ndx++)
{
threads[ndx] = new TestThread(barrier, &sharedVar, numThreads, ndx);
DE_TEST_ASSERT(threads[ndx]);
threads[ndx]->start();
}
for (int ndx = 0; ndx < numThreads; ndx++)
{
threads[ndx]->join();
delete threads[ndx];
}
DE_TEST_ASSERT(sharedVar == 0);
}
void singleThreadRemoveTest (SpinBarrier::WaitMode mode)
{
SpinBarrier barrier(3);
barrier.removeThread(mode);
barrier.removeThread(mode);
barrier.sync(mode);
barrier.removeThread(mode);
barrier.reset(1);
barrier.sync(mode);
barrier.reset(2);
barrier.removeThread(mode);
barrier.sync(mode);
}
class TestExitThread : public de::Thread
{
public:
TestExitThread (SpinBarrier& barrier, int numThreads, int threadNdx, SpinBarrier::WaitMode waitMode)
: m_barrier (barrier)
, m_numThreads (numThreads)
, m_threadNdx (threadNdx)
, m_waitMode (waitMode)
{
}
void run (void)
{
const int numIters = 10000;
de::Random rnd (deInt32Hash(m_numThreads) ^ deInt32Hash(m_threadNdx) ^ deInt32Hash((deInt32)m_waitMode));
const int invExitProb = 1000;
for (int iterNdx = 0; iterNdx < numIters; iterNdx++)
{
if (rnd.getInt(0, invExitProb) == 0)
{
m_barrier.removeThread(m_waitMode);
break;
}
else
m_barrier.sync(m_waitMode);
}
}
private:
SpinBarrier& m_barrier;
const int m_numThreads;
const int m_threadNdx;
const SpinBarrier::WaitMode m_waitMode;
};
void multiThreadRemoveTest (int numThreads, SpinBarrier::WaitMode waitMode)
{
SpinBarrier barrier (numThreads);
std::vector<TestExitThread*> threads (numThreads, static_cast<TestExitThread*>(DE_NULL));
for (int ndx = 0; ndx < numThreads; ndx++)
{
threads[ndx] = new TestExitThread(barrier, numThreads, ndx, waitMode);
DE_TEST_ASSERT(threads[ndx]);
threads[ndx]->start();
}
for (int ndx = 0; ndx < numThreads; ndx++)
{
threads[ndx]->join();
delete threads[ndx];
}
}
} // anonymous
void SpinBarrier_selfTest (void)
{
singleThreadTest(SpinBarrier::WAIT_MODE_YIELD);
singleThreadTest(SpinBarrier::WAIT_MODE_BUSY);
singleThreadTest(SpinBarrier::WAIT_MODE_AUTO);
multiThreadTest(1);
multiThreadTest(2);
multiThreadTest(4);
multiThreadTest(8);
multiThreadTest(16);
singleThreadRemoveTest(SpinBarrier::WAIT_MODE_YIELD);
singleThreadRemoveTest(SpinBarrier::WAIT_MODE_BUSY);
singleThreadRemoveTest(SpinBarrier::WAIT_MODE_AUTO);
multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_BUSY);
multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_AUTO);
multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_AUTO);
multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_AUTO);
multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_AUTO);
multiThreadRemoveTest(1, SpinBarrier::WAIT_MODE_YIELD);
multiThreadRemoveTest(2, SpinBarrier::WAIT_MODE_YIELD);
multiThreadRemoveTest(4, SpinBarrier::WAIT_MODE_YIELD);
multiThreadRemoveTest(8, SpinBarrier::WAIT_MODE_YIELD);
multiThreadRemoveTest(16, SpinBarrier::WAIT_MODE_YIELD);
}
} // de