// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "Renderer.hpp" #include "Clipper.hpp" #include "Surface.hpp" #include "Primitive.hpp" #include "Polygon.hpp" #include "Main/FrameBuffer.hpp" #include "Main/SwiftConfig.hpp" #include "Reactor/Reactor.hpp" #include "Shader/Constants.hpp" #include "Common/MutexLock.hpp" #include "Common/CPUID.hpp" #include "Common/Memory.hpp" #include "Common/Resource.hpp" #include "Common/Half.hpp" #include "Common/Math.hpp" #include "Common/Timer.hpp" #include "Common/Debug.hpp" #undef max bool disableServer = true; #ifndef NDEBUG unsigned int minPrimitives = 1; unsigned int maxPrimitives = 1 << 21; #endif namespace sw { extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] extern bool booleanFaceRegister; extern bool fullPixelPositionRegister; extern bool leadingVertexFirst; // Flat shading uses first vertex, else last extern bool secondaryColor; // Specular lighting is applied after texturing extern bool colorsDefaultToZero; extern bool forceWindowed; extern bool complementaryDepthBuffer; extern bool postBlendSRGB; extern bool exactColorRounding; extern TransparencyAntialiasing transparencyAntialiasing; extern bool forceClearRegisters; extern bool precacheVertex; extern bool precacheSetup; extern bool precachePixel; static const int batchSize = 128; AtomicInt threadCount(1); AtomicInt Renderer::unitCount(1); AtomicInt Renderer::clusterCount(1); TranscendentalPrecision logPrecision = ACCURATE; TranscendentalPrecision expPrecision = ACCURATE; TranscendentalPrecision rcpPrecision = ACCURATE; TranscendentalPrecision rsqPrecision = ACCURATE; bool perspectiveCorrection = true; static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) { static bool initialized = false; if(!initialized) { sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; sw::booleanFaceRegister = conventions.booleanFaceRegister; sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; sw::leadingVertexFirst = conventions.leadingVertexFirst; sw::secondaryColor = conventions.secondaryColor; sw::colorsDefaultToZero = conventions.colorsDefaultToZero; sw::exactColorRounding = exactColorRounding; initialized = true; } } struct Parameters { Renderer *renderer; int threadIndex; }; Query::Query(Type type) : building(false), data(0), type(type), reference(1) { } void Query::addRef() { ++reference; // Atomic } void Query::release() { int ref = reference--; // Atomic ASSERT(ref >= 0); if(ref == 0) { delete this; } } DrawCall::DrawCall() { queries = 0; vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; vsDirtyConstI = 16; vsDirtyConstB = 16; psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; psDirtyConstI = 16; psDirtyConstB = 16; references = -1; data = (DrawData*)allocate(sizeof(DrawData)); data->constants = &constants; } DrawCall::~DrawCall() { delete queries; deallocate(data); } Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() { setGlobalRenderingSettings(conventions, exactColorRounding); setRenderTarget(0, 0); clipper = new Clipper(symmetricNormalizedDepth); blitter = new Blitter; updateViewMatrix = true; updateBaseMatrix = true; updateProjectionMatrix = true; updateClipPlanes = true; #if PERF_HUD resetTimers(); #endif for(int i = 0; i < 16; i++) { vertexTask[i] = 0; worker[i] = 0; resume[i] = 0; suspend[i] = 0; } threadsAwake = 0; resumeApp = new Event(); currentDraw = 0; nextDraw = 0; qHead = 0; qSize = 0; for(int i = 0; i < 16; i++) { triangleBatch[i] = 0; primitiveBatch[i] = 0; } for(int draw = 0; draw < DRAW_COUNT; draw++) { drawCall[draw] = new DrawCall(); drawList[draw] = drawCall[draw]; } for(int unit = 0; unit < 16; unit++) { primitiveProgress[unit].init(); } for(int cluster = 0; cluster < 16; cluster++) { pixelProgress[cluster].init(); } clipFlags = 0; swiftConfig = new SwiftConfig(disableServer); updateConfiguration(true); sync = new Resource(0); } Renderer::~Renderer() { sync->destruct(); delete clipper; clipper = nullptr; delete blitter; blitter = nullptr; terminateThreads(); delete resumeApp; for(int draw = 0; draw < DRAW_COUNT; draw++) { delete drawCall[draw]; } delete swiftConfig; } // This object has to be mem aligned void* Renderer::operator new(size_t size) { ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class return sw::allocate(sizeof(Renderer), 16); } void Renderer::operator delete(void * mem) { sw::deallocate(mem); } void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) { #ifndef NDEBUG if(count < minPrimitives || count > maxPrimitives) { return; } #endif context->drawType = drawType; updateConfiguration(); updateClipper(); int ss = context->getSuperSampleCount(); int ms = context->getMultiSampleCount(); bool requiresSync = false; for(int q = 0; q < ss; q++) { unsigned int oldMultiSampleMask = context->multiSampleMask; context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); if(!context->multiSampleMask) { continue; } sync->lock(sw::PRIVATE); if(update || oldMultiSampleMask != context->multiSampleMask) { vertexState = VertexProcessor::update(drawType); setupState = SetupProcessor::update(); pixelState = PixelProcessor::update(); vertexRoutine = VertexProcessor::routine(vertexState); setupRoutine = SetupProcessor::routine(setupState); pixelRoutine = PixelProcessor::routine(pixelState); } int batch = batchSize / ms; int (Renderer::*setupPrimitives)(int batch, int count); if(context->isDrawTriangle()) { switch(context->fillMode) { case FILL_SOLID: setupPrimitives = &Renderer::setupSolidTriangles; break; case FILL_WIREFRAME: setupPrimitives = &Renderer::setupWireframeTriangle; batch = 1; break; case FILL_VERTEX: setupPrimitives = &Renderer::setupVertexTriangle; batch = 1; break; default: ASSERT(false); return; } } else if(context->isDrawLine()) { setupPrimitives = &Renderer::setupLines; } else // Point draw { setupPrimitives = &Renderer::setupPoints; } DrawCall *draw = nullptr; do { for(int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->references == -1) { draw = drawCall[i]; drawList[nextDraw & DRAW_COUNT_BITS] = draw; break; } } if(!draw) { resumeApp->wait(); } } while(!draw); DrawData *data = draw->data; if(queries.size() != 0) { draw->queries = new std::list<Query*>(); bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; for(auto &query : queries) { if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) { query->addRef(); draw->queries->push_back(query); } } } draw->drawType = drawType; draw->batchSize = batch; vertexRoutine->bind(); setupRoutine->bind(); pixelRoutine->bind(); draw->vertexRoutine = vertexRoutine; draw->setupRoutine = setupRoutine; draw->pixelRoutine = pixelRoutine; draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); draw->setupPrimitives = setupPrimitives; draw->setupState = setupState; for(int i = 0; i < MAX_VERTEX_INPUTS; i++) { draw->vertexStream[i] = context->input[i].resource; data->input[i] = context->input[i].buffer; data->stride[i] = context->input[i].stride; if(draw->vertexStream[i]) { draw->vertexStream[i]->lock(PUBLIC, PRIVATE); } } if(context->indexBuffer) { data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; } draw->indexBuffer = context->indexBuffer; for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) { draw->texture[sampler] = 0; } for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) { if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) { draw->texture[sampler] = context->texture[sampler]; draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets data->mipmap[sampler] = context->sampler[sampler].getTextureData(); requiresSync |= context->sampler[sampler].requiresSync(); } } if(context->pixelShader) { if(draw->psDirtyConstF) { memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); draw->psDirtyConstF = 0; } if(draw->psDirtyConstI) { memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); draw->psDirtyConstI = 0; } if(draw->psDirtyConstB) { memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); draw->psDirtyConstB = 0; } PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); } else { for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) { draw->pUniformBuffers[i] = nullptr; } } if(context->pixelShaderModel() <= 0x0104) { for(int stage = 0; stage < 8; stage++) { if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) { data->textureStage[stage] = context->textureStage[stage].uniforms; } else break; } } if(context->vertexShader) { if(context->vertexShader->getShaderModel() >= 0x0300) { for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) { if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) { draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync(); } } } if(draw->vsDirtyConstF) { memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); draw->vsDirtyConstF = 0; } if(draw->vsDirtyConstI) { memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); draw->vsDirtyConstI = 0; } if(draw->vsDirtyConstB) { memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); draw->vsDirtyConstB = 0; } if(context->vertexShader->isInstanceIdDeclared()) { data->instanceID = context->instanceID; } VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); } else { data->ff = ff; draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; draw->vsDirtyConstI = 16; draw->vsDirtyConstB = 16; for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) { draw->vUniformBuffers[i] = nullptr; } for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) { draw->transformFeedbackBuffers[i] = nullptr; } } if(pixelState.stencilActive) { data->stencil[0] = stencil; data->stencil[1] = stencilCCW; } if(pixelState.fogActive) { data->fog = fog; } if(setupState.isDrawPoint) { data->point = point; } data->lineWidth = context->lineWidth; data->factor = factor; if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) { float ref = context->alphaReference * (1.0f / 255.0f); float margin = sw::min(ref, 1.0f - ref); if(ms == 4) { data->a2c0 = replicate(ref - margin * 0.6f); data->a2c1 = replicate(ref - margin * 0.2f); data->a2c2 = replicate(ref + margin * 0.2f); data->a2c3 = replicate(ref + margin * 0.6f); } else if(ms == 2) { data->a2c0 = replicate(ref - margin * 0.3f); data->a2c1 = replicate(ref + margin * 0.3f); } else ASSERT(false); } if(pixelState.occlusionEnabled) { for(int cluster = 0; cluster < clusterCount; cluster++) { data->occlusion[cluster] = 0; } } #if PERF_PROFILE for(int cluster = 0; cluster < clusterCount; cluster++) { for(int i = 0; i < PERF_TIMERS; i++) { data->cycles[i][cluster] = 0; } } #endif // Viewport { float W = 0.5f * viewport.width; float H = 0.5f * viewport.height; float X0 = viewport.x0 + W; float Y0 = viewport.y0 + H; float N = viewport.minZ; float F = viewport.maxZ; float Z = F - N; if(context->isDrawTriangle(false)) { N += context->depthBias; } if(complementaryDepthBuffer) { Z = -Z; N = 1 - N; } static const float X[5][16] = // Fragment offsets { {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples }; static const float Y[5][16] = // Fragment offsets { {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples }; int s = sw::log2(ss); data->Wx16 = replicate(W * 16); data->Hx16 = replicate(H * 16); data->X0x16 = replicate(X0 * 16 - 8); data->Y0x16 = replicate(Y0 * 16 - 8); data->XXXX = replicate(X[s][q] / W); data->YYYY = replicate(Y[s][q] / H); data->halfPixelX = replicate(0.5f / W); data->halfPixelY = replicate(0.5f / H); data->viewportHeight = abs(viewport.height); data->slopeDepthBias = context->slopeDepthBias; data->depthRange = Z; data->depthNear = N; draw->clipFlags = clipFlags; if(clipFlags) { if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; } } // Target { for(int index = 0; index < RENDERTARGETS; index++) { draw->renderTarget[index] = context->renderTarget[index]; if(draw->renderTarget[index]) { unsigned int layer = context->renderTargetLayer[index]; requiresSync |= context->renderTarget[index]->requiresSync(); data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); } } draw->depthBuffer = context->depthBuffer; draw->stencilBuffer = context->stencilBuffer; if(draw->depthBuffer) { unsigned int layer = context->depthBufferLayer; requiresSync |= context->depthBuffer->requiresSync(); data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); data->depthPitchB = context->depthBuffer->getInternalPitchB(); data->depthSliceB = context->depthBuffer->getInternalSliceB(); } if(draw->stencilBuffer) { unsigned int layer = context->stencilBufferLayer; requiresSync |= context->stencilBuffer->requiresSync(); data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); } } // Scissor { data->scissorX0 = scissor.x0; data->scissorX1 = scissor.x1; data->scissorY0 = scissor.y0; data->scissorY1 = scissor.y1; } draw->primitive = 0; draw->count = count; draw->references = (count + batch - 1) / batch; schedulerMutex.lock(); ++nextDraw; // Atomic schedulerMutex.unlock(); #ifndef NDEBUG if(threadCount == 1) // Use main thread for draw execution { threadsAwake = 1; task[0].type = Task::RESUME; taskLoop(0); } else #endif { if(!threadsAwake) { suspend[0]->wait(); threadsAwake = 1; task[0].type = Task::RESUME; resume[0]->signal(); } } } // TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization. if(requiresSync) { synchronize(); } } void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask) { blitter->clear(value, format, dest, clearRect, rgbaMask); } void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion) { blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion}); } void Renderer::blit3D(Surface *source, Surface *dest) { blitter->blit3D(source, dest); } void Renderer::threadFunction(void *parameters) { Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; if(logPrecision < IEEE) { CPUID::setFlushToZero(true); CPUID::setDenormalsAreZero(true); } renderer->threadLoop(threadIndex); } void Renderer::threadLoop(int threadIndex) { while(!exitThreads) { taskLoop(threadIndex); suspend[threadIndex]->signal(); resume[threadIndex]->wait(); } } void Renderer::taskLoop(int threadIndex) { while(task[threadIndex].type != Task::SUSPEND) { scheduleTask(threadIndex); executeTask(threadIndex); } } void Renderer::findAvailableTasks() { // Find pixel tasks for(int cluster = 0; cluster < clusterCount; cluster++) { if(!pixelProgress[cluster].executing) { for(int unit = 0; unit < unitCount; unit++) { if(primitiveProgress[unit].references > 0) // Contains processed primitives { if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) { if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered { Task &task = taskQueue[qHead]; task.type = Task::PIXELS; task.primitiveUnit = unit; task.pixelCluster = cluster; pixelProgress[cluster].executing = true; // Commit to the task queue qHead = (qHead + 1) & TASK_COUNT_BITS; qSize++; break; } } } } } } // Find primitive tasks if(currentDraw == nextDraw) { return; // No more primitives to process } for(int unit = 0; unit < unitCount; unit++) { DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS]; int primitive = draw->primitive; int count = draw->count; if(primitive >= count) { ++currentDraw; // Atomic if(currentDraw == nextDraw) { return; // No more primitives to process } draw = drawList[currentDraw & DRAW_COUNT_BITS]; } if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit { primitive = draw->primitive; count = draw->count; int batch = draw->batchSize; primitiveProgress[unit].drawCall = currentDraw; primitiveProgress[unit].firstPrimitive = primitive; primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; draw->primitive += batch; Task &task = taskQueue[qHead]; task.type = Task::PRIMITIVES; task.primitiveUnit = unit; primitiveProgress[unit].references = -1; // Commit to the task queue qHead = (qHead + 1) & TASK_COUNT_BITS; qSize++; } } } void Renderer::scheduleTask(int threadIndex) { schedulerMutex.lock(); int curThreadsAwake = threadsAwake; if((int)qSize < threadCount - curThreadsAwake + 1) { findAvailableTasks(); } if(qSize != 0) { task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS]; qSize--; if(curThreadsAwake != threadCount) { int wakeup = qSize - curThreadsAwake + 1; for(int i = 0; i < threadCount && wakeup > 0; i++) { if(task[i].type == Task::SUSPEND) { suspend[i]->wait(); task[i].type = Task::RESUME; resume[i]->signal(); ++threadsAwake; // Atomic wakeup--; } } } } else { task[threadIndex].type = Task::SUSPEND; --threadsAwake; // Atomic } schedulerMutex.unlock(); } void Renderer::executeTask(int threadIndex) { #if PERF_HUD int64_t startTick = Timer::ticks(); #endif switch(task[threadIndex].type) { case Task::PRIMITIVES: { int unit = task[threadIndex].primitiveUnit; int input = primitiveProgress[unit].firstPrimitive; int count = primitiveProgress[unit].primitiveCount; DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; processPrimitiveVertices(unit, input, count, draw->count, threadIndex); #if PERF_HUD int64_t time = Timer::ticks(); vertexTime[threadIndex] += time - startTick; startTick = time; #endif int visible = 0; if(!draw->setupState.rasterizerDiscard) { visible = (this->*setupPrimitives)(unit, count); } primitiveProgress[unit].visible = visible; primitiveProgress[unit].references = clusterCount; #if PERF_HUD setupTime[threadIndex] += Timer::ticks() - startTick; #endif } break; case Task::PIXELS: { int unit = task[threadIndex].primitiveUnit; int visible = primitiveProgress[unit].visible; if(visible > 0) { int cluster = task[threadIndex].pixelCluster; Primitive *primitive = primitiveBatch[unit]; DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; DrawData *data = draw->data; PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; pixelRoutine(primitive, visible, cluster, data); } finishRendering(task[threadIndex]); #if PERF_HUD pixelTime[threadIndex] += Timer::ticks() - startTick; #endif } break; case Task::RESUME: break; case Task::SUSPEND: break; default: ASSERT(false); } } void Renderer::synchronize() { sync->lock(sw::PUBLIC); sync->unlock(); } void Renderer::finishRendering(Task &pixelTask) { int unit = pixelTask.primitiveUnit; int cluster = pixelTask.pixelCluster; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; DrawData &data = *draw.data; int primitive = primitiveProgress[unit].firstPrimitive; int count = primitiveProgress[unit].primitiveCount; int processedPrimitives = primitive + count; pixelProgress[cluster].processedPrimitives = processedPrimitives; if(pixelProgress[cluster].processedPrimitives >= draw.count) { ++pixelProgress[cluster].drawCall; // Atomic pixelProgress[cluster].processedPrimitives = 0; } int ref = primitiveProgress[unit].references--; // Atomic if(ref == 0) { ref = draw.references--; // Atomic if(ref == 0) { #if PERF_PROFILE for(int cluster = 0; cluster < clusterCount; cluster++) { for(int i = 0; i < PERF_TIMERS; i++) { profiler.cycles[i] += data.cycles[i][cluster]; } } #endif if(draw.queries) { for(auto &query : *(draw.queries)) { switch(query->type) { case Query::FRAGMENTS_PASSED: for(int cluster = 0; cluster < clusterCount; cluster++) { query->data += data.occlusion[cluster]; } break; case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: query->data += processedPrimitives; break; default: break; } query->release(); } delete draw.queries; draw.queries = 0; } for(int i = 0; i < RENDERTARGETS; i++) { if(draw.renderTarget[i]) { draw.renderTarget[i]->unlockInternal(); } } if(draw.depthBuffer) { draw.depthBuffer->unlockInternal(); } if(draw.stencilBuffer) { draw.stencilBuffer->unlockStencil(); } for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) { if(draw.texture[i]) { draw.texture[i]->unlock(); } } for(int i = 0; i < MAX_VERTEX_INPUTS; i++) { if(draw.vertexStream[i]) { draw.vertexStream[i]->unlock(); } } if(draw.indexBuffer) { draw.indexBuffer->unlock(); } for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) { if(draw.pUniformBuffers[i]) { draw.pUniformBuffers[i]->unlock(); } if(draw.vUniformBuffers[i]) { draw.vUniformBuffers[i]->unlock(); } } for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) { if(draw.transformFeedbackBuffers[i]) { draw.transformFeedbackBuffers[i]->unlock(); } } draw.vertexRoutine->unbind(); draw.setupRoutine->unbind(); draw.pixelRoutine->unbind(); sync->unlock(); draw.references = -1; resumeApp->signal(); } } pixelProgress[cluster].executing = false; } void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) { Triangle *triangle = triangleBatch[unit]; int primitiveDrawCall = primitiveProgress[unit].drawCall; DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; DrawData *data = draw->data; VertexTask *task = vertexTask[thread]; const void *indices = data->indices; VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; if(task->vertexCache.drawCall != primitiveDrawCall) { task->vertexCache.clear(); task->vertexCache.drawCall = primitiveDrawCall; } unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size switch(draw->drawType) { case DRAW_POINTLIST: { unsigned int index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index; batch[i][1] = index; batch[i][2] = index; index += 1; } } break; case DRAW_LINELIST: { unsigned int index = 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index + 0; batch[i][1] = index + 1; batch[i][2] = index + 1; index += 2; } } break; case DRAW_LINESTRIP: { unsigned int index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index + 0; batch[i][1] = index + 1; batch[i][2] = index + 1; index += 1; } } break; case DRAW_LINELOOP: { unsigned int index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = (index + 0) % loop; batch[i][1] = (index + 1) % loop; batch[i][2] = (index + 1) % loop; index += 1; } } break; case DRAW_TRIANGLELIST: { unsigned int index = 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index + 0; batch[i][1] = index + 1; batch[i][2] = index + 2; index += 3; } } break; case DRAW_TRIANGLESTRIP: { unsigned int index = start; for(unsigned int i = 0; i < triangleCount; i++) { if(leadingVertexFirst) { batch[i][0] = index + 0; batch[i][1] = index + (index & 1) + 1; batch[i][2] = index + (~index & 1) + 1; } else { batch[i][0] = index + (index & 1); batch[i][1] = index + (~index & 1); batch[i][2] = index + 2; } index += 1; } } break; case DRAW_TRIANGLEFAN: { unsigned int index = start; for(unsigned int i = 0; i < triangleCount; i++) { if(leadingVertexFirst) { batch[i][0] = index + 1; batch[i][1] = index + 2; batch[i][2] = 0; } else { batch[i][0] = 0; batch[i][1] = index + 1; batch[i][2] = index + 2; } index += 1; } } break; case DRAW_INDEXEDPOINTLIST8: { const unsigned char *index = (const unsigned char*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = *index; batch[i][1] = *index; batch[i][2] = *index; index += 1; } } break; case DRAW_INDEXEDPOINTLIST16: { const unsigned short *index = (const unsigned short*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = *index; batch[i][1] = *index; batch[i][2] = *index; index += 1; } } break; case DRAW_INDEXEDPOINTLIST32: { const unsigned int *index = (const unsigned int*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = *index; batch[i][1] = *index; batch[i][2] = *index; index += 1; } } break; case DRAW_INDEXEDLINELIST8: { const unsigned char *index = (const unsigned char*)indices + 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 2; } } break; case DRAW_INDEXEDLINELIST16: { const unsigned short *index = (const unsigned short*)indices + 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 2; } } break; case DRAW_INDEXEDLINELIST32: { const unsigned int *index = (const unsigned int*)indices + 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 2; } } break; case DRAW_INDEXEDLINESTRIP8: { const unsigned char *index = (const unsigned char*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 1; } } break; case DRAW_INDEXEDLINESTRIP16: { const unsigned short *index = (const unsigned short*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 1; } } break; case DRAW_INDEXEDLINESTRIP32: { const unsigned int *index = (const unsigned int*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[1]; index += 1; } } break; case DRAW_INDEXEDLINELOOP8: { const unsigned char *index = (const unsigned char*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[(start + i + 0) % loop]; batch[i][1] = index[(start + i + 1) % loop]; batch[i][2] = index[(start + i + 1) % loop]; } } break; case DRAW_INDEXEDLINELOOP16: { const unsigned short *index = (const unsigned short*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[(start + i + 0) % loop]; batch[i][1] = index[(start + i + 1) % loop]; batch[i][2] = index[(start + i + 1) % loop]; } } break; case DRAW_INDEXEDLINELOOP32: { const unsigned int *index = (const unsigned int*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[(start + i + 0) % loop]; batch[i][1] = index[(start + i + 1) % loop]; batch[i][2] = index[(start + i + 1) % loop]; } } break; case DRAW_INDEXEDTRIANGLELIST8: { const unsigned char *index = (const unsigned char*)indices + 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[2]; index += 3; } } break; case DRAW_INDEXEDTRIANGLELIST16: { const unsigned short *index = (const unsigned short*)indices + 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[2]; index += 3; } } break; case DRAW_INDEXEDTRIANGLELIST32: { const unsigned int *index = (const unsigned int*)indices + 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[1]; batch[i][2] = index[2]; index += 3; } } break; case DRAW_INDEXEDTRIANGLESTRIP8: { const unsigned char *index = (const unsigned char*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[((start + i) & 1) + 1]; batch[i][2] = index[(~(start + i) & 1) + 1]; index += 1; } } break; case DRAW_INDEXEDTRIANGLESTRIP16: { const unsigned short *index = (const unsigned short*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[((start + i) & 1) + 1]; batch[i][2] = index[(~(start + i) & 1) + 1]; index += 1; } } break; case DRAW_INDEXEDTRIANGLESTRIP32: { const unsigned int *index = (const unsigned int*)indices + start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[0]; batch[i][1] = index[((start + i) & 1) + 1]; batch[i][2] = index[(~(start + i) & 1) + 1]; index += 1; } } break; case DRAW_INDEXEDTRIANGLEFAN8: { const unsigned char *index = (const unsigned char*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[start + i + 1]; batch[i][1] = index[start + i + 2]; batch[i][2] = index[0]; } } break; case DRAW_INDEXEDTRIANGLEFAN16: { const unsigned short *index = (const unsigned short*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[start + i + 1]; batch[i][1] = index[start + i + 2]; batch[i][2] = index[0]; } } break; case DRAW_INDEXEDTRIANGLEFAN32: { const unsigned int *index = (const unsigned int*)indices; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = index[start + i + 1]; batch[i][1] = index[start + i + 2]; batch[i][2] = index[0]; } } break; case DRAW_QUADLIST: { unsigned int index = 4 * start / 2; for(unsigned int i = 0; i < triangleCount; i += 2) { batch[i+0][0] = index + 0; batch[i+0][1] = index + 1; batch[i+0][2] = index + 2; batch[i+1][0] = index + 0; batch[i+1][1] = index + 2; batch[i+1][2] = index + 3; index += 4; } } break; default: ASSERT(false); return; } task->primitiveStart = start; task->vertexCount = triangleCount * 3; vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); } int Renderer::setupSolidTriangles(int unit, int count) { Triangle *triangle = triangleBatch[unit]; Primitive *primitive = primitiveBatch[unit]; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; SetupProcessor::State &state = draw.setupState; const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; int ms = state.multiSample; int pos = state.positionRegister; const DrawData *data = draw.data; int visible = 0; for(int i = 0; i < count; i++, triangle++) { Vertex &v0 = triangle->v0; Vertex &v1 = triangle->v1; Vertex &v2 = triangle->v2; if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) { Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!clipper->clip(polygon, clipFlagsOr, draw)) { continue; } } if(setupRoutine(primitive, triangle, &polygon, data)) { primitive += ms; visible++; } } } return visible; } int Renderer::setupWireframeTriangle(int unit, int count) { Triangle *triangle = triangleBatch[unit]; Primitive *primitive = primitiveBatch[unit]; int visible = 0; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; SetupProcessor::State &state = draw.setupState; const Vertex &v0 = triangle[0].v0; const Vertex &v1 = triangle[0].v1; const Vertex &v2 = triangle[0].v2; float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; if(state.cullMode == CULL_CLOCKWISE) { if(d >= 0) return 0; } else if(state.cullMode == CULL_COUNTERCLOCKWISE) { if(d <= 0) return 0; } // Copy attributes triangle[1].v0 = v1; triangle[1].v1 = v2; triangle[2].v0 = v2; triangle[2].v1 = v0; if(state.color[0][0].flat) // FIXME { for(int i = 0; i < 2; i++) { triangle[1].v0.C[i] = triangle[0].v0.C[i]; triangle[1].v1.C[i] = triangle[0].v0.C[i]; triangle[2].v0.C[i] = triangle[0].v0.C[i]; triangle[2].v1.C[i] = triangle[0].v0.C[i]; } } for(int i = 0; i < 3; i++) { if(setupLine(*primitive, *triangle, draw)) { primitive->area = 0.5f * d; primitive++; visible++; } triangle++; } return visible; } int Renderer::setupVertexTriangle(int unit, int count) { Triangle *triangle = triangleBatch[unit]; Primitive *primitive = primitiveBatch[unit]; int visible = 0; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; SetupProcessor::State &state = draw.setupState; const Vertex &v0 = triangle[0].v0; const Vertex &v1 = triangle[0].v1; const Vertex &v2 = triangle[0].v2; float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; if(state.cullMode == CULL_CLOCKWISE) { if(d >= 0) return 0; } else if(state.cullMode == CULL_COUNTERCLOCKWISE) { if(d <= 0) return 0; } // Copy attributes triangle[1].v0 = v1; triangle[2].v0 = v2; for(int i = 0; i < 3; i++) { if(setupPoint(*primitive, *triangle, draw)) { primitive->area = 0.5f * d; primitive++; visible++; } triangle++; } return visible; } int Renderer::setupLines(int unit, int count) { Triangle *triangle = triangleBatch[unit]; Primitive *primitive = primitiveBatch[unit]; int visible = 0; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; SetupProcessor::State &state = draw.setupState; int ms = state.multiSample; for(int i = 0; i < count; i++) { if(setupLine(*primitive, *triangle, draw)) { primitive += ms; visible++; } triangle++; } return visible; } int Renderer::setupPoints(int unit, int count) { Triangle *triangle = triangleBatch[unit]; Primitive *primitive = primitiveBatch[unit]; int visible = 0; DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; SetupProcessor::State &state = draw.setupState; int ms = state.multiSample; for(int i = 0; i < count; i++) { if(setupPoint(*primitive, *triangle, draw)) { primitive += ms; visible++; } triangle++; } return visible; } bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; const SetupProcessor::State &state = draw.setupState; const DrawData &data = *draw.data; float lineWidth = data.lineWidth; Vertex &v0 = triangle.v0; Vertex &v1 = triangle.v1; int pos = state.positionRegister; const float4 &P0 = v0.v[pos]; const float4 &P1 = v1.v[pos]; if(P0.w <= 0 && P1.w <= 0) { return false; } const float W = data.Wx16[0] * (1.0f / 16.0f); const float H = data.Hx16[0] * (1.0f / 16.0f); float dx = W * (P1.x / P1.w - P0.x / P0.w); float dy = H * (P1.y / P1.w - P0.y / P0.w); if(dx == 0 && dy == 0) { return false; } if(state.multiSample > 1) // Rectangle { float4 P[4]; int C[4]; P[0] = P0; P[1] = P1; P[2] = P1; P[3] = P0; float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); dx *= scale; dy *= scale; float dx0h = dx * P0.w / H; float dy0w = dy * P0.w / W; float dx1h = dx * P1.w / H; float dy1w = dy * P1.w / W; P[0].x += -dy0w; P[0].y += +dx0h; C[0] = clipper->computeClipFlags(P[0]); P[1].x += -dy1w; P[1].y += +dx1h; C[1] = clipper->computeClipFlags(P[1]); P[2].x += +dy1w; P[2].y += -dx1h; C[2] = clipper->computeClipFlags(P[2]); P[3].x += +dy0w; P[3].y += -dx0h; C[3] = clipper->computeClipFlags(P[3]); if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) { Polygon polygon(P, 4); int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!clipper->clip(polygon, clipFlagsOr, draw)) { return false; } } return setupRoutine(&primitive, &triangle, &polygon, &data); } } else // Diamond test convention { float4 P[8]; int C[8]; P[0] = P0; P[1] = P0; P[2] = P0; P[3] = P0; P[4] = P1; P[5] = P1; P[6] = P1; P[7] = P1; float dx0 = lineWidth * 0.5f * P0.w / W; float dy0 = lineWidth * 0.5f * P0.w / H; float dx1 = lineWidth * 0.5f * P1.w / W; float dy1 = lineWidth * 0.5f * P1.w / H; P[0].x += -dx0; C[0] = clipper->computeClipFlags(P[0]); P[1].y += +dy0; C[1] = clipper->computeClipFlags(P[1]); P[2].x += +dx0; C[2] = clipper->computeClipFlags(P[2]); P[3].y += -dy0; C[3] = clipper->computeClipFlags(P[3]); P[4].x += -dx1; C[4] = clipper->computeClipFlags(P[4]); P[5].y += +dy1; C[5] = clipper->computeClipFlags(P[5]); P[6].x += +dx1; C[6] = clipper->computeClipFlags(P[6]); P[7].y += -dy1; C[7] = clipper->computeClipFlags(P[7]); if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) { float4 L[6]; if(dx > -dy) { if(dx > dy) // Right { L[0] = P[0]; L[1] = P[1]; L[2] = P[5]; L[3] = P[6]; L[4] = P[7]; L[5] = P[3]; } else // Down { L[0] = P[0]; L[1] = P[4]; L[2] = P[5]; L[3] = P[6]; L[4] = P[2]; L[5] = P[3]; } } else { if(dx > dy) // Up { L[0] = P[0]; L[1] = P[1]; L[2] = P[2]; L[3] = P[6]; L[4] = P[7]; L[5] = P[4]; } else // Left { L[0] = P[1]; L[1] = P[2]; L[2] = P[3]; L[3] = P[7]; L[4] = P[4]; L[5] = P[5]; } } Polygon polygon(L, 6); int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!clipper->clip(polygon, clipFlagsOr, draw)) { return false; } } return setupRoutine(&primitive, &triangle, &polygon, &data); } } return false; } bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; const SetupProcessor::State &state = draw.setupState; const DrawData &data = *draw.data; Vertex &v = triangle.v0; float pSize; int pts = state.pointSizeRegister; if(state.pointSizeRegister != Unused) { pSize = v.v[pts].y; } else { pSize = data.point.pointSize[0]; } pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); float4 P[4]; int C[4]; int pos = state.positionRegister; P[0] = v.v[pos]; P[1] = v.v[pos]; P[2] = v.v[pos]; P[3] = v.v[pos]; const float X = pSize * P[0].w * data.halfPixelX[0]; const float Y = pSize * P[0].w * data.halfPixelY[0]; P[0].x -= X; P[0].y += Y; C[0] = clipper->computeClipFlags(P[0]); P[1].x += X; P[1].y += Y; C[1] = clipper->computeClipFlags(P[1]); P[2].x += X; P[2].y -= Y; C[2] = clipper->computeClipFlags(P[2]); P[3].x -= X; P[3].y -= Y; C[3] = clipper->computeClipFlags(P[3]); triangle.v1 = triangle.v0; triangle.v2 = triangle.v0; triangle.v1.X += iround(16 * 0.5f * pSize); triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner Polygon polygon(P, 4); if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) { int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!clipper->clip(polygon, clipFlagsOr, draw)) { return false; } } return setupRoutine(&primitive, &triangle, &polygon, &data); } return false; } void Renderer::initializeThreads() { unitCount = ceilPow2(threadCount); clusterCount = ceilPow2(threadCount); for(int i = 0; i < unitCount; i++) { triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); } for(int i = 0; i < threadCount; i++) { vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); vertexTask[i]->vertexCache.drawCall = -1; task[i].type = Task::SUSPEND; resume[i] = new Event(); suspend[i] = new Event(); Parameters parameters; parameters.threadIndex = i; parameters.renderer = this; exitThreads = false; worker[i] = new Thread(threadFunction, ¶meters); suspend[i]->wait(); suspend[i]->signal(); } } void Renderer::terminateThreads() { while(threadsAwake != 0) { Thread::sleep(1); } for(int thread = 0; thread < threadCount; thread++) { if(worker[thread]) { exitThreads = true; resume[thread]->signal(); worker[thread]->join(); delete worker[thread]; worker[thread] = 0; delete resume[thread]; resume[thread] = 0; delete suspend[thread]; suspend[thread] = 0; } deallocate(vertexTask[thread]); vertexTask[thread] = 0; } for(int i = 0; i < 16; i++) { deallocate(triangleBatch[i]); triangleBatch[i] = 0; deallocate(primitiveBatch[i]); primitiveBatch[i] = 0; } } void Renderer::loadConstants(const VertexShader *vertexShader) { if(!vertexShader) return; size_t count = vertexShader->getLength(); for(size_t i = 0; i < count; i++) { const Shader::Instruction *instruction = vertexShader->getInstruction(i); if(instruction->opcode == Shader::OPCODE_DEF) { int index = instruction->dst.index; float value[4]; value[0] = instruction->src[0].value[0]; value[1] = instruction->src[0].value[1]; value[2] = instruction->src[0].value[2]; value[3] = instruction->src[0].value[3]; setVertexShaderConstantF(index, value); } else if(instruction->opcode == Shader::OPCODE_DEFI) { int index = instruction->dst.index; int integer[4]; integer[0] = instruction->src[0].integer[0]; integer[1] = instruction->src[0].integer[1]; integer[2] = instruction->src[0].integer[2]; integer[3] = instruction->src[0].integer[3]; setVertexShaderConstantI(index, integer); } else if(instruction->opcode == Shader::OPCODE_DEFB) { int index = instruction->dst.index; int boolean = instruction->src[0].boolean[0]; setVertexShaderConstantB(index, &boolean); } } } void Renderer::loadConstants(const PixelShader *pixelShader) { if(!pixelShader) return; size_t count = pixelShader->getLength(); for(size_t i = 0; i < count; i++) { const Shader::Instruction *instruction = pixelShader->getInstruction(i); if(instruction->opcode == Shader::OPCODE_DEF) { int index = instruction->dst.index; float value[4]; value[0] = instruction->src[0].value[0]; value[1] = instruction->src[0].value[1]; value[2] = instruction->src[0].value[2]; value[3] = instruction->src[0].value[3]; setPixelShaderConstantF(index, value); } else if(instruction->opcode == Shader::OPCODE_DEFI) { int index = instruction->dst.index; int integer[4]; integer[0] = instruction->src[0].integer[0]; integer[1] = instruction->src[0].integer[1]; integer[2] = instruction->src[0].integer[2]; integer[3] = instruction->src[0].integer[3]; setPixelShaderConstantI(index, integer); } else if(instruction->opcode == Shader::OPCODE_DEFB) { int index = instruction->dst.index; int boolean = instruction->src[0].boolean[0]; setPixelShaderConstantB(index, &boolean); } } } void Renderer::setIndexBuffer(Resource *indexBuffer) { context->indexBuffer = indexBuffer; } void Renderer::setMultiSampleMask(unsigned int mask) { context->sampleMask = mask; } void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) { sw::transparencyAntialiasing = transparencyAntialiasing; } bool Renderer::isReadWriteTexture(int sampler) { for(int index = 0; index < RENDERTARGETS; index++) { if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) { return true; } } if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) { return true; } return false; } void Renderer::updateClipper() { if(updateClipPlanes) { if(VertexProcessor::isFixedFunction()) // User plane in world space { const Matrix &scissorWorld = getViewTransform(); if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2]; if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; } else // User plane in clip space { if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; } updateClipPlanes = false; } } void Renderer::setTextureResource(unsigned int sampler, Resource *resource) { ASSERT(sampler < TOTAL_IMAGE_UNITS); context->texture[sampler] = resource; } void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) { ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); context->sampler[sampler].setTextureLevel(face, level, surface, type); } void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) { if(type == SAMPLER_PIXEL) { PixelProcessor::setTextureFilter(sampler, textureFilter); } else { VertexProcessor::setTextureFilter(sampler, textureFilter); } } void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMipmapFilter(sampler, mipmapFilter); } else { VertexProcessor::setMipmapFilter(sampler, mipmapFilter); } } void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) { if(type == SAMPLER_PIXEL) { PixelProcessor::setGatherEnable(sampler, enable); } else { VertexProcessor::setGatherEnable(sampler, enable); } } void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) { if(type == SAMPLER_PIXEL) { PixelProcessor::setAddressingModeU(sampler, addressMode); } else { VertexProcessor::setAddressingModeU(sampler, addressMode); } } void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) { if(type == SAMPLER_PIXEL) { PixelProcessor::setAddressingModeV(sampler, addressMode); } else { VertexProcessor::setAddressingModeV(sampler, addressMode); } } void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) { if(type == SAMPLER_PIXEL) { PixelProcessor::setAddressingModeW(sampler, addressMode); } else { VertexProcessor::setAddressingModeW(sampler, addressMode); } } void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) { if(type == SAMPLER_PIXEL) { PixelProcessor::setReadSRGB(sampler, sRGB); } else { VertexProcessor::setReadSRGB(sampler, sRGB); } } void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMipmapLOD(sampler, bias); } else { VertexProcessor::setMipmapLOD(sampler, bias); } } void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) { if(type == SAMPLER_PIXEL) { PixelProcessor::setBorderColor(sampler, borderColor); } else { VertexProcessor::setBorderColor(sampler, borderColor); } } void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); } else { VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); } } void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) { if(type == SAMPLER_PIXEL) { PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); } else { VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); } } void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) { if(type == SAMPLER_PIXEL) { PixelProcessor::setSwizzleR(sampler, swizzleR); } else { VertexProcessor::setSwizzleR(sampler, swizzleR); } } void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) { if(type == SAMPLER_PIXEL) { PixelProcessor::setSwizzleG(sampler, swizzleG); } else { VertexProcessor::setSwizzleG(sampler, swizzleG); } } void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) { if(type == SAMPLER_PIXEL) { PixelProcessor::setSwizzleB(sampler, swizzleB); } else { VertexProcessor::setSwizzleB(sampler, swizzleB); } } void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) { if(type == SAMPLER_PIXEL) { PixelProcessor::setSwizzleA(sampler, swizzleA); } else { VertexProcessor::setSwizzleA(sampler, swizzleA); } } void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) { if(type == SAMPLER_PIXEL) { PixelProcessor::setCompareFunc(sampler, compFunc); } else { VertexProcessor::setCompareFunc(sampler, compFunc); } } void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) { if(type == SAMPLER_PIXEL) { PixelProcessor::setBaseLevel(sampler, baseLevel); } else { VertexProcessor::setBaseLevel(sampler, baseLevel); } } void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMaxLevel(sampler, maxLevel); } else { VertexProcessor::setMaxLevel(sampler, maxLevel); } } void Renderer::setMinLod(SamplerType type, int sampler, float minLod) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMinLod(sampler, minLod); } else { VertexProcessor::setMinLod(sampler, minLod); } } void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) { if(type == SAMPLER_PIXEL) { PixelProcessor::setMaxLod(sampler, maxLod); } else { VertexProcessor::setMaxLod(sampler, maxLod); } } void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired) { if(type == SAMPLER_PIXEL) { PixelProcessor::setSyncRequired(sampler, syncRequired); } else { VertexProcessor::setSyncRequired(sampler, syncRequired); } } void Renderer::setPointSpriteEnable(bool pointSpriteEnable) { context->setPointSpriteEnable(pointSpriteEnable); } void Renderer::setPointScaleEnable(bool pointScaleEnable) { context->setPointScaleEnable(pointScaleEnable); } void Renderer::setLineWidth(float width) { context->lineWidth = width; } void Renderer::setDepthBias(float bias) { context->depthBias = bias; } void Renderer::setSlopeDepthBias(float slopeBias) { context->slopeDepthBias = slopeBias; } void Renderer::setRasterizerDiscard(bool rasterizerDiscard) { context->rasterizerDiscard = rasterizerDiscard; } void Renderer::setPixelShader(const PixelShader *shader) { context->pixelShader = shader; loadConstants(shader); } void Renderer::setVertexShader(const VertexShader *shader) { context->vertexShader = shader; loadConstants(shader); } void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->psDirtyConstF < index + count) { drawCall[i]->psDirtyConstF = index + count; } } for(unsigned int i = 0; i < count; i++) { PixelProcessor::setFloatConstant(index + i, value); value += 4; } } void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->psDirtyConstI < index + count) { drawCall[i]->psDirtyConstI = index + count; } } for(unsigned int i = 0; i < count; i++) { PixelProcessor::setIntegerConstant(index + i, value); value += 4; } } void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->psDirtyConstB < index + count) { drawCall[i]->psDirtyConstB = index + count; } } for(unsigned int i = 0; i < count; i++) { PixelProcessor::setBooleanConstant(index + i, *boolean); boolean++; } } void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->vsDirtyConstF < index + count) { drawCall[i]->vsDirtyConstF = index + count; } } for(unsigned int i = 0; i < count; i++) { VertexProcessor::setFloatConstant(index + i, value); value += 4; } } void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->vsDirtyConstI < index + count) { drawCall[i]->vsDirtyConstI = index + count; } } for(unsigned int i = 0; i < count; i++) { VertexProcessor::setIntegerConstant(index + i, value); value += 4; } } void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) { for(unsigned int i = 0; i < DRAW_COUNT; i++) { if(drawCall[i]->vsDirtyConstB < index + count) { drawCall[i]->vsDirtyConstB = index + count; } } for(unsigned int i = 0; i < count; i++) { VertexProcessor::setBooleanConstant(index + i, *boolean); boolean++; } } void Renderer::setModelMatrix(const Matrix &M, int i) { VertexProcessor::setModelMatrix(M, i); } void Renderer::setViewMatrix(const Matrix &V) { VertexProcessor::setViewMatrix(V); updateClipPlanes = true; } void Renderer::setBaseMatrix(const Matrix &B) { VertexProcessor::setBaseMatrix(B); updateClipPlanes = true; } void Renderer::setProjectionMatrix(const Matrix &P) { VertexProcessor::setProjectionMatrix(P); updateClipPlanes = true; } void Renderer::addQuery(Query *query) { queries.push_back(query); } void Renderer::removeQuery(Query *query) { queries.remove(query); } #if PERF_HUD int Renderer::getThreadCount() { return threadCount; } int64_t Renderer::getVertexTime(int thread) { return vertexTime[thread]; } int64_t Renderer::getSetupTime(int thread) { return setupTime[thread]; } int64_t Renderer::getPixelTime(int thread) { return pixelTime[thread]; } void Renderer::resetTimers() { for(int thread = 0; thread < threadCount; thread++) { vertexTime[thread] = 0; setupTime[thread] = 0; pixelTime[thread] = 0; } } #endif void Renderer::setViewport(const Viewport &viewport) { this->viewport = viewport; } void Renderer::setScissor(const Rect &scissor) { this->scissor = scissor; } void Renderer::setClipFlags(int flags) { clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum } void Renderer::setClipPlane(unsigned int index, const float plane[4]) { if(index < MAX_CLIP_PLANES) { userPlane[index] = plane; } else ASSERT(false); updateClipPlanes = true; } void Renderer::updateConfiguration(bool initialUpdate) { bool newConfiguration = swiftConfig->hasNewConfiguration(); if(newConfiguration || initialUpdate) { terminateThreads(); SwiftConfig::Configuration configuration = {}; swiftConfig->getConfiguration(configuration); precacheVertex = !newConfiguration && configuration.precache; precacheSetup = !newConfiguration && configuration.precache; precachePixel = !newConfiguration && configuration.precache; VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); switch(configuration.textureSampleQuality) { case 0: Sampler::setFilterQuality(FILTER_POINT); break; case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; } switch(configuration.mipmapQuality) { case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; } setPerspectiveCorrection(configuration.perspectiveCorrection); switch(configuration.transcendentalPrecision) { case 0: logPrecision = APPROXIMATE; expPrecision = APPROXIMATE; rcpPrecision = APPROXIMATE; rsqPrecision = APPROXIMATE; break; case 1: logPrecision = PARTIAL; expPrecision = PARTIAL; rcpPrecision = PARTIAL; rsqPrecision = PARTIAL; break; case 2: logPrecision = ACCURATE; expPrecision = ACCURATE; rcpPrecision = ACCURATE; rsqPrecision = ACCURATE; break; case 3: logPrecision = WHQL; expPrecision = WHQL; rcpPrecision = WHQL; rsqPrecision = WHQL; break; case 4: logPrecision = IEEE; expPrecision = IEEE; rcpPrecision = IEEE; rsqPrecision = IEEE; break; default: logPrecision = ACCURATE; expPrecision = ACCURATE; rcpPrecision = ACCURATE; rsqPrecision = ACCURATE; break; } switch(configuration.transparencyAntialiasing) { case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; default: transparencyAntialiasing = TRANSPARENCY_NONE; break; } switch(configuration.threadCount) { case -1: threadCount = CPUID::coreCount(); break; case 0: threadCount = CPUID::processAffinity(); break; default: threadCount = configuration.threadCount; break; } CPUID::setEnableSSE4_1(configuration.enableSSE4_1); CPUID::setEnableSSSE3(configuration.enableSSSE3); CPUID::setEnableSSE3(configuration.enableSSE3); CPUID::setEnableSSE2(configuration.enableSSE2); CPUID::setEnableSSE(configuration.enableSSE); for(int pass = 0; pass < 10; pass++) { optimization[pass] = configuration.optimization[pass]; } forceWindowed = configuration.forceWindowed; complementaryDepthBuffer = configuration.complementaryDepthBuffer; postBlendSRGB = configuration.postBlendSRGB; exactColorRounding = configuration.exactColorRounding; forceClearRegisters = configuration.forceClearRegisters; #ifndef NDEBUG minPrimitives = configuration.minPrimitives; maxPrimitives = configuration.maxPrimitives; #endif } if(!initialUpdate && !worker[0]) { initializeThreads(); } } }