HELLO·Android
系统源代码
IT资讯
技术文章
我的收藏
注册
登录
-
我收藏的文章
创建代码块
我的代码块
我的账号
Oreo
|
8.0.0_r4
下载
查看原文件
收藏
根目录
external
swiftshader
src
Shader
PixelRoutine.cpp
// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "PixelRoutine.hpp" #include "Renderer.hpp" #include "QuadRasterizer.hpp" #include "Surface.hpp" #include "Primitive.hpp" #include "CPUID.hpp" #include "SamplerCore.hpp" #include "Constants.hpp" #include "Debug.hpp" namespace sw { extern bool complementaryDepthBuffer; extern bool postBlendSRGB; extern bool exactColorRounding; extern bool forceClearRegisters; PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput) { if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters) { for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++) { v[i].x = Float4(0.0f); v[i].y = Float4(0.0f); v[i].z = Float4(0.0f); v[i].w = Float4(0.0f); } } } PixelRoutine::~PixelRoutine() { for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) { delete sampler[i]; } } void PixelRoutine::quad(Pointer
cBuffer[RENDERTARGETS], Pointer
&zBuffer, Pointer
&sBuffer, Int cMask[4], Int &x, Int &y) { #if PERF_PROFILE Long pipeTime = Ticks(); #endif for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) { sampler[i] = new SamplerCore(constants, state.sampler[i]); } const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive(); Int zMask[4]; // Depth mask Int sMask[4]; // Stencil mask for(unsigned int q = 0; q < state.multiSample; q++) { zMask[q] = cMask[q]; sMask[q] = cMask[q]; } for(unsigned int q = 0; q < state.multiSample; q++) { stencilTest(sBuffer, q, x, sMask[q], cMask[q]); } Float4 f; Float4 rhwCentroid; Float4 xxxx = Float4(Float(x)) + *Pointer
(primitive + OFFSET(Primitive,xQuad), 16); if(interpolateZ()) { for(unsigned int q = 0; q < state.multiSample; q++) { Float4 x = xxxx; if(state.multiSample > 1) { x -= *Pointer
(constants + OFFSET(Constants,X) + q * sizeof(float4)); } z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false); } } Bool depthPass = false; if(earlyDepthTest) { for(unsigned int q = 0; q < state.multiSample; q++) { depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); } } If(depthPass || Bool(!earlyDepthTest)) { #if PERF_PROFILE Long interpTime = Ticks(); #endif Float4 yyyy = Float4(Float(y)) + *Pointer
(primitive + OFFSET(Primitive,yQuad), 16); // Centroid locations Float4 XXXX = Float4(0.0f); Float4 YYYY = Float4(0.0f); if(state.centroid) { Float4 WWWW(1.0e-9f); for(unsigned int q = 0; q < state.multiSample; q++) { XXXX += *Pointer
(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]); YYYY += *Pointer
(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]); WWWW += *Pointer
(constants + OFFSET(Constants,weight) + 16 * cMask[q]); } WWWW = Rcp_pp(WWWW); XXXX *= WWWW; YYYY *= WWWW; XXXX += xxxx; YYYY += yyyy; } if(interpolateW()) { w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false); rhw = reciprocal(w, false, false, true); if(state.centroid) { rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false)); } } for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) { for(int component = 0; component < 4; component++) { if(state.interpolant[interpolant].component & (1 << component)) { if(!state.interpolant[interpolant].centroid) { v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } else { v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective); } } } Float4 rcp; switch(state.interpolant[interpolant].project) { case 0: break; case 1: rcp = reciprocal(v[interpolant].y); v[interpolant].x = v[interpolant].x * rcp; break; case 2: rcp = reciprocal(v[interpolant].z); v[interpolant].x = v[interpolant].x * rcp; v[interpolant].y = v[interpolant].y * rcp; break; case 3: rcp = reciprocal(v[interpolant].w); v[interpolant].x = v[interpolant].x * rcp; v[interpolant].y = v[interpolant].y * rcp; v[interpolant].z = v[interpolant].z * rcp; break; } } if(state.fog.component) { f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective); } setBuiltins(x, y, z, w); #if PERF_PROFILE cycles[PERF_INTERP] += Ticks() - interpTime; #endif Bool alphaPass = true; if(colorUsed()) { #if PERF_PROFILE Long shaderTime = Ticks(); #endif applyShader(cMask); #if PERF_PROFILE cycles[PERF_SHADER] += Ticks() - shaderTime; #endif alphaPass = alphaTest(cMask); if((shader && shader->containsKill()) || state.alphaTestActive()) { for(unsigned int q = 0; q < state.multiSample; q++) { zMask[q] &= cMask[q]; sMask[q] &= cMask[q]; } } } If(alphaPass) { if(!earlyDepthTest) { for(unsigned int q = 0; q < state.multiSample; q++) { depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]); } } #if PERF_PROFILE Long ropTime = Ticks(); #endif If(depthPass || Bool(earlyDepthTest)) { for(unsigned int q = 0; q < state.multiSample; q++) { if(state.multiSampleMask & (1 << q)) { writeDepth(zBuffer, q, x, z[q], zMask[q]); if(state.occlusionEnabled) { occlusion += *Pointer
(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q])); } } } if(colorUsed()) { #if PERF_PROFILE AddAtomic(Pointer
(&profiler.ropOperations), 4); #endif rasterOperation(f, cBuffer, x, sMask, zMask, cMask); } } #if PERF_PROFILE cycles[PERF_ROP] += Ticks() - ropTime; #endif } } for(unsigned int q = 0; q < state.multiSample; q++) { if(state.multiSampleMask & (1 << q)) { writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]); } } #if PERF_PROFILE cycles[PERF_PIPE] += Ticks() - pipeTime; #endif } Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer
planeEquation, bool flat, bool perspective) { Float4 interpolant = *Pointer
(planeEquation + OFFSET(PlaneEquation,C), 16); if(!flat) { interpolant += x * *Pointer
(planeEquation + OFFSET(PlaneEquation,A), 16) + y * *Pointer
(planeEquation + OFFSET(PlaneEquation,B), 16); if(perspective) { interpolant *= rhw; } } return interpolant; } void PixelRoutine::stencilTest(Pointer
&sBuffer, int q, Int &x, Int &sMask, Int &cMask) { if(!state.stencilActive) { return; } // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask) Pointer
buffer = sBuffer + 2 * x; if(q > 0) { buffer += q * *Pointer
(data + OFFSET(DrawData,stencilSliceB)); } Byte8 value = As
(Long1(*Pointer
(buffer))); Byte8 valueCCW = value; if(!state.noStencilMask) { value &= *Pointer
(data + OFFSET(DrawData,stencil[0].testMaskQ)); } stencilTest(value, state.stencilCompareMode, false); if(state.twoSidedStencil) { if(!state.noStencilMaskCCW) { valueCCW &= *Pointer
(data + OFFSET(DrawData,stencil[1].testMaskQ)); } stencilTest(valueCCW, state.stencilCompareModeCCW, true); value &= *Pointer
(primitive + OFFSET(Primitive,clockwiseMask)); valueCCW &= *Pointer
(primitive + OFFSET(Primitive,invClockwiseMask)); value |= valueCCW; } sMask = SignMask(value) & cMask; } void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW) { Byte8 equal; switch(stencilCompareMode) { case STENCIL_ALWAYS: value = Byte8(0xFFFFFFFFFFFFFFFF); break; case STENCIL_NEVER: value = Byte8(0x0000000000000000); break; case STENCIL_LESS: // a < b ~ b > a value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); value = CmpGT(As
(value), *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); break; case STENCIL_EQUAL: value = CmpEQ(value, *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); break; case STENCIL_NOTEQUAL: // a != b ~ !(a == b) value = CmpEQ(value, *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); value ^= Byte8(0xFFFFFFFFFFFFFFFF); break; case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b) equal = value; equal = CmpEQ(equal, *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ))); value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); value = CmpGT(As
(value), *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); value |= equal; break; case STENCIL_GREATER: // a > b equal = *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)); value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); equal = CmpGT(As
(equal), As
(value)); value = equal; break; case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a) value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); value = CmpGT(As
(value), *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ))); value ^= Byte8(0xFFFFFFFFFFFFFFFF); break; default: ASSERT(false); } } Bool PixelRoutine::depthTest(Pointer
&zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask) { if(!state.depthTestActive) { return true; } Float4 Z = z; if(shader && shader->depthOverride()) { if(complementaryDepthBuffer) { Z = Float4(1.0f) - oDepth; } else { Z = oDepth; } } Pointer
buffer; Int pitch; if(!state.quadLayoutDepthBuffer) { buffer = zBuffer + 4 * x; pitch = *Pointer
(data + OFFSET(DrawData,depthPitchB)); } else { buffer = zBuffer + 8 * x; } if(q > 0) { buffer += q * *Pointer
(data + OFFSET(DrawData,depthSliceB)); } Float4 zValue; if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) { if(!state.quadLayoutDepthBuffer) { // FIXME: Properly optimizes? zValue.xy = *Pointer
(buffer); zValue.zw = *Pointer
(buffer + pitch - 8); } else { zValue = *Pointer
(buffer, 16); } } Int4 zTest; switch(state.depthCompareMode) { case DEPTH_ALWAYS: // Optimized break; case DEPTH_NEVER: // Optimized break; case DEPTH_EQUAL: zTest = CmpEQ(zValue, Z); break; case DEPTH_NOTEQUAL: zTest = CmpNEQ(zValue, Z); break; case DEPTH_LESS: if(complementaryDepthBuffer) { zTest = CmpLT(zValue, Z); } else { zTest = CmpNLE(zValue, Z); } break; case DEPTH_GREATEREQUAL: if(complementaryDepthBuffer) { zTest = CmpNLT(zValue, Z); } else { zTest = CmpLE(zValue, Z); } break; case DEPTH_LESSEQUAL: if(complementaryDepthBuffer) { zTest = CmpLE(zValue, Z); } else { zTest = CmpNLT(zValue, Z); } break; case DEPTH_GREATER: if(complementaryDepthBuffer) { zTest = CmpNLE(zValue, Z); } else { zTest = CmpLT(zValue, Z); } break; default: ASSERT(false); } switch(state.depthCompareMode) { case DEPTH_ALWAYS: zMask = cMask; break; case DEPTH_NEVER: zMask = 0x0; break; default: zMask = SignMask(zTest) & cMask; break; } if(state.stencilActive) { zMask &= sMask; } return zMask != 0; } void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha) { Short4 cmp; Short4 equal; switch(state.alphaCompareMode) { case ALPHA_ALWAYS: aMask = 0xF; break; case ALPHA_NEVER: aMask = 0x0; break; case ALPHA_EQUAL: cmp = CmpEQ(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))); aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; case ALPHA_NOTEQUAL: // a != b ~ !(a == b) cmp = CmpEQ(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; case ALPHA_LESS: // a < b ~ b > a cmp = CmpGT(*Pointer
(data + OFFSET(DrawData,factor.alphaReference4)), alpha); aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate equal = CmpEQ(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))); cmp = CmpGT(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))); cmp |= equal; aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; case ALPHA_LESSEQUAL: // a <= b ~ !(a > b) cmp = CmpGT(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; case ALPHA_GREATER: // a > b cmp = CmpGT(alpha, *Pointer
(data + OFFSET(DrawData,factor.alphaReference4))); aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000))); break; default: ASSERT(false); } } void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha) { Int4 coverage0 = CmpNLT(alpha, *Pointer
(data + OFFSET(DrawData,a2c0))); Int4 coverage1 = CmpNLT(alpha, *Pointer
(data + OFFSET(DrawData,a2c1))); Int4 coverage2 = CmpNLT(alpha, *Pointer
(data + OFFSET(DrawData,a2c2))); Int4 coverage3 = CmpNLT(alpha, *Pointer
(data + OFFSET(DrawData,a2c3))); Int aMask0 = SignMask(coverage0); Int aMask1 = SignMask(coverage1); Int aMask2 = SignMask(coverage2); Int aMask3 = SignMask(coverage3); cMask[0] &= aMask0; cMask[1] &= aMask1; cMask[2] &= aMask2; cMask[3] &= aMask3; } void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog) { if(!state.fogActive) { return; } if(state.pixelFogMode != FOG_NONE) { pixelFog(fog); fog = Min(fog, Float4(1.0f)); fog = Max(fog, Float4(0.0f)); } c0.x -= *Pointer
(data + OFFSET(DrawData,fog.colorF[0])); c0.y -= *Pointer
(data + OFFSET(DrawData,fog.colorF[1])); c0.z -= *Pointer
(data + OFFSET(DrawData,fog.colorF[2])); c0.x *= fog; c0.y *= fog; c0.z *= fog; c0.x += *Pointer
(data + OFFSET(DrawData,fog.colorF[0])); c0.y += *Pointer
(data + OFFSET(DrawData,fog.colorF[1])); c0.z += *Pointer
(data + OFFSET(DrawData,fog.colorF[2])); } void PixelRoutine::pixelFog(Float4 &visibility) { Float4 &zw = visibility; if(state.pixelFogMode != FOG_NONE) { if(state.wBasedFog) { zw = rhw; } else { if(complementaryDepthBuffer) { zw = Float4(1.0f) - z[0]; } else { zw = z[0]; } } } switch(state.pixelFogMode) { case FOG_NONE: break; case FOG_LINEAR: zw *= *Pointer
(data + OFFSET(DrawData,fog.scale)); zw += *Pointer
(data + OFFSET(DrawData,fog.offset)); break; case FOG_EXP: zw *= *Pointer
(data + OFFSET(DrawData,fog.densityE)); zw = exponential2(zw, true); break; case FOG_EXP2: zw *= zw; zw *= *Pointer
(data + OFFSET(DrawData,fog.density2E)); zw = exponential2(zw, true); break; default: ASSERT(false); } } void PixelRoutine::writeDepth(Pointer
&zBuffer, int q, Int &x, Float4 &z, Int &zMask) { if(!state.depthWriteEnable) { return; } Float4 Z = z; if(shader && shader->depthOverride()) { if(complementaryDepthBuffer) { Z = Float4(1.0f) - oDepth; } else { Z = oDepth; } } Pointer
buffer; Int pitch; if(!state.quadLayoutDepthBuffer) { buffer = zBuffer + 4 * x; pitch = *Pointer
(data + OFFSET(DrawData,depthPitchB)); } else { buffer = zBuffer + 8 * x; } if(q > 0) { buffer += q * *Pointer
(data + OFFSET(DrawData,depthSliceB)); } Float4 zValue; if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable)) { if(!state.quadLayoutDepthBuffer) { // FIXME: Properly optimizes? zValue.xy = *Pointer
(buffer); zValue.zw = *Pointer
(buffer + pitch - 8); } else { zValue = *Pointer
(buffer, 16); } } Z = As
(As
(Z) & *Pointer
(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16)); zValue = As
(As
(zValue) & *Pointer
(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16)); Z = As
(As
(Z) | As
(zValue)); if(!state.quadLayoutDepthBuffer) { // FIXME: Properly optimizes? *Pointer
(buffer) = Float2(Z.xy); *Pointer
(buffer + pitch) = Float2(Z.zw); } else { *Pointer
(buffer, 16) = Z; } } void PixelRoutine::writeStencil(Pointer
&sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask) { if(!state.stencilActive) { return; } if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP) { if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP)) { return; } } if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW)) { return; } Pointer
buffer = sBuffer + 2 * x; if(q > 0) { buffer += q * *Pointer
(data + OFFSET(DrawData,stencilSliceB)); } Byte8 bufferValue = As
(Long1(*Pointer
(buffer))); Byte8 newValue; stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask); if(!state.noStencilWriteMask) { Byte8 maskedValue = bufferValue; newValue &= *Pointer
(data + OFFSET(DrawData,stencil[0].writeMaskQ)); maskedValue &= *Pointer
(data + OFFSET(DrawData,stencil[0].invWriteMaskQ)); newValue |= maskedValue; } if(state.twoSidedStencil) { Byte8 newValueCCW; stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask); if(!state.noStencilWriteMaskCCW) { Byte8 maskedValue = bufferValue; newValueCCW &= *Pointer
(data + OFFSET(DrawData,stencil[1].writeMaskQ)); maskedValue &= *Pointer
(data + OFFSET(DrawData,stencil[1].invWriteMaskQ)); newValueCCW |= maskedValue; } newValue &= *Pointer
(primitive + OFFSET(Primitive,clockwiseMask)); newValueCCW &= *Pointer
(primitive + OFFSET(Primitive,invClockwiseMask)); newValue |= newValueCCW; } newValue &= *Pointer
(constants + OFFSET(Constants,maskB4Q) + 8 * cMask); bufferValue &= *Pointer
(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask); newValue |= bufferValue; *Pointer
(buffer) = UInt(As
(newValue)); } void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask) { Byte8 &pass = newValue; Byte8 fail; Byte8 zFail; stencilOperation(pass, bufferValue, stencilPassOperation, CCW); if(stencilZFailOperation != stencilPassOperation) { stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW); } if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) { stencilOperation(fail, bufferValue, stencilFailOperation, CCW); } if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation) { if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same { pass &= *Pointer
(constants + OFFSET(Constants,maskB4Q) + 8 * zMask); zFail &= *Pointer
(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask); pass |= zFail; } pass &= *Pointer
(constants + OFFSET(Constants,maskB4Q) + 8 * sMask); fail &= *Pointer
(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask); pass |= fail; } } void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW) { switch(operation) { case OPERATION_KEEP: output = bufferValue; break; case OPERATION_ZERO: output = Byte8(0x0000000000000000); break; case OPERATION_REPLACE: output = *Pointer
(data + OFFSET(DrawData,stencil[CCW].referenceQ)); break; case OPERATION_INCRSAT: output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); break; case OPERATION_DECRSAT: output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1)); break; case OPERATION_INVERT: output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF); break; case OPERATION_INCR: output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1); break; case OPERATION_DECR: output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1); break; default: ASSERT(false); } } void PixelRoutine::blendFactor(const Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorActive) { switch(blendFactorActive) { case BLEND_ZERO: // Optimized break; case BLEND_ONE: // Optimized break; case BLEND_SOURCE: blendFactor.x = current.x; blendFactor.y = current.y; blendFactor.z = current.z; break; case BLEND_INVSOURCE: blendFactor.x = Short4(0xFFFFu) - current.x; blendFactor.y = Short4(0xFFFFu) - current.y; blendFactor.z = Short4(0xFFFFu) - current.z; break; case BLEND_DEST: blendFactor.x = pixel.x; blendFactor.y = pixel.y; blendFactor.z = pixel.z; break; case BLEND_INVDEST: blendFactor.x = Short4(0xFFFFu) - pixel.x; blendFactor.y = Short4(0xFFFFu) - pixel.y; blendFactor.z = Short4(0xFFFFu) - pixel.z; break; case BLEND_SOURCEALPHA: blendFactor.x = current.w; blendFactor.y = current.w; blendFactor.z = current.w; break; case BLEND_INVSOURCEALPHA: blendFactor.x = Short4(0xFFFFu) - current.w; blendFactor.y = Short4(0xFFFFu) - current.w; blendFactor.z = Short4(0xFFFFu) - current.w; break; case BLEND_DESTALPHA: blendFactor.x = pixel.w; blendFactor.y = pixel.w; blendFactor.z = pixel.w; break; case BLEND_INVDESTALPHA: blendFactor.x = Short4(0xFFFFu) - pixel.w; blendFactor.y = Short4(0xFFFFu) - pixel.w; blendFactor.z = Short4(0xFFFFu) - pixel.w; break; case BLEND_SRCALPHASAT: blendFactor.x = Short4(0xFFFFu) - pixel.w; blendFactor.x = Min(As
(blendFactor.x), As
(current.w)); blendFactor.y = blendFactor.x; blendFactor.z = blendFactor.x; break; case BLEND_CONSTANT: blendFactor.x = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[0])); blendFactor.y = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[1])); blendFactor.z = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[2])); break; case BLEND_INVCONSTANT: blendFactor.x = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[0])); blendFactor.y = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[1])); blendFactor.z = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[2])); break; case BLEND_CONSTANTALPHA: blendFactor.x = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[3])); blendFactor.y = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[3])); blendFactor.z = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case BLEND_INVCONSTANTALPHA: blendFactor.x = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); blendFactor.y = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); blendFactor.z = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } void PixelRoutine::blendFactorAlpha(const Vector4s &blendFactor, const Vector4s ¤t, const Vector4s &pixel, BlendFactor blendFactorAlphaActive) { switch(blendFactorAlphaActive) { case BLEND_ZERO: // Optimized break; case BLEND_ONE: // Optimized break; case BLEND_SOURCE: blendFactor.w = current.w; break; case BLEND_INVSOURCE: blendFactor.w = Short4(0xFFFFu) - current.w; break; case BLEND_DEST: blendFactor.w = pixel.w; break; case BLEND_INVDEST: blendFactor.w = Short4(0xFFFFu) - pixel.w; break; case BLEND_SOURCEALPHA: blendFactor.w = current.w; break; case BLEND_INVSOURCEALPHA: blendFactor.w = Short4(0xFFFFu) - current.w; break; case BLEND_DESTALPHA: blendFactor.w = pixel.w; break; case BLEND_INVDESTALPHA: blendFactor.w = Short4(0xFFFFu) - pixel.w; break; case BLEND_SRCALPHASAT: blendFactor.w = Short4(0xFFFFu); break; case BLEND_CONSTANT: case BLEND_CONSTANTALPHA: blendFactor.w = *Pointer
(data + OFFSET(DrawData,factor.blendConstant4W[3])); break; case BLEND_INVCONSTANT: case BLEND_INVCONSTANTALPHA: blendFactor.w = *Pointer
(data + OFFSET(DrawData,factor.invBlendConstant4W[3])); break; default: ASSERT(false); } } bool PixelRoutine::isSRGB(int index) const { return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8; } void PixelRoutine::readPixel(int index, Pointer
&cBuffer, Int &x, Vector4s &pixel) { Short4 c01; Short4 c23; Pointer
buffer; Pointer
buffer2; switch(state.targetFormat[index]) { case FORMAT_R5G6B5: buffer = cBuffer + 2 * x; buffer2 = buffer + *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); c01 = As
(Int2(*Pointer
(buffer), *Pointer
(buffer2))); pixel.x = c01 & Short4(0xF800u); pixel.y = (c01 & Short4(0x07E0u)) << 5; pixel.z = (c01 & Short4(0x001Fu)) << 11; pixel.w = Short4(0xFFFFu); break; case FORMAT_A8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer
(buffer); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer
(buffer); pixel.z = c01; pixel.y = c01; pixel.z = UnpackLow(As
(pixel.z), As
(c23)); pixel.y = UnpackHigh(As
(pixel.y), As
(c23)); pixel.x = pixel.z; pixel.z = UnpackLow(As
(pixel.z), As
(pixel.y)); pixel.x = UnpackHigh(As
(pixel.x), As
(pixel.y)); pixel.y = pixel.z; pixel.w = pixel.x; pixel.x = UnpackLow(As
(pixel.x), As
(pixel.x)); pixel.y = UnpackHigh(As
(pixel.y), As
(pixel.y)); pixel.z = UnpackLow(As
(pixel.z), As
(pixel.z)); pixel.w = UnpackHigh(As
(pixel.w), As
(pixel.w)); break; case FORMAT_A8B8G8R8: case FORMAT_SRGB8_A8: buffer = cBuffer + 4 * x; c01 = *Pointer
(buffer); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer
(buffer); pixel.z = c01; pixel.y = c01; pixel.z = UnpackLow(As
(pixel.z), As
(c23)); pixel.y = UnpackHigh(As
(pixel.y), As
(c23)); pixel.x = pixel.z; pixel.z = UnpackLow(As
(pixel.z), As
(pixel.y)); pixel.x = UnpackHigh(As
(pixel.x), As
(pixel.y)); pixel.y = pixel.z; pixel.w = pixel.x; pixel.x = UnpackLow(As
(pixel.z), As
(pixel.z)); pixel.y = UnpackHigh(As
(pixel.y), As
(pixel.y)); pixel.z = UnpackLow(As
(pixel.w), As
(pixel.w)); pixel.w = UnpackHigh(As
(pixel.w), As
(pixel.w)); break; case FORMAT_A8: buffer = cBuffer + 1 * x; pixel.w = Insert(pixel.w, *Pointer
(buffer), 0); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); pixel.w = Insert(pixel.w, *Pointer
(buffer), 1); pixel.w = UnpackLow(As
(pixel.w), As
(pixel.w)); pixel.x = Short4(0x0000); pixel.y = Short4(0x0000); pixel.z = Short4(0x0000); break; case FORMAT_X8R8G8B8: buffer = cBuffer + 4 * x; c01 = *Pointer
(buffer); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer
(buffer); pixel.z = c01; pixel.y = c01; pixel.z = UnpackLow(As
(pixel.z), As
(c23)); pixel.y = UnpackHigh(As
(pixel.y), As
(c23)); pixel.x = pixel.z; pixel.z = UnpackLow(As
(pixel.z), As
(pixel.y)); pixel.x = UnpackHigh(As
(pixel.x), As
(pixel.y)); pixel.y = pixel.z; pixel.x = UnpackLow(As
(pixel.x), As
(pixel.x)); pixel.y = UnpackHigh(As
(pixel.y), As
(pixel.y)); pixel.z = UnpackLow(As
(pixel.z), As
(pixel.z)); pixel.w = Short4(0xFFFFu); break; case FORMAT_X8B8G8R8: case FORMAT_SRGB8_X8: buffer = cBuffer + 4 * x; c01 = *Pointer
(buffer); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); c23 = *Pointer
(buffer); pixel.z = c01; pixel.y = c01; pixel.z = UnpackLow(As
(pixel.z), As
(c23)); pixel.y = UnpackHigh(As
(pixel.y), As
(c23)); pixel.x = pixel.z; pixel.z = UnpackLow(As
(pixel.z), As
(pixel.y)); pixel.x = UnpackHigh(As
(pixel.x), As
(pixel.y)); pixel.y = pixel.z; pixel.w = pixel.x; pixel.x = UnpackLow(As
(pixel.z), As
(pixel.z)); pixel.y = UnpackHigh(As
(pixel.y), As
(pixel.y)); pixel.z = UnpackLow(As
(pixel.w), As
(pixel.w)); pixel.w = Short4(0xFFFFu); break; case FORMAT_A8G8R8B8Q: UNIMPLEMENTED(); // pixel.z = UnpackLow(As
(pixel.z), *Pointer
(cBuffer + 8 * x + 0)); // pixel.x = UnpackHigh(As
(pixel.x), *Pointer
(cBuffer + 8 * x + 0)); // pixel.y = UnpackLow(As
(pixel.y), *Pointer
(cBuffer + 8 * x + 8)); // pixel.w = UnpackHigh(As
(pixel.w), *Pointer
(cBuffer + 8 * x + 8)); break; case FORMAT_X8G8R8B8Q: UNIMPLEMENTED(); // pixel.z = UnpackLow(As
(pixel.z), *Pointer
(cBuffer + 8 * x + 0)); // pixel.x = UnpackHigh(As
(pixel.x), *Pointer
(cBuffer + 8 * x + 0)); // pixel.y = UnpackLow(As
(pixel.y), *Pointer
(cBuffer + 8 * x + 8)); // pixel.w = Short4(0xFFFFu); break; case FORMAT_A16B16G16R16: buffer = cBuffer; pixel.x = *Pointer
(buffer + 8 * x); pixel.y = *Pointer
(buffer + 8 * x + 8); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); pixel.z = *Pointer
(buffer + 8 * x); pixel.w = *Pointer
(buffer + 8 * x + 8); transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w); break; case FORMAT_G16R16: buffer = cBuffer; pixel.x = *Pointer
(buffer + 4 * x); buffer += *Pointer
(data + OFFSET(DrawData, colorPitchB[index])); pixel.y = *Pointer
(buffer + 4 * x); pixel.z = pixel.x; pixel.x = As
(UnpackLow(pixel.x, pixel.y)); pixel.z = As
(UnpackHigh(pixel.z, pixel.y)); pixel.y = pixel.z; pixel.x = As
(UnpackLow(pixel.x, pixel.z)); pixel.y = As
(UnpackHigh(pixel.y, pixel.z)); pixel.z = Short4(0xFFFFu); pixel.w = Short4(0xFFFFu); break; default: ASSERT(false); } if((postBlendSRGB && state.writeSRGB) || isSRGB(index)) { sRGBtoLinear16_12_16(pixel); } } void PixelRoutine::alphaBlend(int index, Pointer
&cBuffer, Vector4s ¤t, Int &x) { if(!state.alphaBlendActive) { return; } Vector4s pixel; readPixel(index, cBuffer, x, pixel); // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor Vector4s sourceFactor; Vector4s destFactor; blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor); blendFactor(destFactor, current, pixel, state.destBlendFactor); if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO) { current.x = MulHigh(As
(current.x), As
(sourceFactor.x)); current.y = MulHigh(As
(current.y), As
(sourceFactor.y)); current.z = MulHigh(As
(current.z), As
(sourceFactor.z)); } if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO) { pixel.x = MulHigh(As
(pixel.x), As
(destFactor.x)); pixel.y = MulHigh(As
(pixel.y), As
(destFactor.y)); pixel.z = MulHigh(As
(pixel.z), As
(destFactor.z)); } switch(state.blendOperation) { case BLENDOP_ADD: current.x = AddSat(As
(current.x), As
(pixel.x)); current.y = AddSat(As
(current.y), As
(pixel.y)); current.z = AddSat(As
(current.z), As
(pixel.z)); break; case BLENDOP_SUB: current.x = SubSat(As
(current.x), As
(pixel.x)); current.y = SubSat(As
(current.y), As
(pixel.y)); current.z = SubSat(As
(current.z), As