// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "VertexPipeline.hpp" #include "Vertex.hpp" #include "Renderer.hpp" #include "Debug.hpp" #include <string.h> #include <stdlib.h> #include <stdio.h> #undef max #undef min namespace sw { extern bool secondaryColor; VertexPipeline::VertexPipeline(const VertexProcessor::State &state) : VertexRoutine(state, 0) { } VertexPipeline::~VertexPipeline() { } Vector4f VertexPipeline::transformBlend(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) { Vector4f dst; if(state.vertexBlendMatrixCount == 0) { dst = transform(src, matrix, homogeneous); } else { UInt index0[4]; UInt index1[4]; UInt index2[4]; UInt index3[4]; if(state.indexedVertexBlendEnable) { for(int i = 0; i < 4; i++) { Float4 B = v[BlendIndices].x; UInt indices; switch(i) { case 0: indices = As<UInt>(Float(B.x)); break; case 1: indices = As<UInt>(Float(B.y)); break; case 2: indices = As<UInt>(Float(B.z)); break; case 3: indices = As<UInt>(Float(B.w)); break; } index0[i] = (indices & 0x000000FF) << 6; index1[i] = (indices & 0x0000FF00) >> 2; index2[i] = (indices & 0x00FF0000) >> 10; index3[i] = (indices & 0xFF000000) >> 18; } } else { for(int i = 0; i < 4; i++) { index0[i] = 0 * 64; index1[i] = 1 * 64; index2[i] = 2 * 64; index3[i] = 3 * 64; } } Float4 weight0; Float4 weight1; Float4 weight2; Float4 weight3; switch(state.vertexBlendMatrixCount) { case 4: weight2 = v[BlendWeight].z; case 3: weight1 = v[BlendWeight].y; case 2: weight0 = v[BlendWeight].x; case 1: break; } if(state.vertexBlendMatrixCount == 1) { dst = transform(src, matrix, index0, homogeneous); } else if(state.vertexBlendMatrixCount == 2) { weight1 = Float4(1.0f) - weight0; Vector4f pos0; Vector4f pos1; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); dst.x = pos0.x * weight0 + pos1.x * weight1; // FIXME: Vector4f operators dst.y = pos0.y * weight0 + pos1.y * weight1; dst.z = pos0.z * weight0 + pos1.z * weight1; dst.w = pos0.w * weight0 + pos1.w * weight1; } else if(state.vertexBlendMatrixCount == 3) { weight2 = Float4(1.0f) - (weight0 + weight1); Vector4f pos0; Vector4f pos1; Vector4f pos2; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); pos2 = transform(src, matrix, index2, homogeneous); dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2; dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2; dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2; dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2; } else if(state.vertexBlendMatrixCount == 4) { weight3 = Float4(1.0f) - (weight0 + weight1 + weight2); Vector4f pos0; Vector4f pos1; Vector4f pos2; Vector4f pos3; pos0 = transform(src, matrix, index0, homogeneous); pos1 = transform(src, matrix, index1, homogeneous); pos2 = transform(src, matrix, index2, homogeneous); pos3 = transform(src, matrix, index3, homogeneous); dst.x = pos0.x * weight0 + pos1.x * weight1 + pos2.x * weight2 + pos3.x * weight3; dst.y = pos0.y * weight0 + pos1.y * weight1 + pos2.y * weight2 + pos3.y * weight3; dst.z = pos0.z * weight0 + pos1.z * weight1 + pos2.z * weight2 + pos3.z * weight3; dst.w = pos0.w * weight0 + pos1.w * weight1 + pos2.w * weight2 + pos3.w * weight3; } } return dst; } void VertexPipeline::pipeline() { Vector4f position; Vector4f normal; if(!state.preTransformed) { position = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.transformT)), true); } else { position = v[PositionT]; } o[Pos].x = position.x; o[Pos].y = position.y; o[Pos].z = position.z; o[Pos].w = position.w; Vector4f vertexPosition = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); if(state.vertexNormalActive) { normal = transformBlend(v[Normal], Pointer<Byte>(data + OFFSET(DrawData,ff.normalTransformT)), false); if(state.normalizeNormals) { normal = normalize(normal); } } if(!state.vertexLightingActive) { // FIXME: Don't process if not used at all if(state.diffuseActive && state.input[Color0]) { Vector4f diffuse = v[Color0]; o[C0].x = diffuse.x; o[C0].y = diffuse.y; o[C0].z = diffuse.z; o[C0].w = diffuse.w; } else { o[C0].x = Float4(1.0f); o[C0].y = Float4(1.0f); o[C0].z = Float4(1.0f); o[C0].w = Float4(1.0f); } // FIXME: Don't process if not used at all if(state.specularActive && state.input[Color1]) { Vector4f specular = v[Color1]; o[C1].x = specular.x; o[C1].y = specular.y; o[C1].z = specular.z; o[C1].w = specular.w; } else { o[C1].x = Float4(0.0f); o[C1].y = Float4(0.0f); o[C1].z = Float4(0.0f); o[C1].w = Float4(1.0f); } } else { o[C0].x = Float4(0.0f); o[C0].y = Float4(0.0f); o[C0].z = Float4(0.0f); o[C0].w = Float4(0.0f); o[C1].x = Float4(0.0f); o[C1].y = Float4(0.0f); o[C1].z = Float4(0.0f); o[C1].w = Float4(0.0f); Vector4f ambient; Float4 globalAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.globalAmbient)); // FIXME: Unpack ambient.x = globalAmbient.x; ambient.y = globalAmbient.y; ambient.z = globalAmbient.z; for(int i = 0; i < 8; i++) { if(!(state.vertexLightActive & (1 << i))) { continue; } Vector4f L; // Light vector Float4 att; // Attenuation // Attenuation { Float4 d; // Distance L.x = L.y = L.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightPosition[i])); // FIXME: Unpack L.x = L.x.xxxx; L.y = L.y.yyyy; L.z = L.z.zzzz; L.x -= vertexPosition.x; L.y -= vertexPosition.y; L.z -= vertexPosition.z; d = dot3(L, L); d = RcpSqrt_pp(d); // FIXME: Sufficient precision? L.x *= d; L.y *= d; L.z *= d; d = Rcp_pp(d); // FIXME: Sufficient precision? Float4 q = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationQuadratic[i])); Float4 l = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationLinear[i])); Float4 c = *Pointer<Float4>(data + OFFSET(DrawData,ff.attenuationConstant[i])); att = Rcp_pp((q * d + l) * d + c); } // Ambient per light { Float4 lightAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightAmbient[i])); // FIXME: Unpack ambient.x = ambient.x + lightAmbient.x * att; ambient.y = ambient.y + lightAmbient.y * att; ambient.z = ambient.z + lightAmbient.z * att; } // Diffuse if(state.vertexNormalActive) { Float4 dot; dot = dot3(L, normal); dot = Max(dot, Float4(0.0f)); dot *= att; Vector4f diff; if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) { diff.x = diff.y = diff.z = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse)); // FIXME: Unpack diff.x = diff.x.xxxx; diff.y = diff.y.yyyy; diff.z = diff.z.zzzz; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) { diff = v[Color0]; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) { diff = v[Color1]; } else ASSERT(false); Float4 lightDiffuse = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightDiffuse[i])); o[C0].x = o[C0].x + diff.x * dot * lightDiffuse.x; // FIXME: Clamp first? o[C0].y = o[C0].y + diff.y * dot * lightDiffuse.y; // FIXME: Clamp first? o[C0].z = o[C0].z + diff.z * dot * lightDiffuse.z; // FIXME: Clamp first? } // Specular if(state.vertexSpecularActive) { Vector4f S; Vector4f C; // Camera vector Float4 pow; pow = *Pointer<Float>(data + OFFSET(DrawData,ff.materialShininess)); S.x = Float4(0.0f) - vertexPosition.x; S.y = Float4(0.0f) - vertexPosition.y; S.z = Float4(0.0f) - vertexPosition.z; C = normalize(S); S.x = L.x + C.x; S.y = L.y + C.y; S.z = L.z + C.z; C = normalize(S); Float4 dot = Max(dot3(C, normal), Float4(0.0f)); // FIXME: max(dot3(C, normal), 0) Float4 P = power(dot, pow); P *= att; Vector4f spec; if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) { Float4 materialSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular)); // FIXME: Unpack spec.x = materialSpecular.x; spec.y = materialSpecular.y; spec.z = materialSpecular.z; } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) { spec = v[Color0]; } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) { spec = v[Color1]; } else ASSERT(false); Float4 lightSpecular = *Pointer<Float4>(data + OFFSET(DrawData,ff.lightSpecular[i])); spec.x *= lightSpecular.x; spec.y *= lightSpecular.y; spec.z *= lightSpecular.z; spec.x *= P; spec.y *= P; spec.z *= P; spec.x = Max(spec.x, Float4(0.0f)); spec.y = Max(spec.y, Float4(0.0f)); spec.z = Max(spec.z, Float4(0.0f)); if(secondaryColor) { o[C1].x = o[C1].x + spec.x; o[C1].y = o[C1].y + spec.y; o[C1].z = o[C1].z + spec.z; } else { o[C0].x = o[C0].x + spec.x; o[C0].y = o[C0].y + spec.y; o[C0].z = o[C0].z + spec.z; } } } if(state.vertexAmbientMaterialSourceActive == MATERIAL_MATERIAL) { Float4 materialAmbient = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialAmbient)); // FIXME: Unpack ambient.x = ambient.x * materialAmbient.x; ambient.y = ambient.y * materialAmbient.y; ambient.z = ambient.z * materialAmbient.z; } else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR1) { Vector4f materialDiffuse = v[Color0]; ambient.x = ambient.x * materialDiffuse.x; ambient.y = ambient.y * materialDiffuse.y; ambient.z = ambient.z * materialDiffuse.z; } else if(state.vertexAmbientMaterialSourceActive == MATERIAL_COLOR2) { Vector4f materialSpecular = v[Color1]; ambient.x = ambient.x * materialSpecular.x; ambient.y = ambient.y * materialSpecular.y; ambient.z = ambient.z * materialSpecular.z; } else ASSERT(false); o[C0].x = o[C0].x + ambient.x; o[C0].y = o[C0].y + ambient.y; o[C0].z = o[C0].z + ambient.z; // Emissive if(state.vertexEmissiveMaterialSourceActive == MATERIAL_MATERIAL) { Float4 materialEmission = *Pointer<Float4>(data + OFFSET(DrawData,ff.materialEmission)); // FIXME: Unpack o[C0].x = o[C0].x + materialEmission.x; o[C0].y = o[C0].y + materialEmission.y; o[C0].z = o[C0].z + materialEmission.z; } else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR1) { Vector4f materialSpecular = v[Color0]; o[C0].x = o[C0].x + materialSpecular.x; o[C0].y = o[C0].y + materialSpecular.y; o[C0].z = o[C0].z + materialSpecular.z; } else if(state.vertexEmissiveMaterialSourceActive == MATERIAL_COLOR2) { Vector4f materialSpecular = v[Color1]; o[C0].x = o[C0].x + materialSpecular.x; o[C0].y = o[C0].y + materialSpecular.y; o[C0].z = o[C0].z + materialSpecular.z; } else ASSERT(false); // Diffuse alpha component if(state.vertexDiffuseMaterialSourceActive == MATERIAL_MATERIAL) { o[C0].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialDiffuse[0]))).wwww; // FIXME: Unpack } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR1) { Vector4f alpha = v[Color0]; o[C0].w = alpha.w; } else if(state.vertexDiffuseMaterialSourceActive == MATERIAL_COLOR2) { Vector4f alpha = v[Color1]; o[C0].w = alpha.w; } else ASSERT(false); if(state.vertexSpecularActive) { // Specular alpha component if(state.vertexSpecularMaterialSourceActive == MATERIAL_MATERIAL) { o[C1].w = Float4(*Pointer<Float4>(data + OFFSET(DrawData,ff.materialSpecular[3]))).wwww; // FIXME: Unpack } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR1) { Vector4f alpha = v[Color0]; o[C1].w = alpha.w; } else if(state.vertexSpecularMaterialSourceActive == MATERIAL_COLOR2) { Vector4f alpha = v[Color1]; o[C1].w = alpha.w; } else ASSERT(false); } } if(state.fogActive) { Float4 f; if(!state.rangeFogActive) { f = Abs(vertexPosition.z); } else { f = Sqrt(dot3(vertexPosition, vertexPosition)); // FIXME: f = length(vertexPosition); } switch(state.vertexFogMode) { case FOG_NONE: if(state.specularActive) { o[Fog].x = o[C1].w; } else { o[Fog].x = Float4(0.0f); } break; case FOG_LINEAR: o[Fog].x = f * *Pointer<Float4>(data + OFFSET(DrawData,fog.scale)) + *Pointer<Float4>(data + OFFSET(DrawData,fog.offset)); break; case FOG_EXP: o[Fog].x = exponential2(f * *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE)), true); break; case FOG_EXP2: o[Fog].x = exponential2((f * f) * *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E)), true); break; default: ASSERT(false); } } for(int stage = 0; stage < 8; stage++) { processTextureCoordinate(stage, normal, position); } processPointSize(); } void VertexPipeline::processTextureCoordinate(int stage, Vector4f &normal, Vector4f &position) { if(state.output[T0 + stage].write) { int i = state.textureState[stage].texCoordIndexActive; switch(state.textureState[stage].texGenActive) { case TEXGEN_NONE: { Vector4f &&varying = v[TexCoord0 + i]; o[T0 + stage].x = varying.x; o[T0 + stage].y = varying.y; o[T0 + stage].z = varying.z; o[T0 + stage].w = varying.w; } break; case TEXGEN_PASSTHRU: { Vector4f &&varying = v[TexCoord0 + i]; o[T0 + stage].x = varying.x; o[T0 + stage].y = varying.y; o[T0 + stage].z = varying.z; o[T0 + stage].w = varying.w; if(state.input[TexCoord0 + i]) { switch(state.input[TexCoord0 + i].count) { case 1: o[T0 + stage].y = Float4(1.0f); o[T0 + stage].z = Float4(0.0f); o[T0 + stage].w = Float4(0.0f); break; case 2: o[T0 + stage].z = Float4(1.0f); o[T0 + stage].w = Float4(0.0f); break; case 3: o[T0 + stage].w = Float4(1.0f); break; case 4: break; default: ASSERT(false); } } } break; case TEXGEN_NORMAL: { Vector4f Nc; // Normal vector in camera space if(state.vertexNormalActive) { Nc = normal; } else { Nc.x = Float4(0.0f); Nc.y = Float4(0.0f); Nc.z = Float4(0.0f); } Nc.w = Float4(1.0f); o[T0 + stage].x = Nc.x; o[T0 + stage].y = Nc.y; o[T0 + stage].z = Nc.z; o[T0 + stage].w = Nc.w; } break; case TEXGEN_POSITION: { Vector4f Pn = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); // Position in camera space Pn.w = Float4(1.0f); o[T0 + stage].x = Pn.x; o[T0 + stage].y = Pn.y; o[T0 + stage].z = Pn.z; o[T0 + stage].w = Pn.w; } break; case TEXGEN_REFLECTION: { Vector4f R; // Reflection vector if(state.vertexNormalActive) { Vector4f Nc; // Normal vector in camera space Nc = normal; if(state.localViewerActive) { Vector4f Ec; // Eye vector in camera space Vector4f N2; Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) Float4 dot = Float4(2.0f) * dot3(Ec, Nc); R.x = Ec.x - Nc.x * dot; R.y = Ec.y - Nc.y * dot; R.z = Ec.z - Nc.z * dot; } else { // u = -2 * Nz * Nx // v = -2 * Nz * Ny // w = 1 - 2 * Nz * Nz R.x = -Float4(2.0f) * Nc.z * Nc.x; R.y = -Float4(2.0f) * Nc.z * Nc.y; R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; } } else { R.x = Float4(0.0f); R.y = Float4(0.0f); R.z = Float4(0.0f); } R.w = Float4(1.0f); o[T0 + stage].x = R.x; o[T0 + stage].y = R.y; o[T0 + stage].z = R.z; o[T0 + stage].w = R.w; } break; case TEXGEN_SPHEREMAP: { Vector4f R; // Reflection vector if(state.vertexNormalActive) { Vector4f Nc; // Normal vector in camera space Nc = normal; if(state.localViewerActive) { Vector4f Ec; // Eye vector in camera space Vector4f N2; Ec = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); Ec = normalize(Ec); // R = E - 2 * N * (E . N) Float4 dot = Float4(2.0f) * dot3(Ec, Nc); R.x = Ec.x - Nc.x * dot; R.y = Ec.y - Nc.y * dot; R.z = Ec.z - Nc.z * dot; } else { // u = -2 * Nz * Nx // v = -2 * Nz * Ny // w = 1 - 2 * Nz * Nz R.x = -Float4(2.0f) * Nc.z * Nc.x; R.y = -Float4(2.0f) * Nc.z * Nc.y; R.z = Float4(1.0f) - Float4(2.0f) * Nc.z * Nc.z; } } else { R.x = Float4(0.0f); R.y = Float4(0.0f); R.z = Float4(0.0f); } R.z -= Float4(1.0f); R = normalize(R); R.x = Float4(0.5f) * R.x + Float4(0.5f); R.y = Float4(0.5f) * R.y + Float4(0.5f); R.z = Float4(1.0f); R.w = Float4(0.0f); o[T0 + stage].x = R.x; o[T0 + stage].y = R.y; o[T0 + stage].z = R.z; o[T0 + stage].w = R.w; } break; default: ASSERT(false); } Vector4f texTrans0; Vector4f texTrans1; Vector4f texTrans2; Vector4f texTrans3; Vector4f T; Vector4f t; T.x = o[T0 + stage].x; T.y = o[T0 + stage].y; T.z = o[T0 + stage].z; T.w = o[T0 + stage].w; switch(state.textureState[stage].textureTransformCountActive) { case 4: texTrans3.x = texTrans3.y = texTrans3.z = texTrans3.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][3])); // FIXME: Unpack texTrans3.x = texTrans3.x.xxxx; texTrans3.y = texTrans3.y.yyyy; texTrans3.z = texTrans3.z.zzzz; texTrans3.w = texTrans3.w.wwww; t.w = dot4(T, texTrans3); case 3: texTrans2.x = texTrans2.y = texTrans2.z = texTrans2.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][2])); // FIXME: Unpack texTrans2.x = texTrans2.x.xxxx; texTrans2.y = texTrans2.y.yyyy; texTrans2.z = texTrans2.z.zzzz; texTrans2.w = texTrans2.w.wwww; t.z = dot4(T, texTrans2); case 2: texTrans1.x = texTrans1.y = texTrans1.z = texTrans1.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][1])); // FIXME: Unpack texTrans1.x = texTrans1.x.xxxx; texTrans1.y = texTrans1.y.yyyy; texTrans1.z = texTrans1.z.zzzz; texTrans1.w = texTrans1.w.wwww; t.y = dot4(T, texTrans1); case 1: texTrans0.x = texTrans0.y = texTrans0.z = texTrans0.w = *Pointer<Float4>(data + OFFSET(DrawData,ff.textureTransform[stage][0])); // FIXME: Unpack texTrans0.x = texTrans0.x.xxxx; texTrans0.y = texTrans0.y.yyyy; texTrans0.z = texTrans0.z.zzzz; texTrans0.w = texTrans0.w.wwww; t.x = dot4(T, texTrans0); o[T0 + stage].x = t.x; o[T0 + stage].y = t.y; o[T0 + stage].z = t.z; o[T0 + stage].w = t.w; case 0: break; default: ASSERT(false); } } } void VertexPipeline::processPointSize() { if(!state.pointSizeActive) { return; // Use global pointsize } if(state.input[PointSize]) { o[Pts].y = v[PointSize].x; } else { o[Pts].y = *Pointer<Float4>(data + OFFSET(DrawData,point.pointSize)); } if(state.pointScaleActive && !state.preTransformed) { Vector4f p = transformBlend(v[Position], Pointer<Byte>(data + OFFSET(DrawData,ff.cameraTransformT)), true); Float4 d = Sqrt(dot3(p, p)); // FIXME: length(p); Float4 A = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleA)); // FIXME: Unpack Float4 B = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleB)); // FIXME: Unpack Float4 C = *Pointer<Float>(data + OFFSET(DrawData,point.pointScaleC)); // FIXME: Unpack A = RcpSqrt_pp(A + d * (B + d * C)); o[Pts].y = o[Pts].y * Float4(*Pointer<Float>(data + OFFSET(DrawData,viewportHeight))) * A; // FIXME: Unpack } } Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, bool homogeneous) { Vector4f dst; if(homogeneous) { Float4 m[4][4]; for(int j = 0; j < 4; j++) { for(int i = 0; i < 4; i++) { m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); } } dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + src.w * m[0][3]; dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + src.w * m[1][3]; dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + src.w * m[2][3]; dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + src.w * m[3][3]; } else { Float4 m[3][3]; for(int j = 0; j < 3; j++) { for(int i = 0; i < 3; i++) { m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j); m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j); } } dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; } return dst; } Vector4f VertexPipeline::transform(const Register &src, const Pointer<Byte> &matrix, UInt index[4], bool homogeneous) { Vector4f dst; if(homogeneous) { Float4 m[4][4]; for(int j = 0; j < 4; j++) { for(int i = 0; i < 4; i++) { m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); } } dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2] + m[0][3]; dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2] + m[1][3]; dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2] + m[2][3]; dst.w = src.x * m[3][0] + src.y * m[3][1] + src.z * m[3][2] + m[3][3]; } else { Float4 m[3][3]; for(int j = 0; j < 3; j++) { for(int i = 0; i < 3; i++) { m[j][i].x = *Pointer<Float>(matrix + 16 * i + 4 * j + index[0]); m[j][i].y = *Pointer<Float>(matrix + 16 * i + 4 * j + index[1]); m[j][i].z = *Pointer<Float>(matrix + 16 * i + 4 * j + index[2]); m[j][i].w = *Pointer<Float>(matrix + 16 * i + 4 * j + index[3]); } } dst.x = src.x * m[0][0] + src.y * m[0][1] + src.z * m[0][2]; dst.y = src.x * m[1][0] + src.y * m[1][1] + src.z * m[1][2]; dst.z = src.x * m[2][0] + src.y * m[2][1] + src.z * m[2][2]; } return dst; } Vector4f VertexPipeline::normalize(Vector4f &src) { Vector4f dst; Float4 rcpLength = RcpSqrt_pp(dot3(src, src)); dst.x = src.x * rcpLength; dst.y = src.y * rcpLength; dst.z = src.z * rcpLength; return dst; } Float4 VertexPipeline::power(Float4 &src0, Float4 &src1) { Float4 dst = src0; dst = dst * dst; dst = dst * dst; dst = Float4(As<Int4>(dst) - As<Int4>(Float4(1.0f))); dst *= src1; dst = As<Float4>(Int4(dst) + As<Int4>(Float4(1.0f))); dst = RcpSqrt_pp(dst); dst = RcpSqrt_pp(dst); return dst; } }