/* * Copyright 2012 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include "GrGaussianConvolutionFragmentProcessor.h" #include "GrTexture.h" #include "GrTextureProxy.h" #include "glsl/GrGLSLFragmentProcessor.h" #include "glsl/GrGLSLFragmentShaderBuilder.h" #include "glsl/GrGLSLProgramDataManager.h" #include "glsl/GrGLSLUniformHandler.h" // For brevity using UniformHandle = GrGLSLProgramDataManager::UniformHandle; using Direction = GrGaussianConvolutionFragmentProcessor::Direction; class GrGLConvolutionEffect : public GrGLSLFragmentProcessor { public: void emitCode(EmitArgs&) override; static inline void GenKey(const GrProcessor&, const GrShaderCaps&, GrProcessorKeyBuilder*); protected: void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override; private: UniformHandle fKernelUni; UniformHandle fImageIncrementUni; UniformHandle fBoundsUni; typedef GrGLSLFragmentProcessor INHERITED; }; void GrGLConvolutionEffect::emitCode(EmitArgs& args) { const GrGaussianConvolutionFragmentProcessor& ce = args.fFp.cast<GrGaussianConvolutionFragmentProcessor>(); GrGLSLUniformHandler* uniformHandler = args.fUniformHandler; fImageIncrementUni = uniformHandler->addUniform(kFragment_GrShaderFlag, kHalf2_GrSLType, "ImageIncrement"); if (ce.useBounds()) { fBoundsUni = uniformHandler->addUniform(kFragment_GrShaderFlag, kHalf2_GrSLType, "Bounds"); } int width = ce.width(); int arrayCount = (width + 3) / 4; SkASSERT(4 * arrayCount >= width); fKernelUni = uniformHandler->addUniformArray(kFragment_GrShaderFlag, kHalf4_GrSLType, "Kernel", arrayCount); GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder; SkString coords2D = fragBuilder->ensureCoords2D(args.fTransformedCoords[0]); fragBuilder->codeAppendf("%s = half4(0, 0, 0, 0);", args.fOutputColor); const GrShaderVar& kernel = uniformHandler->getUniformVariable(fKernelUni); const char* imgInc = uniformHandler->getUniformCStr(fImageIncrementUni); fragBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", coords2D.c_str(), ce.radius(), imgInc); fragBuilder->codeAppend("float2 coordSampled = half2(0, 0);"); // Manually unroll loop because some drivers don't; yields 20-30% speedup. const char* kVecSuffix[4] = {".x", ".y", ".z", ".w"}; for (int i = 0; i < width; i++) { SkString index; SkString kernelIndex; index.appendS32(i / 4); kernel.appendArrayAccess(index.c_str(), &kernelIndex); kernelIndex.append(kVecSuffix[i & 0x3]); fragBuilder->codeAppend("coordSampled = coord;"); if (ce.useBounds()) { // We used to compute a bool indicating whether we're in bounds or not, cast it to a // float, and then mul weight*texture_sample by the float. However, the Adreno 430 seems // to have a bug that caused corruption. const char* bounds = uniformHandler->getUniformCStr(fBoundsUni); const char* component = ce.direction() == Direction::kY ? "y" : "x"; switch (ce.mode()) { case GrTextureDomain::kClamp_Mode: { fragBuilder->codeAppendf("coordSampled.%s = clamp(coord.%s, %s.x, %s.y);\n", component, component, bounds, bounds); break; } case GrTextureDomain::kRepeat_Mode: { fragBuilder->codeAppendf("coordSampled.%s = " "mod(coord.%s - %s.x, %s.y - %s.x) + %s.x;\n", component, component, bounds, bounds, bounds, bounds); break; } case GrTextureDomain::kDecal_Mode: { fragBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {", component, bounds, component, bounds); break; } default: { SK_ABORT("Unsupported operation."); } } } fragBuilder->codeAppendf("%s += ", args.fOutputColor); fragBuilder->appendTextureLookup(args.fTexSamplers[0], "coordSampled"); fragBuilder->codeAppendf(" * %s;\n", kernelIndex.c_str()); if (GrTextureDomain::kDecal_Mode == ce.mode()) { fragBuilder->codeAppend("}"); } fragBuilder->codeAppendf("coord += %s;\n", imgInc); } fragBuilder->codeAppendf("%s *= %s;\n", args.fOutputColor, args.fInputColor); } void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman, const GrFragmentProcessor& processor) { const GrGaussianConvolutionFragmentProcessor& conv = processor.cast<GrGaussianConvolutionFragmentProcessor>(); GrSurfaceProxy* proxy = conv.textureSampler(0).proxy(); GrTexture& texture = *proxy->peekTexture(); float imageIncrement[2] = {0}; float ySign = proxy->origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f; switch (conv.direction()) { case Direction::kX: imageIncrement[0] = 1.0f / texture.width(); break; case Direction::kY: imageIncrement[1] = ySign / texture.height(); break; default: SK_ABORT("Unknown filter direction."); } pdman.set2fv(fImageIncrementUni, 1, imageIncrement); if (conv.useBounds()) { float bounds[2] = {0}; bounds[0] = conv.bounds()[0]; bounds[1] = conv.bounds()[1]; if (GrTextureDomain::kClamp_Mode == conv.mode()) { bounds[0] += SK_ScalarHalf; bounds[1] -= SK_ScalarHalf; } if (Direction::kX == conv.direction()) { SkScalar inv = SkScalarInvert(SkIntToScalar(texture.width())); bounds[0] *= inv; bounds[1] *= inv; } else { SkScalar inv = SkScalarInvert(SkIntToScalar(texture.height())); if (proxy->origin() != kTopLeft_GrSurfaceOrigin) { float tmp = bounds[0]; bounds[0] = 1.0f - (inv * bounds[1]); bounds[1] = 1.0f - (inv * tmp); } else { bounds[0] *= inv; bounds[1] *= inv; } } SkASSERT(bounds[0] <= bounds[1]); pdman.set2f(fBoundsUni, bounds[0], bounds[1]); } int width = conv.width(); int arrayCount = (width + 3) / 4; SkASSERT(4 * arrayCount >= width); pdman.set4fv(fKernelUni, arrayCount, conv.kernel()); } void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrShaderCaps&, GrProcessorKeyBuilder* b) { const GrGaussianConvolutionFragmentProcessor& conv = processor.cast<GrGaussianConvolutionFragmentProcessor>(); uint32_t key = conv.radius(); key <<= 3; key |= Direction::kY == conv.direction() ? 0x4 : 0x0; key |= static_cast<uint32_t>(conv.mode()); b->add32(key); } /////////////////////////////////////////////////////////////////////////////// static void fill_in_1D_gaussian_kernel(float* kernel, int width, float gaussianSigma, int radius) { const float twoSigmaSqrd = 2.0f * gaussianSigma * gaussianSigma; if (SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero)) { for (int i = 0; i < width; ++i) { kernel[i] = 0.0f; } return; } const float denom = 1.0f / twoSigmaSqrd; float sum = 0.0f; for (int i = 0; i < width; ++i) { float x = static_cast<float>(i - radius); // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian // is dropped here, since we renormalize the kernel below. kernel[i] = sk_float_exp(-x * x * denom); sum += kernel[i]; } // Normalize the kernel float scale = 1.0f / sum; for (int i = 0; i < width; ++i) { kernel[i] *= scale; } } GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor( sk_sp<GrTextureProxy> proxy, Direction direction, int radius, float gaussianSigma, GrTextureDomain::Mode mode, int bounds[2]) : INHERITED(kGrGaussianConvolutionFragmentProcessor_ClassID, ModulateForSamplerOptFlags(proxy->config(), mode == GrTextureDomain::kDecal_Mode)) , fCoordTransform(proxy.get()) , fTextureSampler(std::move(proxy)) , fRadius(radius) , fDirection(direction) , fMode(mode) { // Make sure the sampler's ctor uses the clamp wrap mode SkASSERT(fTextureSampler.samplerState().wrapModeX() == GrSamplerState::WrapMode::kClamp && fTextureSampler.samplerState().wrapModeY() == GrSamplerState::WrapMode::kClamp); this->addCoordTransform(&fCoordTransform); this->setTextureSamplerCnt(1); SkASSERT(radius <= kMaxKernelRadius); fill_in_1D_gaussian_kernel(fKernel, this->width(), gaussianSigma, this->radius()); memcpy(fBounds, bounds, sizeof(fBounds)); } GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor( const GrGaussianConvolutionFragmentProcessor& that) : INHERITED(kGrGaussianConvolutionFragmentProcessor_ClassID, that.optimizationFlags()) , fCoordTransform(that.fCoordTransform) , fTextureSampler(that.fTextureSampler) , fRadius(that.fRadius) , fDirection(that.fDirection) , fMode(that.fMode) { this->addCoordTransform(&fCoordTransform); this->setTextureSamplerCnt(1); memcpy(fKernel, that.fKernel, that.width() * sizeof(float)); memcpy(fBounds, that.fBounds, sizeof(fBounds)); } void GrGaussianConvolutionFragmentProcessor::onGetGLSLProcessorKey(const GrShaderCaps& caps, GrProcessorKeyBuilder* b) const { GrGLConvolutionEffect::GenKey(*this, caps, b); } GrGLSLFragmentProcessor* GrGaussianConvolutionFragmentProcessor::onCreateGLSLInstance() const { return new GrGLConvolutionEffect; } bool GrGaussianConvolutionFragmentProcessor::onIsEqual(const GrFragmentProcessor& sBase) const { const GrGaussianConvolutionFragmentProcessor& s = sBase.cast<GrGaussianConvolutionFragmentProcessor>(); return (this->radius() == s.radius() && this->direction() == s.direction() && this->mode() == s.mode() && 0 == memcmp(fBounds, s.fBounds, sizeof(fBounds)) && 0 == memcmp(fKernel, s.fKernel, this->width() * sizeof(float))); } /////////////////////////////////////////////////////////////////////////////// GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrGaussianConvolutionFragmentProcessor); #if GR_TEST_UTILS std::unique_ptr<GrFragmentProcessor> GrGaussianConvolutionFragmentProcessor::TestCreate( GrProcessorTestData* d) { int texIdx = d->fRandom->nextBool() ? GrProcessorUnitTest::kSkiaPMTextureIdx : GrProcessorUnitTest::kAlphaTextureIdx; sk_sp<GrTextureProxy> proxy = d->textureProxy(texIdx); int bounds[2]; int modeIdx = d->fRandom->nextRangeU(0, GrTextureDomain::kModeCount-1); Direction dir; if (d->fRandom->nextBool()) { dir = Direction::kX; bounds[0] = d->fRandom->nextRangeU(0, proxy->width()-2); bounds[1] = d->fRandom->nextRangeU(bounds[0]+1, proxy->width()-1); } else { dir = Direction::kY; bounds[0] = d->fRandom->nextRangeU(0, proxy->height()-2); bounds[1] = d->fRandom->nextRangeU(bounds[0]+1, proxy->height()-1); } int radius = d->fRandom->nextRangeU(1, kMaxKernelRadius); float sigma = radius / 3.f; return GrGaussianConvolutionFragmentProcessor::Make( d->textureProxy(texIdx), dir, radius, sigma, static_cast<GrTextureDomain::Mode>(modeIdx), bounds); } #endif