/*
* Copyright 2012 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "GrGaussianConvolutionFragmentProcessor.h"
#include "GrTexture.h"
#include "GrTextureProxy.h"
#include "glsl/GrGLSLFragmentProcessor.h"
#include "glsl/GrGLSLFragmentShaderBuilder.h"
#include "glsl/GrGLSLProgramDataManager.h"
#include "glsl/GrGLSLUniformHandler.h"
// For brevity: file-local aliases for the nested types used throughout this file.
using UniformHandle = GrGLSLProgramDataManager::UniformHandle;
using Direction = GrGaussianConvolutionFragmentProcessor::Direction;
// GLSL-side implementation of GrGaussianConvolutionFragmentProcessor. It emits the unrolled
// convolution shader, uploads the uniforms that shader reads, and produces the shader key.
class GrGLConvolutionEffect : public GrGLSLFragmentProcessor {
public:
    // Appends the convolution fragment code for args.fFp and declares its uniforms.
    void emitCode(EmitArgs&) override;

    // Adds the key bits describing the processor's shader variant to the key builder.
    static inline void GenKey(const GrProcessor&, const GrShaderCaps&, GrProcessorKeyBuilder*);

protected:
    // Uploads the values of the uniforms declared by emitCode().
    void onSetData(const GrGLSLProgramDataManager&, const GrFragmentProcessor&) override;

private:
    using INHERITED = GrGLSLFragmentProcessor;

    UniformHandle fKernelUni;          // "Kernel" half4 array holding the tap weights
    UniformHandle fImageIncrementUni;  // "ImageIncrement" half2, the per-tap coordinate step
    UniformHandle fBoundsUni;          // "Bounds" half2, only declared when bounds are used
};
// Emits the fragment shader for a one-dimensional convolution of width() taps.
//
// Declares the "ImageIncrement" uniform, the "Bounds" uniform (only when the processor uses
// bounds) and the "Kernel" half4 array, then appends one texture lookup per tap. The output
// color is the weighted sum of the taps multiplied by the input color.
void GrGLConvolutionEffect::emitCode(EmitArgs& args) {
    const GrGaussianConvolutionFragmentProcessor& conv =
            args.fFp.cast<GrGaussianConvolutionFragmentProcessor>();
    GrGLSLUniformHandler* uniforms = args.fUniformHandler;

    fImageIncrementUni = uniforms->addUniform(kFragment_GrShaderFlag, kHalf2_GrSLType,
                                              "ImageIncrement");
    const bool bounded = conv.useBounds();
    if (bounded) {
        fBoundsUni = uniforms->addUniform(kFragment_GrShaderFlag, kHalf2_GrSLType, "Bounds");
    }

    // Weights are packed four to a half4, so round the tap count up to a multiple of four.
    const int tapCount = conv.width();
    const int vec4Count = (tapCount + 3) / 4;
    SkASSERT(4 * vec4Count >= tapCount);
    fKernelUni = uniforms->addUniformArray(kFragment_GrShaderFlag, kHalf4_GrSLType, "Kernel",
                                           vec4Count);

    GrGLSLFPFragmentBuilder* fsBuilder = args.fFragBuilder;
    SkString coords2D = fsBuilder->ensureCoords2D(args.fTransformedCoords[0]);

    fsBuilder->codeAppendf("%s = half4(0, 0, 0, 0);", args.fOutputColor);

    const GrShaderVar& kernelVar = uniforms->getUniformVariable(fKernelUni);
    const char* imgInc = uniforms->getUniformCStr(fImageIncrementUni);

    // The first tap sits radius() increments before the fragment's own coordinate; every tap
    // emitted below advances 'coord' by one increment.
    fsBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", coords2D.c_str(), conv.radius(),
                           imgInc);
    fsBuilder->codeAppend("float2 coordSampled = half2(0, 0);");

    // Values that are the same for every tap.
    const GrTextureDomain::Mode mode = conv.mode();
    const char* axis = (Direction::kY == conv.direction()) ? "y" : "x";
    const char* bounds = bounded ? uniforms->getUniformCStr(fBoundsUni) : nullptr;

    // Manually unroll loop because some drivers don't; yields 20-30% speedup.
    static const char* const kChannelSuffix[4] = {".x", ".y", ".z", ".w"};
    for (int tap = 0; tap < tapCount; ++tap) {
        // Weight for this tap: element tap/4 of the kernel array, channel tap%4.
        SkString vecIndex;
        vecIndex.appendS32(tap / 4);
        SkString weight;
        kernelVar.appendArrayAccess(vecIndex.c_str(), &weight);
        weight.append(kChannelSuffix[tap % 4]);

        fsBuilder->codeAppend("coordSampled = coord;");
        if (bounded) {
            // We used to compute a bool indicating whether we're in bounds or not, cast it to a
            // float, and then mul weight*texture_sample by the float. However, the Adreno 430
            // seems to have a bug that caused corruption.
            switch (mode) {
                case GrTextureDomain::kClamp_Mode: {
                    fsBuilder->codeAppendf("coordSampled.%s = clamp(coord.%s, %s.x, %s.y);\n",
                                           axis, axis, bounds, bounds);
                    break;
                }
                case GrTextureDomain::kRepeat_Mode: {
                    fsBuilder->codeAppendf("coordSampled.%s = "
                                           "mod(coord.%s - %s.x, %s.y - %s.x) + %s.x;\n",
                                           axis, axis, bounds, bounds, bounds, bounds);
                    break;
                }
                case GrTextureDomain::kDecal_Mode: {
                    // Opens a conditional that is closed after the accumulation below.
                    fsBuilder->codeAppendf("if (coord.%s >= %s.x && coord.%s <= %s.y) {",
                                           axis, bounds, axis, bounds);
                    break;
                }
                default: {
                    SK_ABORT("Unsupported operation.");
                }
            }
        }

        // output += sample(coordSampled) * weight
        fsBuilder->codeAppendf("%s += ", args.fOutputColor);
        fsBuilder->appendTextureLookup(args.fTexSamplers[0], "coordSampled");
        fsBuilder->codeAppendf(" * %s;\n", weight.c_str());
        if (GrTextureDomain::kDecal_Mode == mode) {
            fsBuilder->codeAppend("}");
        }
        fsBuilder->codeAppendf("coord += %s;\n", imgInc);
    }
    fsBuilder->codeAppendf("%s *= %s;\n", args.fOutputColor, args.fInputColor);
}
void GrGLConvolutionEffect::onSetData(const GrGLSLProgramDataManager& pdman,
const GrFragmentProcessor& processor) {
const GrGaussianConvolutionFragmentProcessor& conv =
processor.cast<GrGaussianConvolutionFragmentProcessor>();
GrSurfaceProxy* proxy = conv.textureSampler(0).proxy();
GrTexture& texture = *proxy->peekTexture();
float imageIncrement[2] = {0};
float ySign = proxy->origin() != kTopLeft_GrSurfaceOrigin ? 1.0f : -1.0f;
switch (conv.direction()) {
case Direction::kX:
imageIncrement[0] = 1.0f / texture.width();
break;
case Direction::kY:
imageIncrement[1] = ySign / texture.height();
break;
default:
SK_ABORT("Unknown filter direction.");
}
pdman.set2fv(fImageIncrementUni, 1, imageIncrement);
if (conv.useBounds()) {
float bounds[2] = {0};
bounds[0] = conv.bounds()[0];
bounds[1] = conv.bounds()[1];
if (GrTextureDomain::kClamp_Mode == conv.mode()) {
bounds[0] += SK_ScalarHalf;
bounds[1] -= SK_ScalarHalf;
}
if (Direction::kX == conv.direction()) {
SkScalar inv = SkScalarInvert(SkIntToScalar(texture.width()));
bounds[0] *= inv;
bounds[1] *= inv;
} else {
SkScalar inv = SkScalarInvert(SkIntToScalar(texture.height()));
if (proxy->origin() != kTopLeft_GrSurfaceOrigin) {
float tmp = bounds[0];
bounds[0] = 1.0f - (inv * bounds[1]);
bounds[1] = 1.0f - (inv * tmp);
} else {
bounds[0] *= inv;
bounds[1] *= inv;
}
}
SkASSERT(bounds[0] <= bounds[1]);
pdman.set2f(fBoundsUni, bounds[0], bounds[1]);
}
int width = conv.width();
int arrayCount = (width + 3) / 4;
SkASSERT(4 * arrayCount >= width);
pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
}
void GrGLConvolutionEffect::GenKey(const GrProcessor& processor, const GrShaderCaps&,
GrProcessorKeyBuilder* b) {
const GrGaussianConvolutionFragmentProcessor& conv =
processor.cast<GrGaussianConvolutionFragmentProcessor>();
uint32_t key = conv.radius();
key <<= 3;
key |= Direction::kY == conv.direction() ? 0x4 : 0x0;
key |= static_cast<uint32_t>(conv.mode());
b->add32(key);
}
///////////////////////////////////////////////////////////////////////////////
// Writes 'width' Gaussian weights into 'kernel', centered on index 'radius' and normalized so
// that they sum to one. When 2 * gaussianSigma^2 is nearly zero every weight is set to 0.
static void fill_in_1D_gaussian_kernel(float* kernel, int width, float gaussianSigma, int radius) {
    const float twoSigmaSqrd = 2.0f * gaussianSigma * gaussianSigma;
    const bool degenerateSigma = SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero);
    if (degenerateSigma) {
        int tap = 0;
        while (tap < width) {
            kernel[tap++] = 0.0f;
        }
        return;
    }

    const float invTwoSigmaSqrd = 1.0f / twoSigmaSqrd;
    float total = 0.0f;
    for (int tap = 0; tap < width; ++tap) {
        const float x = static_cast<float>(tap - radius);
        // Note that the constant term (1/(sqrt(2*pi*sigma^2)) of the Gaussian
        // is dropped here, since we renormalize the kernel below.
        const float weight = sk_float_exp(-x * x * invTwoSigmaSqrd);
        kernel[tap] = weight;
        total += weight;
    }

    // Normalize the kernel. Each element is scaled independently, so order does not matter.
    const float scale = 1.0f / total;
    for (int tap = width - 1; tap >= 0; --tap) {
        kernel[tap] *= scale;
    }
}
// Constructs a 1-D Gaussian convolution over 'proxy' along 'direction'.
//   radius        - kernel radius; must not exceed kMaxKernelRadius.
//   gaussianSigma - sigma used to compute the normalized kernel weights.
//   mode          - texture-domain mode applied along the filter direction.
//   bounds        - two ints copied into fBounds.
GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor(
        sk_sp<GrTextureProxy> proxy,
        Direction direction,
        int radius,
        float gaussianSigma,
        GrTextureDomain::Mode mode,
        int bounds[2])
        : INHERITED(kGrGaussianConvolutionFragmentProcessor_ClassID,
                    ModulateForSamplerOptFlags(proxy->config(),
                                               mode == GrTextureDomain::kDecal_Mode))
        , fCoordTransform(proxy.get())
        , fTextureSampler(std::move(proxy))
        , fRadius(radius)
        , fDirection(direction)
        , fMode(mode) {
    SkASSERT(radius <= kMaxKernelRadius);
    // Make sure the sampler's ctor uses the clamp wrap mode
    SkASSERT(fTextureSampler.samplerState().wrapModeX() == GrSamplerState::WrapMode::kClamp &&
             fTextureSampler.samplerState().wrapModeY() == GrSamplerState::WrapMode::kClamp);

    // Register the single coord transform and the single texture sampler with the base class.
    this->setTextureSamplerCnt(1);
    this->addCoordTransform(&fCoordTransform);

    // Per-instance data: the caller's bounds and the normalized kernel weights.
    memcpy(fBounds, bounds, sizeof(fBounds));
    fill_in_1D_gaussian_kernel(fKernel, this->width(), gaussianSigma, this->radius());
}
// Copy constructor: duplicates the coord transform, sampler, radius, direction, mode, bounds
// and the first width() kernel weights of 'that'.
GrGaussianConvolutionFragmentProcessor::GrGaussianConvolutionFragmentProcessor(
        const GrGaussianConvolutionFragmentProcessor& that)
        : INHERITED(kGrGaussianConvolutionFragmentProcessor_ClassID, that.optimizationFlags())
        , fCoordTransform(that.fCoordTransform)
        , fTextureSampler(that.fTextureSampler)
        , fRadius(that.fRadius)
        , fDirection(that.fDirection)
        , fMode(that.fMode) {
    // Only the weights actually in use are copied.
    memcpy(fBounds, that.fBounds, sizeof(fBounds));
    memcpy(fKernel, that.fKernel, that.width() * sizeof(float));

    // The base class must be told about this instance's own transform and sampler.
    this->setTextureSamplerCnt(1);
    this->addCoordTransform(&fCoordTransform);
}
// Adds this processor's shader key to 'b' by delegating to GrGLConvolutionEffect::GenKey.
void GrGaussianConvolutionFragmentProcessor::onGetGLSLProcessorKey(const GrShaderCaps& caps,
GrProcessorKeyBuilder* b) const {
GrGLConvolutionEffect::GenKey(*this, caps, b);
}
// Returns a newly allocated GrGLConvolutionEffect, the GLSL implementation of this processor.
GrGLSLFragmentProcessor* GrGaussianConvolutionFragmentProcessor::onCreateGLSLInstance() const {
return new GrGLConvolutionEffect;
}
bool GrGaussianConvolutionFragmentProcessor::onIsEqual(const GrFragmentProcessor& sBase) const {
const GrGaussianConvolutionFragmentProcessor& s =
sBase.cast<GrGaussianConvolutionFragmentProcessor>();
return (this->radius() == s.radius() && this->direction() == s.direction() &&
this->mode() == s.mode() &&
0 == memcmp(fBounds, s.fBounds, sizeof(fBounds)) &&
0 == memcmp(fKernel, s.fKernel, this->width() * sizeof(float)));
}
///////////////////////////////////////////////////////////////////////////////
GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrGaussianConvolutionFragmentProcessor);
#if GR_TEST_UTILS
// Test factory: builds a processor with a randomly chosen texture, mode, direction, bounds and
// radius. Sigma is derived from the radius as radius / 3.
std::unique_ptr<GrFragmentProcessor> GrGaussianConvolutionFragmentProcessor::TestCreate(
        GrProcessorTestData* d) {
    // The random draws below happen in a fixed order; keep it so seeds stay reproducible.
    const int texIdx = d->fRandom->nextBool() ? GrProcessorUnitTest::kSkiaPMTextureIdx
                                              : GrProcessorUnitTest::kAlphaTextureIdx;
    sk_sp<GrTextureProxy> proxy = d->textureProxy(texIdx);

    const int modeIdx = d->fRandom->nextRangeU(0, GrTextureDomain::kModeCount-1);

    // Bounds are drawn along whichever axis the filter runs: 0 <= bounds[0] < bounds[1] < extent.
    const Direction dir = d->fRandom->nextBool() ? Direction::kX : Direction::kY;
    const int extent = (Direction::kX == dir) ? proxy->width() : proxy->height();
    int bounds[2];
    bounds[0] = d->fRandom->nextRangeU(0, extent-2);
    bounds[1] = d->fRandom->nextRangeU(bounds[0]+1, extent-1);

    const int radius = d->fRandom->nextRangeU(1, kMaxKernelRadius);
    const float sigma = radius / 3.f;

    return GrGaussianConvolutionFragmentProcessor::Make(
            d->textureProxy(texIdx),
            dir, radius, sigma, static_cast<GrTextureDomain::Mode>(modeIdx), bounds);
}
#endif