/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkArenaAlloc.h"
#include "SkBlendModePriv.h"
#include "SkBlitter.h"
#include "SkColor.h"
#include "SkColorFilter.h"
#include "SkColorSpacePriv.h"
#include "SkColorSpaceXformer.h"
#include "SkColorSpaceXformSteps.h"
#include "SkOpts.h"
#include "SkRasterPipeline.h"
#include "SkShader.h"
#include "SkShaderBase.h"
#include "SkTo.h"
#include "SkUtils.h"

class SkRasterPipelineBlitter final : public SkBlitter {
public:
    // This is our common entrypoint for creating the blitter once we've sorted out shaders.
    static SkBlitter* Create(const SkPixmap&, const SkPaint&, SkArenaAlloc*,
                             const SkRasterPipeline& shaderPipeline,
                             bool is_opaque, bool is_constant);

    SkRasterPipelineBlitter(SkPixmap dst,
                            SkBlendMode blend,
                            SkArenaAlloc* alloc)
        : fDst(dst)
        , fBlend(blend)
        , fAlloc(alloc)
        , fColorPipeline(alloc)
    {}

    void blitH     (int x, int y, int w)                            override;
    void blitAntiH (int x, int y, const SkAlpha[], const int16_t[]) override;
    void blitAntiH2(int x, int y, U8CPU a0, U8CPU a1)               override;
    void blitAntiV2(int x, int y, U8CPU a0, U8CPU a1)               override;
    void blitMask  (const SkMask&, const SkIRect& clip)             override;
    void blitRect  (int x, int y, int width, int height)            override;
    void blitV     (int x, int y, int height, SkAlpha alpha)        override;

private:
    void append_load_dst      (SkRasterPipeline*) const;
    void append_store         (SkRasterPipeline*) const;

    SkPixmap               fDst;
    SkBlendMode            fBlend;
    SkArenaAlloc*          fAlloc;
    SkRasterPipeline       fColorPipeline;

    SkRasterPipeline_MemoryCtx
        fDstPtr       = {nullptr,0},  // Always points to the top-left of fDst.
        fMaskPtr      = {nullptr,0};  // Updated each call to blitMask().
    SkRasterPipeline_EmbossCtx fEmbossCtx;  // Used only for k3D_Format masks.

    // We may be able to specialize blitH() or blitRect() into a memset.
    void   (*fMemset2D)(SkPixmap*, int x,int y, int w,int h, uint64_t color) = nullptr;
    uint64_t fMemsetColor = 0;   // Big enough for largest memsettable dst format, F16.

    // Built lazily on first use.
    std::function<void(size_t, size_t, size_t, size_t)> fBlitRect,
                                                        fBlitAntiH,
                                                        fBlitMaskA8,
                                                        fBlitMaskLCD16,
                                                        fBlitMask3D;

    // These values are pointed to by the blit pipelines above,
    // which allows us to adjust them from call to call.
    float fCurrentCoverage = 0.0f;
    float fDitherRate      = 0.0f;

    typedef SkBlitter INHERITED;
};

SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst,
                                         const SkPaint& paint,
                                         const SkMatrix& ctm,
                                         SkArenaAlloc* alloc) {
    // For legacy/SkColorSpaceXformCanvas to keep working,
    // we need to sometimes still need to distinguish null dstCS from sRGB.
#if 0
    SkColorSpace* dstCS = dst.colorSpace() ? dst.colorSpace()
                                           : sk_srgb_singleton();
#else
    SkColorSpace* dstCS = dst.colorSpace();
#endif
    SkColorType dstCT = dst.colorType();
    SkColor4f paintColor = paint.getColor4f();
    SkColorSpaceXformSteps(sk_srgb_singleton(), kUnpremul_SkAlphaType,
                           dstCS,               kUnpremul_SkAlphaType).apply(paintColor.vec());

    auto shader = as_SB(paint.getShader());

    SkRasterPipeline_<256> shaderPipeline;
    if (!shader) {
        // Having no shader makes things nice and easy... just use the paint color.
        shaderPipeline.append_constant_color(alloc, paintColor.premul().vec());
        bool is_opaque    = paintColor.fA == 1.0f,
             is_constant  = true;
        return SkRasterPipelineBlitter::Create(dst, paint, alloc,
                                               shaderPipeline, is_opaque, is_constant);
    }

    bool is_opaque    = shader->isOpaque() && paintColor.fA == 1.0f;
    bool is_constant  = shader->isConstant();

    if (shader->appendStages({&shaderPipeline, alloc, dstCT, dstCS, paint, nullptr, ctm})) {
        if (paintColor.fA != 1.0f) {
            shaderPipeline.append(SkRasterPipeline::scale_1_float,
                                  alloc->make<float>(paintColor.fA));
        }
        return SkRasterPipelineBlitter::Create(dst, paint, alloc,
                                               shaderPipeline, is_opaque, is_constant);
    }

    // The shader has opted out of drawing anything.
    return alloc->make<SkNullBlitter>();
}

SkBlitter* SkCreateRasterPipelineBlitter(const SkPixmap& dst,
                                         const SkPaint& paint,
                                         const SkRasterPipeline& shaderPipeline,
                                         bool is_opaque,
                                         SkArenaAlloc* alloc) {
    bool is_constant = false;  // If this were the case, it'd be better to just set a paint color.
    return SkRasterPipelineBlitter::Create(dst, paint, alloc,
                                           shaderPipeline, is_opaque, is_constant);
}

SkBlitter* SkRasterPipelineBlitter::Create(const SkPixmap& dst,
                                           const SkPaint& paint,
                                           SkArenaAlloc* alloc,
                                           const SkRasterPipeline& shaderPipeline,
                                           bool is_opaque,
                                           bool is_constant) {
    auto blitter = alloc->make<SkRasterPipelineBlitter>(dst,
                                                        paint.getBlendMode(),
                                                        alloc);

    // Our job in this factory is to fill out the blitter's color pipeline.
    // This is the common front of the full blit pipelines, each constructed lazily on first use.
    // The full blit pipelines handle reading and writing the dst, blending, coverage, dithering.
    auto colorPipeline = &blitter->fColorPipeline;

    // Let's get the shader in first.
    colorPipeline->extend(shaderPipeline);

    // If there's a color filter it comes next.
    if (auto colorFilter = paint.getColorFilter()) {
        colorFilter->appendStages(colorPipeline, dst.colorSpace(), alloc, is_opaque);
        is_opaque = is_opaque && (colorFilter->getFlags() & SkColorFilter::kAlphaUnchanged_Flag);
    }

    // Not all formats make sense to dither (think, F16).  We set their dither rate
    // to zero.  We need to decide if we're going to dither now to keep is_constant accurate.
    if (paint.isDither()) {
        switch (dst.info().colorType()) {
            default:                        blitter->fDitherRate =      0.0f; break;
            case kARGB_4444_SkColorType:    blitter->fDitherRate =   1/15.0f; break;
            case   kRGB_565_SkColorType:    blitter->fDitherRate =   1/63.0f; break;
            case    kGray_8_SkColorType:
            case  kRGB_888x_SkColorType:
            case kRGBA_8888_SkColorType:
            case kBGRA_8888_SkColorType:    blitter->fDitherRate =  1/255.0f; break;
            case kRGB_101010x_SkColorType:
            case kRGBA_1010102_SkColorType: blitter->fDitherRate = 1/1023.0f; break;
        }
        // TODO: for constant colors, we could try to measure the effect of dithering, and if
        //       it has no value (i.e. all variations result in the same 32bit color, then we
        //       could disable it (for speed, by not adding the stage).
    }
    is_constant = is_constant && (blitter->fDitherRate == 0.0f);

    // We're logically done here.  The code between here and return blitter is all optimization.

    // A pipeline that's still constant here can collapse back into a constant color.
    if (is_constant) {
        SkColor4f constantColor;
        SkRasterPipeline_MemoryCtx constantColorPtr = { &constantColor, 0 };
        colorPipeline->append_gamut_clamp_if_normalized(dst.info());
        colorPipeline->append(SkRasterPipeline::store_f32, &constantColorPtr);
        colorPipeline->run(0,0,1,1);
        colorPipeline->reset();
        colorPipeline->append_constant_color(alloc, constantColor);

        is_opaque = constantColor.fA == 1.0f;
    }

    // We can strength-reduce SrcOver into Src when opaque.
    if (is_opaque && blitter->fBlend == SkBlendMode::kSrcOver) {
        blitter->fBlend = SkBlendMode::kSrc;
    }

    // When we're drawing a constant color in Src mode, we can sometimes just memset.
    // (The previous two optimizations help find more opportunities for this one.)
    if (is_constant && blitter->fBlend == SkBlendMode::kSrc) {
        // Run our color pipeline all the way through to produce what we'd memset when we can.
        // Not all blits can memset, so we need to keep colorPipeline too.
        SkRasterPipeline_<256> p;
        p.extend(*colorPipeline);
        p.append_gamut_clamp_if_normalized(dst.info());
        blitter->fDstPtr = SkRasterPipeline_MemoryCtx{&blitter->fMemsetColor, 0};
        blitter->append_store(&p);
        p.run(0,0,1,1);

        switch (blitter->fDst.shiftPerPixel()) {
            case 0: blitter->fMemset2D = [](SkPixmap* dst, int x,int y, int w,int h, uint64_t c) {
                void* p = dst->writable_addr(x,y);
                while (h --> 0) {
                    memset(p, c, w);
                    p = SkTAddOffset<void>(p, dst->rowBytes());
                }
            }; break;

            case 1: blitter->fMemset2D = [](SkPixmap* dst, int x,int y, int w,int h, uint64_t c) {
                uint16_t* p = dst->writable_addr16(x,y);
                auto fn = SkOpts::memset16;
                while (h --> 0) {
                    fn(p, c, w);
                    p = SkTAddOffset<uint16_t>(p, dst->rowBytes());
                }
            }; break;

            case 2: blitter->fMemset2D = [](SkPixmap* dst, int x,int y, int w,int h, uint64_t c) {
                uint32_t* p = dst->writable_addr32(x,y);
                auto fn = SkOpts::memset32;
                while (h --> 0) {
                    fn(p, c, w);
                    p = SkTAddOffset<uint32_t>(p, dst->rowBytes());
                }
            }; break;

            case 3: blitter->fMemset2D = [](SkPixmap* dst, int x,int y, int w,int h, uint64_t c) {
                uint64_t* p = dst->writable_addr64(x,y);
                auto fn = SkOpts::memset64;
                while (h --> 0) {
                    fn(p, c, w);
                    p = SkTAddOffset<uint64_t>(p, dst->rowBytes());
                }
            }; break;

            // TODO(F32)?
        }
    }

    blitter->fDstPtr = SkRasterPipeline_MemoryCtx{
        blitter->fDst.writable_addr(),
        blitter->fDst.rowBytesAsPixels(),
    };

    return blitter;
}

void SkRasterPipelineBlitter::append_load_dst(SkRasterPipeline* p) const {
    p->append_load_dst(fDst.info().colorType(), &fDstPtr);
    if (fDst.info().alphaType() == kUnpremul_SkAlphaType) {
        p->append(SkRasterPipeline::premul_dst);
    }
}

void SkRasterPipelineBlitter::append_store(SkRasterPipeline* p) const {
    if (fDst.info().alphaType() == kUnpremul_SkAlphaType) {
        p->append(SkRasterPipeline::unpremul);
    }
    if (fDitherRate > 0.0f) {
        p->append(SkRasterPipeline::dither, &fDitherRate);
    }

    p->append_store(fDst.info().colorType(), &fDstPtr);
}

void SkRasterPipelineBlitter::blitH(int x, int y, int w) {
    this->blitRect(x,y,w,1);
}

void SkRasterPipelineBlitter::blitRect(int x, int y, int w, int h) {
    if (fMemset2D) {
        fMemset2D(&fDst, x,y, w,h, fMemsetColor);
        return;
    }

    if (!fBlitRect) {
        SkRasterPipeline p(fAlloc);
        p.extend(fColorPipeline);
        p.append_gamut_clamp_if_normalized(fDst.info());
        if (fBlend == SkBlendMode::kSrcOver
                && (fDst.info().colorType() == kRGBA_8888_SkColorType ||
                    fDst.info().colorType() == kBGRA_8888_SkColorType)
                && !fDst.colorSpace()
                && fDst.info().alphaType() != kUnpremul_SkAlphaType
                && fDitherRate == 0.0f) {
            if (fDst.info().colorType() == kBGRA_8888_SkColorType) {
                p.append(SkRasterPipeline::swap_rb);
            }
            p.append(SkRasterPipeline::srcover_rgba_8888, &fDstPtr);
        } else {
            if (fBlend != SkBlendMode::kSrc) {
                this->append_load_dst(&p);
                SkBlendMode_AppendStages(fBlend, &p);
            }
            this->append_store(&p);
        }
        fBlitRect = p.compile();
    }

    fBlitRect(x,y,w,h);
}

void SkRasterPipelineBlitter::blitAntiH(int x, int y, const SkAlpha aa[], const int16_t runs[]) {
    if (!fBlitAntiH) {
        SkRasterPipeline p(fAlloc);
        p.extend(fColorPipeline);
        p.append_gamut_clamp_if_normalized(fDst.info());
        if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/false)) {
            p.append(SkRasterPipeline::scale_1_float, &fCurrentCoverage);
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
        } else {
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
            p.append(SkRasterPipeline::lerp_1_float, &fCurrentCoverage);
        }

        this->append_store(&p);
        fBlitAntiH = p.compile();
    }

    for (int16_t run = *runs; run > 0; run = *runs) {
        switch (*aa) {
            case 0x00:                       break;
            case 0xff: this->blitH(x,y,run); break;
            default:
                fCurrentCoverage = *aa * (1/255.0f);
                fBlitAntiH(x,y,run,1);
        }
        x    += run;
        runs += run;
        aa   += run;
    }
}

void SkRasterPipelineBlitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
    SkIRect clip = {x,y, x+2,y+1};
    uint8_t coverage[] = { (uint8_t)a0, (uint8_t)a1 };

    SkMask mask;
    mask.fImage    = coverage;
    mask.fBounds   = clip;
    mask.fRowBytes = 2;
    mask.fFormat   = SkMask::kA8_Format;

    this->blitMask(mask, clip);
}

void SkRasterPipelineBlitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
    SkIRect clip = {x,y, x+1,y+2};
    uint8_t coverage[] = { (uint8_t)a0, (uint8_t)a1 };

    SkMask mask;
    mask.fImage    = coverage;
    mask.fBounds   = clip;
    mask.fRowBytes = 1;
    mask.fFormat   = SkMask::kA8_Format;

    this->blitMask(mask, clip);
}

void SkRasterPipelineBlitter::blitV(int x, int y, int height, SkAlpha alpha) {
    SkIRect clip = {x,y, x+1,y+height};

    SkMask mask;
    mask.fImage    = &alpha;
    mask.fBounds   = clip;
    mask.fRowBytes = 0;     // so we reuse the 1 "row" for all of height
    mask.fFormat   = SkMask::kA8_Format;

    this->blitMask(mask, clip);
}

void SkRasterPipelineBlitter::blitMask(const SkMask& mask, const SkIRect& clip) {
    if (mask.fFormat == SkMask::kBW_Format) {
        // TODO: native BW masks?
        return INHERITED::blitMask(mask, clip);
    }

    // ARGB and SDF masks shouldn't make it here.
    SkASSERT(mask.fFormat == SkMask::kA8_Format
          || mask.fFormat == SkMask::kLCD16_Format
          || mask.fFormat == SkMask::k3D_Format);

    auto extract_mask_plane = [&mask](int plane, SkRasterPipeline_MemoryCtx* ctx) {
        // LCD is 16-bit per pixel; A8 and 3D are 8-bit per pixel.
        size_t bpp = mask.fFormat == SkMask::kLCD16_Format ? 2 : 1;

        // Select the right mask plane.  Usually plane == 0 and this is just mask.fImage.
        auto ptr = (uintptr_t)mask.fImage
                 + plane * mask.computeImageSize();

        // Update ctx to point "into" this current mask, but lined up with fDstPtr at (0,0).
        // This sort of trickery upsets UBSAN (pointer-overflow) so our ptr must be a uintptr_t.
        // mask.fRowBytes is a uint32_t, which would break our addressing math on 64-bit builds.
        size_t rowBytes = mask.fRowBytes;
        ctx->stride = rowBytes / bpp;
        ctx->pixels = (void*)(ptr - mask.fBounds.left() * bpp
                                  - mask.fBounds.top()  * rowBytes);
    };

    extract_mask_plane(0, &fMaskPtr);
    if (mask.fFormat == SkMask::k3D_Format) {
        extract_mask_plane(1, &fEmbossCtx.mul);
        extract_mask_plane(2, &fEmbossCtx.add);
    }

    // Lazily build whichever pipeline we need, specialized for each mask format.
    if (mask.fFormat == SkMask::kA8_Format && !fBlitMaskA8) {
        SkRasterPipeline p(fAlloc);
        p.extend(fColorPipeline);
        p.append_gamut_clamp_if_normalized(fDst.info());
        if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/false)) {
            p.append(SkRasterPipeline::scale_u8, &fMaskPtr);
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
        } else {
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
            p.append(SkRasterPipeline::lerp_u8, &fMaskPtr);
        }
        this->append_store(&p);
        fBlitMaskA8 = p.compile();
    }
    if (mask.fFormat == SkMask::kLCD16_Format && !fBlitMaskLCD16) {
        SkRasterPipeline p(fAlloc);
        p.extend(fColorPipeline);
        p.append_gamut_clamp_if_normalized(fDst.info());
        if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/true)) {
            // Somewhat unusually, scale_565 needs dst loaded first.
            this->append_load_dst(&p);
            p.append(SkRasterPipeline::scale_565, &fMaskPtr);
            SkBlendMode_AppendStages(fBlend, &p);
        } else {
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
            p.append(SkRasterPipeline::lerp_565, &fMaskPtr);
        }
        this->append_store(&p);
        fBlitMaskLCD16 = p.compile();
    }
    if (mask.fFormat == SkMask::k3D_Format && !fBlitMask3D) {
        SkRasterPipeline p(fAlloc);
        p.extend(fColorPipeline);
        // This bit is where we differ from kA8_Format:
        p.append(SkRasterPipeline::emboss, &fEmbossCtx);
        // Now onward just as kA8.
        p.append_gamut_clamp_if_normalized(fDst.info());
        if (SkBlendMode_ShouldPreScaleCoverage(fBlend, /*rgb_coverage=*/false)) {
            p.append(SkRasterPipeline::scale_u8, &fMaskPtr);
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
        } else {
            this->append_load_dst(&p);
            SkBlendMode_AppendStages(fBlend, &p);
            p.append(SkRasterPipeline::lerp_u8, &fMaskPtr);
        }
        this->append_store(&p);
        fBlitMask3D = p.compile();
    }

    std::function<void(size_t,size_t,size_t,size_t)>* blitter = nullptr;
    switch (mask.fFormat) {
        case SkMask::kA8_Format:    blitter = &fBlitMaskA8;    break;
        case SkMask::kLCD16_Format: blitter = &fBlitMaskLCD16; break;
        case SkMask::k3D_Format:    blitter = &fBlitMask3D;    break;
        default:
            SkASSERT(false);
            return;
    }

    SkASSERT(blitter);
    (*blitter)(clip.left(),clip.top(), clip.width(),clip.height());
}