/*
 * Copyright 2006 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "Sk4px.h"
#include "SkColorData.h"
#include "SkCoreBlitters.h"
#include "SkShader.h"
#include "SkUtils.h"
#include "SkXfermodePriv.h"

// Maps a 5-bit value (0..31) onto 0..32 by adding its top bit back in:
// 0..15 are unchanged and 16..31 become 17..32. This lets later code divide
// by 32 with a shift (>> 5) instead of dividing by 31.
static inline int upscale_31_to_32(int value) {
    SkASSERT((unsigned)value <= 31);
    return value + (value >> 4);
}

// Interpolates one 8-bit component from dst toward src by scale/32.
// scale == 0 returns dst, scale == 32 returns src.
static inline int blend_32(int src, int dst, int scale) {
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    return dst + ((src - dst) * scale >> 5);
}

// Blends the color (srcR, srcG, srcB) onto dst using a per-channel RGB16
// coverage mask, further scaled by srcA (which the caller passes in the
// 0..256 range). A mask of 0 leaves dst untouched; otherwise the result
// always has alpha 0xFF.
static inline SkPMColor blend_lcd16(int srcA, int srcR, int srcG, int srcB,
                                    SkPMColor dst, uint16_t mask) {
    if (mask == 0) {
        return dst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = upscale_31_to_32(maskR);
    maskG = upscale_31_to_32(maskG);
    maskB = upscale_31_to_32(maskB);

    // srcA has been upscaled to 256 before passed into this function
    maskR = maskR * srcA >> 8;
    maskG = maskG * srcA >> 8;
    maskB = maskB * srcA >> 8;

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        blend_32(srcR, dstR, maskR),
                        blend_32(srcG, dstG, maskG),
                        blend_32(srcB, dstB, maskB));
}

// Same as blend_lcd16() but without the srcA scaling step. A mask of 0 returns
// dst unchanged and a mask of 0xFFFF returns opaqueDst directly, skipping the
// per-channel math.
static inline SkPMColor blend_lcd16_opaque(int srcR, int srcG, int srcB,
                                           SkPMColor dst, uint16_t mask,
                                           SkPMColor opaqueDst) {
    if (mask == 0) {
        return dst;
    }

    if (0xFFFF == mask) {
        return opaqueDst;
    }

    /*  We want all of these in 5bits, hence the shifts in case one of them
     *  (green) is 6bits.
     */
    int maskR = SkGetPackedR16(mask) >> (SK_R16_BITS - 5);
    int maskG = SkGetPackedG16(mask) >> (SK_G16_BITS - 5);
    int maskB = SkGetPackedB16(mask) >> (SK_B16_BITS - 5);

    // Now upscale them to 0..32, so we can use blend32
    maskR = upscale_31_to_32(maskR);
    maskG = upscale_31_to_32(maskG);
    maskB = upscale_31_to_32(maskB);

    int dstR = SkGetPackedR32(dst);
    int dstG = SkGetPackedG32(dst);
    int dstB = SkGetPackedB32(dst);

    // LCD blitting is only supported if the dst is known/required
    // to be opaque
    return SkPackARGB32(0xFF,
                        blend_32(srcR, dstR, maskR),
                        blend_32(srcG, dstG, maskG),
                        blend_32(srcB, dstB, maskB));
}


// TODO: rewrite at least the SSE code here.  It's miserable.

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
    #include <emmintrin.h>

    // The following (left) shifts cause the top 5 bits of the mask components to
    // line up with the corresponding components in an SkPMColor.
    // Note that the mask's RGB16 order may differ from the SkPMColor order.
    // A negative shift amount means the component has to move right instead,
    // which is what the #else branches below handle.
    #define SK_R16x5_R32x5_SHIFT (SK_R32_SHIFT - SK_R16_SHIFT - SK_R16_BITS + 5)
    #define SK_G16x5_G32x5_SHIFT (SK_G32_SHIFT - SK_G16_SHIFT - SK_G16_BITS + 5)
    #define SK_B16x5_B32x5_SHIFT (SK_B32_SHIFT - SK_B16_SHIFT - SK_B16_BITS + 5)

    #if SK_R16x5_R32x5_SHIFT == 0
        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (x)
    #elif SK_R16x5_R32x5_SHIFT > 0
        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_slli_epi32(x, SK_R16x5_R32x5_SHIFT))
    #else
        #define SkPackedR16x5ToUnmaskedR32x5_SSE2(x) (_mm_srli_epi32(x, -SK_R16x5_R32x5_SHIFT))
    #endif

    #if SK_G16x5_G32x5_SHIFT == 0
        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (x)
    #elif SK_G16x5_G32x5_SHIFT > 0
        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_slli_epi32(x, SK_G16x5_G32x5_SHIFT))
    #else
        #define SkPackedG16x5ToUnmaskedG32x5_SSE2(x) (_mm_srli_epi32(x, -SK_G16x5_G32x5_SHIFT))
    #endif

    #if SK_B16x5_B32x5_SHIFT == 0
        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (x)
    #elif SK_B16x5_B32x5_SHIFT > 0
        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_slli_epi32(x, SK_B16x5_B32x5_SHIFT))
    #else
        #define SkPackedB16x5ToUnmaskedB32x5_SSE2(x) (_mm_srli_epi32(x, -SK_B16x5_B32x5_SHIFT))
    #endif

    // Four-pixel SSE2 version of blend_lcd16(). Note that `mask` is taken by
    // reference and overwritten with the repacked per-channel mask.
    // The alpha lanes of the returned pixels are not forced to 0xFF here.
    static __m128i blend_lcd16_sse2(__m128i &src, __m128i &dst, __m128i &mask, __m128i &srcA) {
        // In the following comments, the components of src, dst and mask are
        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
        // by an R, G, B, or A suffix. Components of one of the four pixels that
        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
        // example is the blue channel of the second destination pixel. Memory
        // layout is shown for an ARGB byte order in a color value.

        // src and srcA store 8-bit values interleaved with zeros.
        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        // srcA = (srcA, 0, srcA, 0, srcA, 0, srcA, 0,
        //         srcA, 0, srcA, 0, srcA, 0, srcA, 0)
        // mask stores 16-bit values (compressed three channels) interleaved with zeros.
        // Lo and Hi denote the low and high bytes of a 16-bit value, respectively.
        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)

        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));

        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));

        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));

        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
        // 8-bit position
        // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
        //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
        mask = _mm_or_si128(_mm_or_si128(r, g), b);

        // Interleave R,G,B into the lower byte of word.
        // i.e. split the sixteen 8-bit values from mask into two sets of eight
        // 16-bit values, padded by zero.
        __m128i maskLo, maskHi;
        // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
        // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());

        // Upscale from 0..31 to 0..32
        // (allows to replace division by right-shift further down)
        // Right-shift each component by 4 and add the result back to that component,
        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));

        // Multiply each component of maskLo and maskHi by srcA
        maskLo = _mm_mullo_epi16(maskLo, srcA);
        maskHi = _mm_mullo_epi16(maskHi, srcA);

        // Right shift mask components by 8 (divide by 256)
        maskLo = _mm_srli_epi16(maskLo, 8);
        maskHi = _mm_srli_epi16(maskHi, 8);

        // Interleave R,G,B into the lower byte of the word
        // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
        // dstHi = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());

        // mask = (src - dst) * mask
        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));

        // mask = (src - dst) * mask >> 5
        // (arithmetic shift, because src - dst may be negative)
        maskLo = _mm_srai_epi16(maskLo, 5);
        maskHi = _mm_srai_epi16(maskHi, 5);

        // Add two pixels into result.
        // result = dst + ((src - dst) * mask >> 5)
        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);

        // Pack into 4 32bit dst pixels.
        // resultLo and resultHi contain eight 16-bit components (two pixels) each.
        // Merge into one SSE register with sixteen 8-bit values (four pixels),
        // clamping to 255 if necessary.
        return _mm_packus_epi16(resultLo, resultHi);
    }

    // Four-pixel SSE2 blend without the srcA scaling step. Like
    // blend_lcd16_sse2(), it overwrites `mask`. Unlike blend_lcd16_opaque(), it
    // has no shortcut for a 0xFFFF mask; it always computes the blend and then
    // ORs the alpha bits to 0xFF.
    static __m128i blend_lcd16_opaque_sse2(__m128i &src, __m128i &dst, __m128i &mask) {
        // In the following comments, the components of src, dst and mask are
        // abbreviated as (s)rc, (d)st, and (m)ask. Color components are marked
        // by an R, G, B, or A suffix. Components of one of the four pixels that
        // are processed in parallel are marked with 0, 1, 2, and 3. "d1B", for
        // example is the blue channel of the second destination pixel. Memory
        // layout is shown for an ARGB byte order in a color value.

        // src stores 8-bit values interleaved with zeros.
        // src  = (0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
        // mask stores 16-bit values (shown as high and low bytes) interleaved with
        // zeros
        // mask = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
        //         m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)

        // Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
        // r = (0, m0R, 0, 0, 0, m1R, 0, 0, 0, m2R, 0, 0, 0, m3R, 0, 0)
        __m128i r = _mm_and_si128(SkPackedR16x5ToUnmaskedR32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_R32_SHIFT));

        // g = (0, 0, m0G, 0, 0, 0, m1G, 0, 0, 0, m2G, 0, 0, 0, m3G, 0)
        __m128i g = _mm_and_si128(SkPackedG16x5ToUnmaskedG32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_G32_SHIFT));

        // b = (0, 0, 0, m0B, 0, 0, 0, m1B, 0, 0, 0, m2B, 0, 0, 0, m3B)
        __m128i b = _mm_and_si128(SkPackedB16x5ToUnmaskedB32x5_SSE2(mask),
                                  _mm_set1_epi32(0x1F << SK_B32_SHIFT));

        // Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
        // Each component (m0R, m0G, etc.) is then a 5-bit value aligned to an
        // 8-bit position
        // mask = (0, m0R, m0G, m0B, 0, m1R, m1G, m1B,
        //         0, m2R, m2G, m2B, 0, m3R, m3G, m3B)
        mask = _mm_or_si128(_mm_or_si128(r, g), b);

        // Interleave R,G,B into the lower byte of word.
        // i.e. split the sixteen 8-bit values from mask into two sets of eight
        // 16-bit values, padded by zero.
        __m128i maskLo, maskHi;
        // maskLo = (0, 0, m0R, 0, m0G, 0, m0B, 0, 0, 0, m1R, 0, m1G, 0, m1B, 0)
        maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
        // maskHi = (0, 0, m2R, 0, m2G, 0, m2B, 0, 0, 0, m3R, 0, m3G, 0, m3B, 0)
        maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());

        // Upscale from 0..31 to 0..32
        // (allows to replace division by right-shift further down)
        // Right-shift each component by 4 and add the result back to that component,
        // mapping numbers in the range 0..15 to 0..15, and 16..31 to 17..32
        maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
        maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));

        // Interleave R,G,B into the lower byte of the word
        // dstLo = (0, 0, d0R, 0, d0G, 0, d0B, 0, 0, 0, d1R, 0, d1G, 0, d1B, 0)
        __m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
        // dstHi = (0, 0, d2R, 0, d2G, 0, d2B, 0, 0, 0, d3R, 0, d3G, 0, d3B, 0)
        __m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());

        // mask = (src - dst) * mask
        maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(src, dstLo));
        maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(src, dstHi));

        // mask = (src - dst) * mask >> 5
        // (arithmetic shift, because src - dst may be negative)
        maskLo = _mm_srai_epi16(maskLo, 5);
        maskHi = _mm_srai_epi16(maskHi, 5);

        // Add two pixels into result.
        // result = dst + ((src - dst) * mask >> 5)
        __m128i resultLo = _mm_add_epi16(dstLo, maskLo);
        __m128i resultHi = _mm_add_epi16(dstHi, maskHi);

        // Pack into 4 32bit dst pixels and force opaque.
        // resultLo and resultHi contain eight 16-bit components (two pixels) each.
        // Merge into one SSE register with sixteen 8-bit values (four pixels),
        // clamping to 255 if necessary. Set alpha components to 0xFF.
        return _mm_or_si128(_mm_packus_epi16(resultLo, resultHi),
                            _mm_set1_epi32(SK_A32_MASK << SK_A32_SHIFT));
    }

    // SSE2 row blitter for a non-opaque color: blends `width` pixels of dst
    // with `src` through the LCD16 masks in `mask`. The trailing unnamed
    // SkPMColor parameter is unused; it only keeps the signature identical to
    // blit_row_lcd16_opaque() so both can be stored in the same function
    // pointer (see blit_color()).
    void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[], SkColor src, int width, SkPMColor) {
        if (width <= 0) {
            return;
        }

        int srcA = SkColorGetA(src);
        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);

        srcA = SkAlpha255To256(srcA);

        if (width >= 4) {
            SkASSERT(((size_t)dst & 0x03) == 0);
            // Scalar prologue: advance one pixel at a time until dst is
            // 16-byte aligned, as required by the aligned load/store below.
            while (((size_t)dst & 0x0F) != 0) {
                *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
                mask++;
                dst++;
                width--;
            }

            __m128i *d = reinterpret_cast<__m128i*>(dst);
            // Set alpha to 0xFF and replicate source four times in SSE register.
            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
            // Interleave with zeros to get two sets of four 16-bit values.
            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
            // Set srcA_sse to contain eight copies of srcA, padded with zero.
            __m128i srcA_sse = _mm_set1_epi16(srcA);
            while (width >= 4) {
                // Load four destination pixels into dst_sse.
                __m128i dst_sse = _mm_load_si128(d);
                // Load four 16-bit masks into lower half of mask_sse.
                __m128i mask_sse = _mm_loadl_epi64(
                                       reinterpret_cast<const __m128i*>(mask));

                // Check whether masks are equal to 0 and get the highest bit
                // of each byte of result, if masks are all zero, we will get
                // pack_cmp to 0xFFFF
                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                                 _mm_setzero_si128()));

                // if mask pixels are not all zero, we will blend the dst pixels
                if (pack_cmp != 0xFFFF) {
                    // Unpack 4 16bit mask pixels to
                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                    mask_sse = _mm_unpacklo_epi16(mask_sse,
                                                  _mm_setzero_si128());

                    // Process 4 32bit dst pixels
                    __m128i result = blend_lcd16_sse2(src_sse, dst_sse, mask_sse, srcA_sse);
                    _mm_store_si128(d, result);
                }

                d++;
                mask += 4;
                width -= 4;
            }

            dst = reinterpret_cast<SkPMColor*>(d);
        }

        // Scalar epilogue: the remaining (fewer than four) pixels, or the whole
        // row when it was shorter than four pixels to begin with.
        while (width > 0) {
            *dst = blend_lcd16(srcA, srcR, srcG, srcB, *dst, *mask);
            mask++;
            dst++;
            width--;
        }
    }

    // SSE2 row blitter for an opaque color. Same structure as blit_row_lcd16():
    // scalar prologue until dst is 16-byte aligned, four pixels per iteration
    // in the middle, scalar epilogue. opaqueDst is only consulted by the scalar
    // blend_lcd16_opaque() calls.
    void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
                                   SkColor src, int width, SkPMColor opaqueDst) {
        if (width <= 0) {
            return;
        }

        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);

        if (width >= 4) {
            SkASSERT(((size_t)dst & 0x03) == 0);
            while (((size_t)dst & 0x0F) != 0) {
                *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
                mask++;
                dst++;
                width--;
            }

            __m128i *d = reinterpret_cast<__m128i*>(dst);
            // Set alpha to 0xFF and replicate source four times in SSE register.
            __m128i src_sse = _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB));
            // Interleave with zeros to get two sets of four 16-bit values.
            // src_sse=(0xFF, 0, sR, 0, sG, 0, sB, 0, 0xFF, 0, sR, 0, sG, 0, sB, 0)
            src_sse = _mm_unpacklo_epi8(src_sse, _mm_setzero_si128());
            while (width >= 4) {
                // Load four destination pixels into dst_sse.
                __m128i dst_sse = _mm_load_si128(d);
                // Load four 16-bit masks into lower half of mask_sse.
                __m128i mask_sse = _mm_loadl_epi64(
                                       reinterpret_cast<const __m128i*>(mask));

                // Check whether masks are equal to 0 and get the highest bit
                // of each byte of result, if masks are all zero, we will get
                // pack_cmp to 0xFFFF
                int pack_cmp = _mm_movemask_epi8(_mm_cmpeq_epi16(mask_sse,
                                                 _mm_setzero_si128()));

                // if mask pixels are not all zero, we will blend the dst pixels
                if (pack_cmp != 0xFFFF) {
                    // Unpack 4 16bit mask pixels to
                    // mask_sse = (m0RGBLo, m0RGBHi, 0, 0, m1RGBLo, m1RGBHi, 0, 0,
                    //             m2RGBLo, m2RGBHi, 0, 0, m3RGBLo, m3RGBHi, 0, 0)
                    mask_sse = _mm_unpacklo_epi16(mask_sse,
                                                  _mm_setzero_si128());

                    // Process 4 32bit dst pixels
                    __m128i result = blend_lcd16_opaque_sse2(src_sse, dst_sse, mask_sse);
                    _mm_store_si128(d, result);
                }

                d++;
                mask += 4;
                width -= 4;
            }

            dst = reinterpret_cast<SkPMColor*>(d);
        }

        while (width > 0) {
            *dst = blend_lcd16_opaque(srcR, srcG, srcB, *dst, *mask, opaqueDst);
            mask++;
            dst++;
            width--;
        }
    }

#elif defined(SK_ARM_HAS_NEON)
    #include <arm_neon.h>

    // Index of each channel within the uint8x8x4_t produced by vld4_u8(),
    // derived from the channel's bit offset in an SkPMColor.
    #define NEON_A (SK_A32_SHIFT / 8)
    #define NEON_R (SK_R32_SHIFT / 8)
    #define NEON_G (SK_G32_SHIFT / 8)
    #define NEON_B (SK_B32_SHIFT / 8)

    // Eight-lane counterpart of blend_32(): widens src and dst to 16 bits,
    // computes dst + ((src - dst) * scale >> 5), and narrows back to 8 bits.
    static inline uint8x8_t blend_32_neon(uint8x8_t src, uint8x8_t dst, uint16x8_t scale) {
        int16x8_t src_wide, dst_wide;

        src_wide = vreinterpretq_s16_u16(vmovl_u8(src));
        dst_wide = vreinterpretq_s16_u16(vmovl_u8(dst));

        src_wide = (src_wide - dst_wide) * vreinterpretq_s16_u16(scale);

        dst_wide += vshrq_n_s16(src_wide, 5);

        return vmovn_u16(vreinterpretq_u16_s16(dst_wide));
    }

    // NEON row blitter for an opaque color. Processes eight pixels per
    // iteration and falls back to blend_lcd16_opaque() for the remainder.
    // Here `src` is the row of LCD16 masks and `color` is the paint color.
    void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t src[],
                               SkColor color, int width,
                               SkPMColor opaqueDst) {
        int colR = SkColorGetR(color);
        int colG = SkColorGetG(color);
        int colB = SkColorGetB(color);

        uint8x8_t vcolR = vdup_n_u8(colR);
        uint8x8_t vcolG = vdup_n_u8(colG);
        uint8x8_t vcolB = vdup_n_u8(colB);
        uint8x8_t vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
        uint8x8_t vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
        uint8x8_t vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
        uint8x8_t vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));

        while (width >= 8) {
            uint8x8x4_t vdst;
            uint16x8_t vmask;
            uint16x8_t vmaskR, vmaskG, vmaskB;
            uint8x8_t vsel_trans, vsel_opq;

            vdst = vld4_u8((uint8_t*)dst);
            vmask = vld1q_u16(src);

            // Prepare compare masks
            // vsel_trans selects lanes whose mask is 0 (dst is kept),
            // vsel_opq selects lanes whose mask is 0xFFFF (opaqueDst is used).
            vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
            vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));

            // Get all the color masks on 5 bits
            vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
            vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
                                 SK_B16_BITS + SK_R16_BITS + 1);
            vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);

            // Upscale to 0..32
            vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
            vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
            vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);

            // Alpha: keep dst's alpha where the mask is 0, otherwise 0xFF;
            // then take opaqueDst's alpha where the mask is 0xFFFF.
            vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF));
            vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);

            vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
            vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
            vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);

            // Fully covered lanes take opaqueDst's color channels directly.
            vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
            vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
            vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);

            vst4_u8((uint8_t*)dst, vdst);

            dst += 8;
            src += 8;
            width -= 8;
        }

        // Leftovers
        for (int i = 0; i < width; i++) {
            dst[i] = blend_lcd16_opaque(colR, colG, colB, dst[i], src[i],
                                        opaqueDst);
        }
    }

    // NEON row blitter for a non-opaque color. Processes eight pixels per
    // iteration and falls back to blend_lcd16() for the remainder. The trailing
    // unnamed SkPMColor parameter is unused.
    // NOTE(review): the vector loop writes alpha 0xFF for every lane, including
    // lanes whose mask is 0, whereas the scalar blend_lcd16() returns dst
    // unchanged for a zero mask - confirm this is intended.
    void blit_row_lcd16(SkPMColor dst[], const uint16_t src[],
                        SkColor color, int width, SkPMColor) {
        int colA = SkColorGetA(color);
        int colR = SkColorGetR(color);
        int colG = SkColorGetG(color);
        int colB = SkColorGetB(color);

        colA = SkAlpha255To256(colA);

        uint16x8_t vcolA = vdupq_n_u16(colA);
        uint8x8_t vcolR = vdup_n_u8(colR);
        uint8x8_t vcolG = vdup_n_u8(colG);
        uint8x8_t vcolB = vdup_n_u8(colB);

        while (width >= 8) {
            uint8x8x4_t vdst;
            uint16x8_t vmask;
            uint16x8_t vmaskR, vmaskG, vmaskB;

            vdst = vld4_u8((uint8_t*)dst);
            vmask = vld1q_u16(src);

            // Get all the color masks on 5 bits
            vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
            vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
                                 SK_B16_BITS + SK_R16_BITS + 1);
            vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);

            // Upscale to 0..32
            vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
            vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
            vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);

            // Scale the coverage by the color's alpha (colA is in 0..256).
            vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
            vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
            vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);

            vdst.val[NEON_A] = vdup_n_u8(0xFF);
            vdst.val[NEON_R] = blend_32_neon(vcolR, vdst.val[NEON_R], vmaskR);
            vdst.val[NEON_G] = blend_32_neon(vcolG, vdst.val[NEON_G], vmaskG);
            vdst.val[NEON_B] = blend_32_neon(vcolB, vdst.val[NEON_B], vmaskB);

            vst4_u8((uint8_t*)dst, vdst);

            dst += 8;
            src += 8;
            width -= 8;
        }

        for (int i = 0; i < width; i++) {
            dst[i] = blend_lcd16(colA, colR, colG, colB, dst[i], src[i]);
        }
    }

#else

    // Portable row blitter for a non-opaque color: one blend_lcd16() per pixel.
    // The trailing unnamed SkPMColor parameter is unused.
    static inline void blit_row_lcd16(SkPMColor dst[], const uint16_t mask[],
                                      SkColor src, int width, SkPMColor) {
        int srcA = SkColorGetA(src);
        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);

        srcA = SkAlpha255To256(srcA);

        for (int i = 0; i < width; i++) {
            dst[i] = blend_lcd16(srcA, srcR, srcG, srcB, dst[i], mask[i]);
        }
    }

    // Portable row blitter for an opaque color: one blend_lcd16_opaque() per
    // pixel.
    static inline void blit_row_lcd16_opaque(SkPMColor dst[], const uint16_t mask[],
                                             SkColor src, int width,
                                             SkPMColor opaqueDst) {
        int srcR = SkColorGetR(src);
        int srcG = SkColorGetG(src);
        int srcB = SkColorGetB(src);

        for (int i = 0; i < width; i++) {
            dst[i] = blend_lcd16_opaque(srcR, srcG, srcB, dst[i], mask[i], opaqueDst);
        }
    }

#endif

// Blits `color` through `mask` into the `clip` region of `device` when the
// device is kN32 and the mask is either kA8 or kLCD16.
// Returns true if the blit was performed here, false if the format pair is not
// handled and the caller must deal with it.
static bool blit_color(const SkPixmap& device,
                       const SkMask& mask,
                       const SkIRect& clip,
                       SkColor color) {
    int x = clip.fLeft,
        y = clip.fTop;

    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
        SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
                                 (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
                                 color, clip.width(), clip.height());
        return true;
    }

    if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
        auto dstRow  = device.writable_addr32(x,y);
        auto maskRow = (const uint16_t*)mask.getAddr(x,y);

        auto blit_row = blit_row_lcd16;
        SkPMColor opaqueDst = 0;  // ignored unless opaque

        // A fully opaque color can use the cheaper row proc, which needs the
        // premultiplied color for fully covered pixels.
        if (0xff == SkColorGetA(color)) {
            blit_row  = blit_row_lcd16_opaque;
            opaqueDst = SkPreMultiplyColor(color);
        }

        for (int height = clip.height(); height --> 0; ) {
            blit_row(dstRow, maskRow, color, clip.width(), opaqueDst);

            // Row strides are in bytes, so step through char pointers.
            dstRow  = (SkPMColor*)     ((      char*) dstRow + device.rowBytes());
            maskRow = (const uint16_t*)((const char*)maskRow +  mask.fRowBytes);
        }
        return true;
    }

    return false;
}

///////////////////////////////////////////////////////////////////////////////

// Blits the mask's own pixels, read as rows of SkPMColor, onto the device
// within `clip`, using the SkBlitRow::Proc32 chosen by Factory32. The alpha of
// srcColor is passed to the proc as its alpha argument; when it is not 255 the
// kGlobalAlpha flag is added to the proc selection.
// NOTE(review): the do/while runs the body at least once, so this presumably
// relies on clip.height() >= 1 - confirm against callers.
static void SkARGB32_Blit32(const SkPixmap& device, const SkMask& mask,
                            const SkIRect& clip, SkPMColor srcColor) {
    U8CPU alpha = SkGetPackedA32(srcColor);
    unsigned flags = SkBlitRow::kSrcPixelAlpha_Flag32;
    if (alpha != 255) {
        flags |= SkBlitRow::kGlobalAlpha_Flag32;
    }
    SkBlitRow::Proc32 proc = SkBlitRow::Factory32(flags);

    int x = clip.fLeft;
    int y = clip.fTop;
    int width = clip.width();
    int height = clip.height();

    SkPMColor* dstRow = device.writable_addr32(x, y);
    const SkPMColor* srcRow = reinterpret_cast<const SkPMColor*>(mask.getAddr8(x, y));

    do {
        proc(dstRow, srcRow, width, alpha);
        dstRow = (SkPMColor*)((char*)dstRow + device.rowBytes());
        srcRow = (const SkPMColor*)((const char*)srcRow + mask.fRowBytes);
    } while (--height != 0);
}

//////////////////////////////////////////////////////////////////////////////////////

// Caches the paint's color (fColor), its alpha (fSrcA), its R/G/B components
// scaled by that alpha (fSrcR/G/B), and the packed result (fPMColor).
SkARGB32_Blitter::SkARGB32_Blitter(const SkPixmap& device, const SkPaint& paint)
        : INHERITED(device) {
    SkColor color = paint.getColor();
    fColor = color;

    fSrcA = SkColorGetA(color);
    unsigned scale = SkAlpha255To256(fSrcA);
    fSrcR = SkAlphaMul(SkColorGetR(color), scale);
    fSrcG = SkAlphaMul(SkColorGetG(color), scale);
    fSrcB = SkAlphaMul(SkColorGetB(color), scale);

    fPMColor = SkPackARGB32(fSrcA, fSrcR, fSrcG, fSrcB);
}

// If the blitter's color is fully opaque, stores the packed color in *value
// and returns the device pixmap; otherwise returns nullptr and leaves *value
// untouched.
const SkPixmap* SkARGB32_Blitter::justAnOpaqueColor(uint32_t* value) {
    if (255 == fSrcA) {
        *value = fPMColor;
        return &fDevice;
    }
    return nullptr;
}

#if defined _WIN32  // disable warning : local variable used without having been initialized
#pragma warning ( push )
#pragma warning ( disable : 4701 )
#endif

// Blends fPMColor over `width` pixels starting at (x, y).
void SkARGB32_Blitter::blitH(int x, int y, int width) {
    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());

    uint32_t* device = fDevice.writable_addr32(x, y);
    SkBlitRow::Color32(device, device, width, fPMColor);
}

// Blits a run-length encoded row of coverage starting at (x, y). runs[0] is
// the length of the current run and antialias[0] its coverage; both arrays are
// advanced by that length. A run length <= 0 terminates the row.
void SkARGB32_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
                                 const int16_t runs[]) {
    if (fSrcA == 0) {
        return;
    }

    uint32_t    color = fPMColor;
    uint32_t*   device = fDevice.writable_addr32(x, y);
    unsigned    opaqueMask = fSrcA; // if fSrcA is 0xFF, then we will catch the fast opaque case

    for (;;) {
        int count = runs[0];
        SkASSERT(count >= 0);
        if (count <= 0) {
            return;
        }
        unsigned aa = antialias[0];
        if (aa) {
            // Only true when both the color alpha and the coverage are 255.
            if ((opaqueMask & aa) == 255) {
                sk_memset32(device, color, count);
            } else {
                uint32_t sc = SkAlphaMulQ(color, SkAlpha255To256(aa));
                SkBlitRow::Color32(device, device, count, sc);
            }
        }
        runs += count;
        antialias += count;
        device += count;
    }
}

// Blends fPMColor into the two horizontally adjacent pixels (x, y) and
// (x + 1, y) with coverages a0 and a1.
void SkARGB32_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    // Debug-only address lookup for the second pixel.
    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)

    device[0] = SkBlendARGB32(fPMColor, device[0], a0);
    device[1] = SkBlendARGB32(fPMColor, device[1], a1);
}

// Blends fPMColor into the two vertically adjacent pixels (x, y) and
// (x, y + 1) with coverages a0 and a1.
void SkARGB32_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    // Debug-only address lookup for the second pixel.
    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)

    device[0] = SkBlendARGB32(fPMColor, device[0], a0);
    device = (uint32_t*)((char*)device + fDevice.rowBytes());
    device[0] = SkBlendARGB32(fPMColor, device[0], a1);
}

//////////////////////////////////////////////////////////////////////////////////////

// Writes `color` to each of the eight pixels at dst whose bit is set in
// `mask`; the most significant bit corresponds to dst[0].
#define solid_8_pixels(mask, dst, color)    \
    do {                                    \
        if (mask & 0x80) dst[0] = color;    \
        if (mask & 0x40) dst[1] = color;    \
        if (mask & 0x20) dst[2] = color;    \
        if (mask & 0x10) dst[3] = color;    \
        if (mask & 0x08) dst[4] = color;    \
        if (mask & 0x04) dst[5] = color;    \
        if (mask & 0x02) dst[6] = color;    \
        if (mask & 0x01) dst[7] = color;    \
    } while (0)

// Parameters for the SkBlitBWMaskTemplate.h include that follows; the name
// SkARGB32_BlitBW is used later in this file by
// SkARGB32_Opaque_Blitter::blitMask().
#define SK_BLITBWMASK_NAME                  SkARGB32_BlitBW
#define SK_BLITBWMASK_ARGS                  , SkPMColor color
#define SK_BLITBWMASK_BLIT8(mask, dst)      solid_8_pixels(mask, dst, color)
#define SK_BLITBWMASK_GETADDR               writable_addr32
#define SK_BLITBWMASK_DEVTYPE               uint32_t
#include "SkBlitBWMaskTemplate.h"

// For each of the eight pixels at dst whose bit is set in `mask`, replaces the
// pixel with sc + dst * dst_scale (dst scaled via SkAlphaMulQ).
#define blend_8_pixels(mask, dst, sc, dst_scale)                            \
    do {                                                                    \
        if (mask & 0x80) { dst[0] = sc + SkAlphaMulQ(dst[0], dst_scale); }  \
        if (mask & 0x40) { dst[1] = sc + SkAlphaMulQ(dst[1], dst_scale); }  \
        if (mask & 0x20) { dst[2] = sc + SkAlphaMulQ(dst[2], dst_scale); }  \
        if (mask & 0x10) { dst[3] = sc + SkAlphaMulQ(dst[3], dst_scale); }  \
        if (mask & 0x08) { dst[4] = sc + SkAlphaMulQ(dst[4], dst_scale); }  \
        if (mask & 0x04) { dst[5] = sc + SkAlphaMulQ(dst[5], dst_scale); }  \
        if (mask & 0x02) { dst[6] = sc + SkAlphaMulQ(dst[6], dst_scale); }  \
        if (mask & 0x01) { dst[7] = sc + SkAlphaMulQ(dst[7], dst_scale); }  \
    } while (0)

// Parameters for the second SkBlitBWMaskTemplate.h include; the name
// SkARGB32_BlendBW is used later in this file by SkARGB32_Blitter::blitMask().
#define SK_BLITBWMASK_NAME                  SkARGB32_BlendBW
#define SK_BLITBWMASK_ARGS                  , uint32_t sc, unsigned dst_scale
#define SK_BLITBWMASK_BLIT8(mask, dst)      blend_8_pixels(mask, dst, sc, dst_scale)
#define SK_BLITBWMASK_GETADDR               writable_addr32
#define SK_BLITBWMASK_DEVTYPE               uint32_t
#include "SkBlitBWMaskTemplate.h"

// Blits the (non-opaque) color through `mask` within `clip`. A8 and LCD16
// masks are handled by blit_color(); BW and ARGB32 masks are handled here.
// Any other mask format aborts.
void SkARGB32_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) {
    SkASSERT(mask.fBounds.contains(clip));
    SkASSERT(fSrcA != 0xFF);

    if (fSrcA == 0) {
        return;
    }

    if (blit_color(fDevice, mask, clip, fColor)) {
        return;
    }

    switch (mask.fFormat) {
        case SkMask::kBW_Format:
            SkARGB32_BlendBW(fDevice, mask, clip, fPMColor, SkAlpha255To256(255 - fSrcA));
            break;
        case SkMask::kARGB32_Format:
            SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
            break;
        default:
            SK_ABORT("Mask format not handled.");
    }
}

// Opaque-color variant of blitMask(): BW masks are written with
// SkARGB32_BlitBW (plain stores) instead of being blended.
void SkARGB32_Opaque_Blitter::blitMask(const SkMask& mask,
                                       const SkIRect& clip) {
    SkASSERT(mask.fBounds.contains(clip));

    if (blit_color(fDevice, mask, clip, fColor)) {
        return;
    }

    switch (mask.fFormat) {
        case SkMask::kBW_Format:
            SkARGB32_BlitBW(fDevice, mask, clip, fPMColor);
            break;
        case SkMask::kARGB32_Format:
            SkARGB32_Blit32(fDevice, mask, clip, fPMColor);
            break;
        default:
            SK_ABORT("Mask format not handled.");
    }
}

// Interpolates the two horizontally adjacent pixels (x, y) and (x + 1, y)
// toward fPMColor with coverages a0 and a1, using SkFastFourByteInterp.
void SkARGB32_Opaque_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)

    device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
    device[1] = SkFastFourByteInterp(fPMColor, device[1], a1);
}

// Interpolates the two vertically adjacent pixels (x, y) and (x, y + 1)
// toward fPMColor with coverages a0 and a1, using SkFastFourByteInterp.
void SkARGB32_Opaque_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)

    device[0] = SkFastFourByteInterp(fPMColor, device[0], a0);
    device = (uint32_t*)((char*)device + fDevice.rowBytes());
    device[0] = SkFastFourByteInterp(fPMColor, device[0], a1);
}

///////////////////////////////////////////////////////////////////////////////

// Blends the color, scaled by coverage `alpha`, into a one-pixel-wide column
// of `height` pixels starting at (x, y). Does nothing when either the coverage
// or the color's alpha is 0.
void SkARGB32_Blitter::blitV(int x, int y, int height, SkAlpha alpha) {
    if (alpha == 0 || fSrcA == 0) {
        return;
    }

    uint32_t* device = fDevice.writable_addr32(x, y);
    uint32_t  color = fPMColor;

    if (alpha != 255) {
        color = SkAlphaMulQ(color, SkAlpha255To256(alpha));
    }

    // Weight applied to the existing destination pixel.
    unsigned dst_scale = SkAlpha255To256(255 - SkGetPackedA32(color));
    size_t rowBytes = fDevice.rowBytes();
    while (--height >= 0) {
        device[0] = color + SkAlphaMulQ(device[0], dst_scale);
        device = (uint32_t*)((char*)device + rowBytes);
    }
}

// Blends fPMColor over the width x height rectangle whose top-left corner is
// (x, y), one row at a time.
void SkARGB32_Blitter::blitRect(int x, int y, int width, int height) {
    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width() && y + height <= fDevice.height());

    if (fSrcA == 0) {
        return;
    }

    uint32_t* device = fDevice.writable_addr32(x, y);
    uint32_t  color = fPMColor;
    size_t    rowBytes = fDevice.rowBytes();

    while (--height >= 0) {
        SkBlitRow::Color32(device, device, width, color);
        device = (uint32_t*)((char*)device + rowBytes);
    }
}

#if defined _WIN32
#pragma warning ( pop )
#endif

///////////////////////////////////////////////////////////////////////

// Run-length coverage blit specialised for black: the source pixel has only
// alpha bits set, so a partially covered pixel becomes
// (aa << SK_A32_SHIFT) + dst * (256 - aa).
void SkARGB32_Black_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
                                       const int16_t runs[]) {
    uint32_t*   device = fDevice.writable_addr32(x, y);
    SkPMColor   black = (SkPMColor)(SK_A32_MASK << SK_A32_SHIFT);

    for (;;) {
        int count = runs[0];
        SkASSERT(count >= 0);
        if (count <= 0) {
            return;
        }
        unsigned aa = antialias[0];
        if (aa) {
            if (aa == 255) {
                sk_memset32(device, black, count);
            } else {
                SkPMColor src = aa << SK_A32_SHIFT;
                unsigned dst_scale = 256 - aa;
                int n = count;
                // count >= 1 here, so the do/while never indexes below 0.
                do {
                    --n;
                    device[n] = src + SkAlphaMulQ(device[n], dst_scale);
                } while (n > 0);
            }
        }
        runs += count;
        antialias += count;
        device += count;
    }
}

// Black-specialised blend of the two horizontally adjacent pixels (x, y) and
// (x + 1, y) with coverages a0 and a1.
void SkARGB32_Black_Blitter::blitAntiH2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    SkDEBUGCODE((void)fDevice.writable_addr32(x + 1, y);)

    device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
    device[1] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[1], 256 - a1);
}

// Black-specialised blend of the two vertically adjacent pixels (x, y) and
// (x, y + 1) with coverages a0 and a1.
void SkARGB32_Black_Blitter::blitAntiV2(int x, int y, U8CPU a0, U8CPU a1) {
    uint32_t* device = fDevice.writable_addr32(x, y);
    SkDEBUGCODE((void)fDevice.writable_addr32(x, y + 1);)

    device[0] = (a0 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a0);
    device = (uint32_t*)((char*)device + fDevice.rowBytes());
    device[0] = (a1 << SK_A32_SHIFT) + SkAlphaMulQ(device[0], 256 - a1);
}

///////////////////////////////////////////////////////////////////////////////

// Special version of SkBlitRow::Factory32 that knows we're in kSrc_Mode,
// instead of kSrcOver_Mode
// Each device pixel is interpolated toward the matching span pixel by aa.
static void blend_srcmode(SkPMColor* SK_RESTRICT device,
                          const SkPMColor* SK_RESTRICT span,
                          int count, U8CPU aa) {
    int aa256 = SkAlpha255To256(aa);
    for (int i = 0; i < count; ++i) {
        device[i] = SkFourByteInterp256(span[i], device[i], aa256);
    }
}

// Allocates a scratch row (fBuffer) of device.width() pixels, picks the row
// procs used to composite shader output, and decides whether the shader can
// write straight into the device (fShadeDirectlyIntoDevice): either there is
// no xfermode and the shader output is opaque, or the blend mode is kSrc.
SkARGB32_Shader_Blitter::SkARGB32_Shader_Blitter(const SkPixmap& device,
        const SkPaint& paint, SkShaderBase::Context* shaderContext)
    : INHERITED(device, paint, shaderContext)
{
    fBuffer = (SkPMColor*)sk_malloc_throw(device.width() * (sizeof(SkPMColor)));

    fXfermode = SkXfermode::Peek(paint.getBlendMode());

    int flags = 0;
    if (!(shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
        flags |= SkBlitRow::kSrcPixelAlpha_Flag32;
    }
    // we call this on the output from the shader
    fProc32 = SkBlitRow::Factory32(flags);
    // we call this on the output from the shader + alpha from the aa buffer
    fProc32Blend = SkBlitRow::Factory32(flags | SkBlitRow::kGlobalAlpha_Flag32);

    fShadeDirectlyIntoDevice = false;
    if (fXfermode == nullptr) {
        if (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag) {
            fShadeDirectlyIntoDevice = true;
        }
    } else {
        if (SkBlendMode::kSrc == paint.getBlendMode()) {
            fShadeDirectlyIntoDevice = true;
            fProc32Blend = blend_srcmode;
        }
    }

    fConstInY = SkToBool(shaderContext->getFlags() & SkShaderBase::kConstInY32_Flag);
}

// Releases the scratch row allocated by the constructor.
SkARGB32_Shader_Blitter::~SkARGB32_Shader_Blitter() {
    sk_free(fBuffer);
}

// Shades `width` pixels starting at (x, y): either directly into the device,
// or into fBuffer followed by the xfermode (if any) or fProc32.
void SkARGB32_Shader_Blitter::blitH(int x, int y, int width) {
    SkASSERT(x >= 0 && y >= 0 && x + width <= fDevice.width());

    uint32_t* device = fDevice.writable_addr32(x, y);

    if (fShadeDirectlyIntoDevice) {
        fShaderContext->shadeSpan(x, y, device, width);
    } else {
        SkPMColor*  span = fBuffer;
        fShaderContext->shadeSpan(x, y, span, width);
        if (fXfermode) {
            fXfermode->xfer32(device, span, width, nullptr);
        } else {
            fProc32(device, span, width, 255);
        }
    }
}

// Shades a width x height rectangle at (x, y). When fConstInY is set the
// shader is evaluated once for the first row and that result is reused for
// every following row; otherwise each row is shaded separately.
void SkARGB32_Shader_Blitter::blitRect(int x, int y, int width, int height) {
    SkASSERT(x >= 0 && y >= 0 &&
             x + width <= fDevice.width() && y + height <= fDevice.height());

    uint32_t*  device = fDevice.writable_addr32(x, y);
    size_t     deviceRB = fDevice.rowBytes();
    auto*      shaderContext = fShaderContext;
    SkPMColor* span = fBuffer;

    if (fConstInY) {
        if (fShadeDirectlyIntoDevice) {
            // shade the first row directly into the device
            shaderContext->shadeSpan(x, y, device, width);
            span = device;
            // Copy the first row into each remaining row (4 bytes per pixel).
            while (--height > 0) {
                device = (uint32_t*)((char*)device + deviceRB);
                memcpy(device, span, width << 2);
            }
        } else {
            shaderContext->shadeSpan(x, y, span, width);
            SkXfermode* xfer = fXfermode;
            if (xfer) {
                do {
                    xfer->xfer32(device, span, width, nullptr);
                    y += 1;
                    device = (uint32_t*)((char*)device + deviceRB);
                } while (--height > 0);
            } else {
                SkBlitRow::Proc32 proc = fProc32;
                do {
                    proc(device, span, width, 255);
                    y += 1;
                    device = (uint32_t*)((char*)device + deviceRB);
                } while (--height > 0);
            }
        }
        return;
    }

    if (fShadeDirectlyIntoDevice) {
        do {
            shaderContext->shadeSpan(x, y, device, width);
            y += 1;
            device = (uint32_t*)((char*)device + deviceRB);
        } while (--height > 0);
    } else {
        SkXfermode* xfer = fXfermode;
        if (xfer) {
            do {
                shaderContext->shadeSpan(x, y, span, width);
                xfer->xfer32(device, span, width, nullptr);
                y += 1;
                device = (uint32_t*)((char*)device + deviceRB);
            } while (--height > 0);
        } else {
            SkBlitRow::Proc32 proc = fProc32;
            do {
                shaderContext->shadeSpan(x, y, span, width);
                proc(device, span, width, 255);
                y += 1;
                device = (uint32_t*)((char*)device + deviceRB);
            } while (--height > 0);
        }
    }
}

// Run-length coverage blit of shader output starting at (x, y). Three paths:
//  1. an xfermode that cannot shade directly into the device,
//  2. direct-to-device shading or an opaque shader,
//  3. everything else (shade into fBuffer, then composite with a row proc).
// In every path a run length <= 0 terminates the row and runs with zero
// coverage are skipped.
void SkARGB32_Shader_Blitter::blitAntiH(int x, int y, const SkAlpha antialias[],
                                        const int16_t runs[]) {
    SkPMColor*  span = fBuffer;
    uint32_t*   device = fDevice.writable_addr32(x, y);
    auto*       shaderContext = fShaderContext;

    if (fXfermode && !fShadeDirectlyIntoDevice) {
        for (;;) {
            SkXfermode* xfer = fXfermode;

            int count = *runs;
            if (count <= 0)
                break;
            int aa = *antialias;
            if (aa) {
                shaderContext->shadeSpan(x, y, span, count);
                if (aa == 255) {
                    xfer->xfer32(device, span, count, nullptr);
                } else {
                    // count is almost always 1
                    // Each call covers one pixel and is handed the pointer to
                    // this run's coverage value.
                    for (int i = count - 1; i >= 0; --i) {
                        xfer->xfer32(&device[i], &span[i], 1, antialias);
                    }
                }
            }
            device += count;
            runs += count;
            antialias += count;
            x += count;
        }
    } else if (fShadeDirectlyIntoDevice ||
               (shaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag)) {
        for (;;) {
            int count = *runs;
            if (count <= 0) {
                break;
            }
            int aa = *antialias;
            if (aa) {
                if (aa == 255) {
                    // cool, have the shader draw right into the device
                    shaderContext->shadeSpan(x, y, device, count);
                } else {
                    shaderContext->shadeSpan(x, y, span, count);
                    fProc32Blend(device, span, count, aa);
                }
            }
            device += count;
            runs += count;
            antialias += count;
            x += count;
        }
    } else {
        for (;;) {
            int count = *runs;
            if (count <= 0) {
                break;
            }
            int aa = *antialias;
            if (aa) {
                shaderContext->shadeSpan(x, y, span, count);
                if (aa == 255) {
                    fProc32(device, span, count, 255);
                } else {
                    fProc32Blend(device, span, count, aa);
                }
            }
            device += count;
            runs += count;
            antialias += count;
            x += count;
        }
    }
}

// Blends n source pixels onto dst through 8-bit coverage values read from
// vmask. The legacy build blends pixel by pixel and skips zero coverage; the
// default build computes s*aa + d*(1 - alpha(s*aa)) through Sk4px using
// approximate divides by 255.
static void blend_row_A8(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
    auto mask = (const uint8_t*)vmask;

#ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
    for (int i = 0; i < n; ++i) {
        if (mask[i]) {
            dst[i] = SkBlendARGB32(src[i], dst[i], mask[i]);
        }
    }
#else
    Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
        const auto s_aa = s.approxMulDiv255(aa);
        return s_aa + d.approxMulDiv255(s_aa.alphas().inv());
    });
#endif
}

// Like blend_row_A8(), but interpolates linearly between dst and src by the
// coverage alone: (s*aa + d*(255 - aa)) / 255 in the default build.
// NOTE(review): the name suggests the source pixels are expected to be
// opaque - confirm against callers.
static void blend_row_A8_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
    auto mask = (const uint8_t*)vmask;

#ifdef SK_SUPPORT_LEGACY_A8_MASKBLITTER
    for (int i = 0; i < n; ++i) {
        if (int m = mask[i]) {
            // Map 0..255 coverage onto 0..256.
            m += (m >> 7);
            dst[i] = SkAlphaMulQ(src[i], m) + SkAlphaMulQ(dst[i], 256 - m);
        }
    }
#else
    Sk4px::MapDstSrcAlpha(n, dst, src, mask, [](const Sk4px& d, const Sk4px& s, const Sk4px& aa) {
        return (s * aa + d * aa.inv()).div255();
    });
#endif
}

static void blend_row_lcd16(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) {
    auto src_alpha_blend = [](int s, int d, int sa, int m) {
        return d + SkAlphaMul(s - SkAlphaMul(sa, d), m);
    };

    auto upscale_31_to_255 = [](int v) {
        return (v << 3) | (v >> 2);
    };

    auto mask = (const uint16_t*)vmask;
    for (int i = 0; i < n; ++i) {
        uint16_t m = mask[i];
        if (0 == m) {
            continue;
        }

        SkPMColor s = src[i];
        SkPMColor d = dst[i];

        int srcA = SkGetPackedA32(s);
        int srcR = SkGetPackedR32(s);
        int srcG = SkGetPackedG32(s);
        int srcB =
SkGetPackedB32(s); srcA += srcA >> 7; // We're ignoring the least significant bit of the green coverage channel here. int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5); int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5); int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5); // Scale up to 8-bit coverage to work with SkAlphaMul() in src_alpha_blend(). maskR = upscale_31_to_255(maskR); maskG = upscale_31_to_255(maskG); maskB = upscale_31_to_255(maskB); // This LCD blit routine only works if the destination is opaque. dst[i] = SkPackARGB32(0xFF, src_alpha_blend(srcR, SkGetPackedR32(d), srcA, maskR), src_alpha_blend(srcG, SkGetPackedG32(d), srcA, maskG), src_alpha_blend(srcB, SkGetPackedB32(d), srcA, maskB)); } } static void blend_row_LCD16_opaque(SkPMColor* dst, const void* vmask, const SkPMColor* src, int n) { auto mask = (const uint16_t*)vmask; for (int i = 0; i < n; ++i) { uint16_t m = mask[i]; if (0 == m) { continue; } SkPMColor s = src[i]; SkPMColor d = dst[i]; int srcR = SkGetPackedR32(s); int srcG = SkGetPackedG32(s); int srcB = SkGetPackedB32(s); // We're ignoring the least significant bit of the green coverage channel here. int maskR = SkGetPackedR16(m) >> (SK_R16_BITS - 5); int maskG = SkGetPackedG16(m) >> (SK_G16_BITS - 5); int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5); // Now upscale them to 0..32, so we can use blend_32. maskR = upscale_31_to_32(maskR); maskG = upscale_31_to_32(maskG); maskB = upscale_31_to_32(maskB); // This LCD blit routine only works if the destination is opaque. 
dst[i] = SkPackARGB32(0xFF, blend_32(srcR, SkGetPackedR32(d), maskR), blend_32(srcG, SkGetPackedG32(d), maskG), blend_32(srcB, SkGetPackedB32(d), maskB)); } } void SkARGB32_Shader_Blitter::blitMask(const SkMask& mask, const SkIRect& clip) { // we only handle kA8 with an xfermode if (fXfermode && (SkMask::kA8_Format != mask.fFormat)) { this->INHERITED::blitMask(mask, clip); return; } SkASSERT(mask.fBounds.contains(clip)); void (*blend_row)(SkPMColor*, const void* mask, const SkPMColor*, int) = nullptr; if (!fXfermode) { bool opaque = (fShaderContext->getFlags() & SkShaderBase::kOpaqueAlpha_Flag); if (mask.fFormat == SkMask::kA8_Format && opaque) { blend_row = blend_row_A8_opaque; } else if (mask.fFormat == SkMask::kA8_Format) { blend_row = blend_row_A8; } else if (mask.fFormat == SkMask::kLCD16_Format && opaque) { blend_row = blend_row_LCD16_opaque; } else if (mask.fFormat == SkMask::kLCD16_Format) { blend_row = blend_row_lcd16; } else { this->INHERITED::blitMask(mask, clip); return; } } const int x = clip.fLeft; const int width = clip.width(); int y = clip.fTop; int height = clip.height(); char* dstRow = (char*)fDevice.writable_addr32(x, y); const size_t dstRB = fDevice.rowBytes(); const uint8_t* maskRow = (const uint8_t*)mask.getAddr(x, y); const size_t maskRB = mask.fRowBytes; SkPMColor* span = fBuffer; if (fXfermode) { SkASSERT(SkMask::kA8_Format == mask.fFormat); SkXfermode* xfer = fXfermode; do { fShaderContext->shadeSpan(x, y, span, width); xfer->xfer32(reinterpret_cast<SkPMColor*>(dstRow), span, width, maskRow); dstRow += dstRB; maskRow += maskRB; y += 1; } while (--height > 0); } else { SkASSERT(blend_row); do { fShaderContext->shadeSpan(x, y, span, width); blend_row(reinterpret_cast<SkPMColor*>(dstRow), maskRow, span, width); dstRow += dstRB; maskRow += maskRB; y += 1; } while (--height > 0); } } void SkARGB32_Shader_Blitter::blitV(int x, int y, int height, SkAlpha alpha) { SkASSERT(x >= 0 && y >= 0 && y + height <= fDevice.height()); uint32_t* device = 
fDevice.writable_addr32(x, y); size_t deviceRB = fDevice.rowBytes(); if (fConstInY) { SkPMColor c; fShaderContext->shadeSpan(x, y, &c, 1); if (fShadeDirectlyIntoDevice) { if (255 == alpha) { do { *device = c; device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } else { do { *device = SkFourByteInterp(c, *device, alpha); device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } } else { SkXfermode* xfer = fXfermode; if (xfer) { do { xfer->xfer32(device, &c, 1, &alpha); device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } else { SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend; do { proc(device, &c, 1, alpha); device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } } return; } if (fShadeDirectlyIntoDevice) { if (255 == alpha) { do { fShaderContext->shadeSpan(x, y, device, 1); y += 1; device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } else { do { SkPMColor c; fShaderContext->shadeSpan(x, y, &c, 1); *device = SkFourByteInterp(c, *device, alpha); y += 1; device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } } else { SkPMColor* span = fBuffer; SkXfermode* xfer = fXfermode; if (xfer) { do { fShaderContext->shadeSpan(x, y, span, 1); xfer->xfer32(device, span, 1, &alpha); y += 1; device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } else { SkBlitRow::Proc32 proc = (255 == alpha) ? fProc32 : fProc32Blend; do { fShaderContext->shadeSpan(x, y, span, 1); proc(device, span, 1, alpha); y += 1; device = (uint32_t*)((char*)device + deviceRB); } while (--height > 0); } } }