/* * Copyright 2014 Google Inc. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #include <arm_neon.h> #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) #define SCALE_FILTER_NAME MAKENAME(_filter_scale) #define PACK_FILTER_X_NAME MAKENAME(_pack_filter_x) #define PACK_FILTER_Y_NAME MAKENAME(_pack_filter_y) #define PACK_FILTER_X4_NAME MAKENAME(_pack_filter_x4) #define PACK_FILTER_Y4_NAME MAKENAME(_pack_filter_y4) #ifndef PREAMBLE #define PREAMBLE(state) #define PREAMBLE_PARAM_X #define PREAMBLE_PARAM_Y #define PREAMBLE_ARG_X #define PREAMBLE_ARG_Y #endif static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, uint32_t xy[], int count, int x, int y) { SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) == 0); PREAMBLE(s); // we store y, x, x, x, x, x const unsigned maxX = s.fPixmap.width() - 1; SkFractionalInt fx; { const SkBitmapProcStateAutoMapper mapper(s, x, y); const unsigned maxY = s.fPixmap.height() - 1; *xy++ = TILEY_PROCF(mapper.fixedY(), maxY); fx = mapper.fractionalIntX(); } if (0 == maxX) { // all of the following X values must be 0 memset(xy, 0, count * sizeof(uint16_t)); return; } const SkFractionalInt dx = s.fInvSxFractionalInt; #ifdef CHECK_FOR_DECAL // test if we don't need to apply the tile proc const SkFixed fixedFx = SkFractionalIntToFixed(fx); const SkFixed fixedDx = SkFractionalIntToFixed(dx); if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) { decal_nofilter_scale_neon(xy, fixedFx, fixedDx, count); return; } #endif if (count >= 8) { SkFractionalInt dx2 = dx+dx; SkFractionalInt dx4 = dx2+dx2; SkFractionalInt dx8 = dx4+dx4; // now build fx/fx+dx/fx+2dx/fx+3dx SkFractionalInt fx1, fx2, fx3; int32x4_t lbase, hbase; int16_t *dst16 = (int16_t *)xy; fx1 = fx+dx; fx2 = fx1+dx; fx3 = fx2+dx; lbase = vdupq_n_s32(SkFractionalIntToFixed(fx)); lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1); lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2); lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3); hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4))); // store & bump while (count >= 8) { int16x8_t fx8; fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX); vst1q_s16(dst16, fx8); // but preserving base & on to the next lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); dst16 += 8; count -= 8; fx += dx8; }; xy = (uint32_t *) dst16; } uint16_t* xx = (uint16_t*)xy; for (int i = count; i > 0; --i) { *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX); fx += dx; } } static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max, SkFixed one PREAMBLE_PARAM_Y) { unsigned i = TILEY_PROCF(f, max); i = (i << 4) | EXTRACT_LOW_BITS(f, max); return (i << 14) | (TILEY_PROCF((f + one), max)); } static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max, SkFixed one PREAMBLE_PARAM_X) { unsigned i = TILEX_PROCF(f, max); i = (i << 4) | EXTRACT_LOW_BITS(f, max); return (i << 14) | (TILEX_PROCF((f + one), max)); } static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max, SkFixed one PREAMBLE_PARAM_X) { int32x4_t ret, res, wide_one; // Prepare constants wide_one = vdupq_n_s32(one); // Step 1 res = TILEX_PROCF_NEON4(f, max); // Step 2 ret = EXTRACT_LOW_BITS_NEON4(f, max); ret = vsliq_n_s32(ret, res, 4); // Step 3 res = TILEX_PROCF_NEON4(f + wide_one, max); ret = vorrq_s32(vshlq_n_s32(ret, 14), res); return ret; } static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max, SkFixed one PREAMBLE_PARAM_X) { int32x4_t ret, res, wide_one; // Prepare constants wide_one = vdupq_n_s32(one); // Step 1 res = TILEY_PROCF_NEON4(f, max); // Step 2 ret = EXTRACT_LOW_BITS_NEON4(f, max); ret = vsliq_n_s32(ret, res, 4); // Step 3 res = TILEY_PROCF_NEON4(f + wide_one, max); ret = vorrq_s32(vshlq_n_s32(ret, 14), res); return ret; } static void SCALE_FILTER_NAME(const SkBitmapProcState& s, uint32_t xy[], int count, int x, int y) { SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | SkMatrix::kScale_Mask)) == 0); SkASSERT(s.fInvKy == 0); PREAMBLE(s); const unsigned maxX = s.fPixmap.width() - 1; const SkFixed one = s.fFilterOneX; const SkFractionalInt dx = s.fInvSxFractionalInt; SkFractionalInt fx; { const SkBitmapProcStateAutoMapper mapper(s, x, y); const SkFixed fy = mapper.fixedY(); const unsigned maxY = s.fPixmap.height() - 1; // compute our two Y values up front *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y); // now initialize fx fx = mapper.fractionalIntX(); } #ifdef CHECK_FOR_DECAL // test if we don't need to apply the tile proc const SkFixed fixedFx = SkFractionalIntToFixed(fx); const SkFixed fixedDx = SkFractionalIntToFixed(dx); if (can_truncate_to_fixed_for_decal(fixedFx, fixedDx, count, maxX)) { decal_filter_scale_neon(xy, fixedFx, fixedDx, count); return; } #endif { if (count >= 4) { int32x4_t wide_fx; wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx)); wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1); wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2); wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3); while (count >= 4) { int32x4_t res; res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X); vst1q_u32(xy, vreinterpretq_u32_s32(res)); wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx)); fx += dx+dx+dx+dx; xy += 4; count -= 4; } } while (--count >= 0) { *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X); fx += dx; } } } const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = { SCALE_NOFILTER_NAME, SCALE_FILTER_NAME, }; #undef TILEX_PROCF_NEON8 #undef TILEY_PROCF_NEON8 #undef TILEX_PROCF_NEON4 #undef TILEY_PROCF_NEON4 #undef EXTRACT_LOW_BITS_NEON4 #undef MAKENAME #undef TILEX_PROCF #undef TILEY_PROCF #ifdef CHECK_FOR_DECAL #undef CHECK_FOR_DECAL #endif #undef SCALE_NOFILTER_NAME #undef SCALE_FILTER_NAME #undef PREAMBLE #undef PREAMBLE_PARAM_X #undef PREAMBLE_PARAM_Y #undef PREAMBLE_ARG_X #undef PREAMBLE_ARG_Y #undef EXTRACT_LOW_BITS