@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@******************************************************************************
@*
@* @brief
@*  This file contains definitions of routines for the combing artifact
@*  check (CAC)
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*  - ideint_cac_8x8_a9()
@*
@* @remarks
@*  Syntax: GNU as, ARM (AArch32) with NEON; '@' starts a comment.
@*
@*******************************************************************************

@******************************************************************************
@*
@* @brief Calculates Combing Artifact
@*
@* @par Description
@*  This function calculates the combing artifact check (CAC) for two given
@*  fields. Four rows of eight bytes are read from each field. The rows are
@*  treated as two sub-blocks: byte columns 0-3 and byte columns 4-7. For each
@*  sub-block an "adj" and an "alt" measure are accumulated from row sums and
@*  from column averages, and the sub-block is flagged when
@*
@*      alt + (alt >> 3) + 4  <  adj        (unsigned compare)
@*
@* @param[in] pu1_top   (r0)
@*  UWORD8 pointer to top field
@*
@* @param[in] pu1_bot   (r1)
@*  UWORD8 pointer to bottom field
@*
@* @param[in] top_strd  (r2)
@*  Top field stride
@*
@* @param[in] bot_strd  (r3)
@*  Bottom field stride
@*
@* @returns
@*  r0 = 1 if at least one of the two sub-blocks is flagged, 0 otherwise
@*
@* @remarks
@*  Register roles:
@*      d28, d30, d24, d26 : top rows 0..3
@*      d29, d31, d25, d27 : bottom rows 0..3
@*      d20                : adj accumulator
@*      d21                : alt accumulator
@*      q0-q3, q8, q9, d22 : scratch
@*  Only caller-saved NEON registers (d0-d7, d16-d31) are written; the
@*  callee-saved d8-d15 are left untouched as the AAPCS requires.
@*  r0, r1 are advanced by four strides while loading; flags are modified.
@*
@******************************************************************************

    .global ideint_cac_8x8_a9

ideint_cac_8x8_a9:

    stmfd           sp!,    {r4-r10, lr}

    @ Rows 0 and 1 of both fields: q14 = {top0, bot0}, q15 = {top1, bot1}
    vld1.u8         d28,    [r0],   r2
    vld1.u8         d29,    [r1],   r3
    vld1.u8         d30,    [r0],   r2
    vld1.u8         d31,    [r1],   r3

    @ Row sums: two widening pairwise adds turn each row into two 32-bit
    @ sums, one for bytes 0-3 and one for bytes 4-7
    @   q0 = {top0.L, top0.R, bot0.L, bot0.R}
    @   q1 = {top1.L, top1.R, bot1.L, bot1.R}
    vpaddl.u8       q0,     q14
    vpaddl.u8       q1,     q15
    vpaddl.u16      q0,     q0
    vpaddl.u16      q1,     q1

    @ Every sum fits in 16 bits (at most 4 * 255), so q1 can be packed into
    @ the upper halfwords of q0. As 16-bit lanes:
    @   d16 = {top0.L, top1.L, top0.R, top1.R}
    @   d17 = {bot0.L, bot1.L, bot0.R, bot1.R}
    vshl.u32        q8,     q1,     #16
    vorr.u32        q8,     q0,     q8

    @ Rows 2 and 3 of both fields: q12 = {top2, bot2}, q13 = {top3, bot3}
    vld1.u8         d24,    [r0],   r2
    vld1.u8         d25,    [r1],   r3
    vld1.u8         d26,    [r0],   r2
    vld1.u8         d27,    [r1],   r3

    @ Same row-sum computation and packing for rows 2 and 3 (q2, q3 -> q9)
    vpaddl.u8       q2,     q12
    vpaddl.u8       q3,     q13
    vpaddl.u16      q2,     q2
    vpaddl.u16      q3,     q3
    vshl.u32        q9,     q3,     #16
    vorr.u32        q9,     q2,     q9

    @ |top sum - bottom sum| for every row and half; q8 holds all 8 values
    vabd.u16        d16,    d16,    d17
    vabd.u16        d17,    d18,    d19

    @ RSUM_CSUM_THRESH
    vmov.u16        q9,     #20

    @ Zero the differences that are below RSUM_CSUM_THRESH
    vcge.u16        q10,    q8,     q9
    vand.u16        q10,    q8,     q10

    @ Row based adj: d20 = {L, L, R, R} partial sums
    vadd.u16        d20,    d20,    d21

    @ Row based alt: |row0 - row1| + |row2 - row3| of the row sums within
    @ each field, then top part + bottom part -> d21 = {L, R}
    vabd.u32        q0,     q0,     q1
    vabd.u32        q2,     q2,     q3
    vadd.u32        q0,     q0,     q2
    vadd.u32        d21,    d0,     d1

    @ Column based adj: d0 = rounded average of the four top rows,
    @ d1 = rounded average of the four bottom rows, d0 = |d0 - d1|
    vrhadd.u8       q0,     q14,    q15
    vrhadd.u8       q1,     q12,    q13
    vrhadd.u8       q0,     q0,     q1
    vabd.u8         d0,     d0,     d1

    @ RSUM_CSUM_THRESH >> 2
    @ Held in d22 (caller-saved scratch). d9 must not be used here: d8-d15
    @ are callee-saved and this routine does not save NEON registers.
    vmov.u8         d22,    #5

    @ Zero the column differences that are below RSUM_CSUM_THRESH >> 2
    vcge.u8         d1,     d0,     d22
    vand.u8         d0,     d0,     d1

    @ Pairwise add the surviving differences and scale them by 4
    vpaddl.u8       d0,     d0
    vshl.u16        d0,     d0,     #2

    @ Add the row based adj, then reduce to one adj per sub-block
    vadd.u16        d20,    d0,     d20
    vpaddl.u16      d20,    d20
    @ d20 = {adj.L, adj.R}

    @ Column based alt:
    @   d0 = rounded average of top0, bot0, top2, bot2
    @   d1 = rounded average of top1, bot1, top3, bot3
    vrhadd.u8       d0,     d28,    d29
    vrhadd.u8       d2,     d24,    d25
    vrhadd.u8       d0,     d0,     d2

    vrhadd.u8       d1,     d30,    d31
    vrhadd.u8       d3,     d26,    d27
    vrhadd.u8       d1,     d1,     d3

    @ |d0 - d1| summed per sub-block and scaled by 4, added to row based alt
    vabd.u8         d0,     d0,     d1
    vpaddl.u8       d0,     d0
    vshl.u16        d0,     d0,     #2
    vpaddl.u16      d0,     d0
    vadd.u32        d21,    d0,     d21
    @ d21 = {alt.L, alt.R}

    @ SAD_BIAS_MULT_SHIFT: alt += alt >> 3
    vshr.u32        d0,     d21,    #3
    vadd.u32        d21,    d21,    d0

    @ SAD_BIAS_ADDITIVE >> 1: alt += 4
    vmov.u32        d0,     #4
    vadd.u32        d21,    d21,    d0

    @ Per sub-block mask: all ones where alt < adj, zero otherwise
    vclt.u32        d0,     d21,    d20

    @ Fold both masks together; the low word is non-zero if either is set
    vpaddl.u32      d0,     d0
    vmov.u32        r0,     d0[0]

    @ Normalise the result to 0 / 1
    cmp             r0,     #0
    movne           r0,     #1

    ldmfd           sp!,    {r4-r10, pc}