@/****************************************************************************** @ * @ * Copyright (C) 2015 The Android Open Source Project @ * @ * Licensed under the Apache License, Version 2.0 (the "License"); @ * you may not use this file except in compliance with the License. @ * You may obtain a copy of the License at: @ * @ * http://www.apache.org/licenses/LICENSE-2.0 @ * @ * Unless required by applicable law or agreed to in writing, software @ * distributed under the License is distributed on an "AS IS" BASIS, @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @ * See the License for the specific language governing permissions and @ * limitations under the License. @ * @ ***************************************************************************** @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore @*/ @** @****************************************************************************** @* @file @* ih264_intra_pred_luma_16x16_a9q.s @* @* @brief @* Contains function definitions for intra 16x16 Luma prediction . 
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  - ih264_intra_pred_luma_16x16_mode_vert_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_horz_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_dc_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_plane_a9q()
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

@* All the functions here are replicated from ih264_intra_pred_filters.c
@

@**
@**
@**
@

.text
.p2align 2

@ PC-relative offset of the plane coefficient table, consumed by
@ ih264_intra_pred_luma_16x16_mode_plane_a9q.  The "- 8" accounts for the
@ value pc has when it is read by the "add r7, r7, pc" placed at scrlbl1.
    .extern ih264_gai1_intrapred_luma_plane_coeffs
.hidden ih264_gai1_intrapred_luma_plane_coeffs
scratch_intrapred_addr1:
    .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8

@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_vert
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:Vertical, described in
@*  sec 8.3.3.1.  The 16 bytes at pu1_src + 17 are copied unchanged into each
@*  of the 16 destination rows.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*
@* @remarks
@*  Leaf routine: only r0, r1 and q0 are written, so no registers are saved
@*  and the stack is not touched.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0      => *pu1_src
@   r1      => *pu1_dst
@   r2      => src_strd                 (unused)
@   r3      => dst_strd
@   (stack) => ui_neighboravailability  (unused, never loaded)

    .global ih264_intra_pred_luma_16x16_mode_vert_a9q

ih264_intra_pred_luma_16x16_mode_vert_a9q:

    add           r0, r0, #17           @ r0 = pu1_src + 17
    vld1.8        {q0}, [r0]            @ q0 = the 16 bytes replicated into every row

    vst1.8        {q0}, [r1], r3        @ row 0; r1 += dst_strd after each store
    vst1.8        {q0}, [r1], r3        @ row 1
    vst1.8        {q0}, [r1], r3        @ row 2
    vst1.8        {q0}, [r1], r3        @ row 3
    vst1.8        {q0}, [r1], r3        @ row 4
    vst1.8        {q0}, [r1], r3        @ row 5
    vst1.8        {q0}, [r1], r3        @ row 6
    vst1.8        {q0}, [r1], r3        @ row 7
    vst1.8        {q0}, [r1], r3        @ row 8
    vst1.8        {q0}, [r1], r3        @ row 9
    vst1.8        {q0}, [r1], r3        @ row 10
    vst1.8        {q0}, [r1], r3        @ row 11
    vst1.8        {q0}, [r1], r3        @ row 12
    vst1.8        {q0}, [r1], r3        @ row 13
    vst1.8        {q0}, [r1], r3        @ row 14
    vst1.8        {q0}, [r1], r3        @ row 15

    bx            lr                    @ nothing was pushed, plain return

@******************************************************************************

@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_horz
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:horizontal
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:horizontal, described in
@*  sec 8.3.3.2.  Destination row n (n = 0..15) is filled with 16 copies of
@*  the byte pu1_src[15 - n].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the loop counter)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*
@* @remarks
@*  Leaf routine: only r1, r2 and q0-q2 are written, so no registers are saved
@*  and the stack is not touched.
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@   r0      => *pu1_src
@   r1      => *pu1_dst
@   r2      => src_strd                 (overwritten: rows-left counter)
@   r3      => dst_strd
@   (stack) => ui_neighboravailability  (unused, never loaded)

    .global ih264_intra_pred_luma_16x16_mode_horz_a9q

ih264_intra_pred_luma_16x16_mode_horz_a9q:

    vld1.u8       {q0}, [r0]            @ q0 = pu1_src[0..15]
    mov           r2, #14               @ 14 rows are produced inside the loop, 2 per pass

    vdup.u8       q1, d1[7]             @ q1 = row 0 value  (pu1_src[15])
    vdup.u8       q2, d1[6]             @ q2 = row 1 value  (pu1_src[14])
    vst1.8        {q1}, [r1], r3        @ store row 0

    @ Invariant at the top of each pass: q2 holds the next row still to be
    @ stored; the two source bytes after it sit two positions below the top
    @ of q0.
loop_16x16_horz:
    vext.8        q0, q0, q0, #14       @ rotate q0 so the next two bytes land in d1[7], d1[6]
    vst1.8        {q2}, [r1], r3        @ store the pending odd row
    vdup.u8       q1, d1[7]             @ next even row value
    subs          r2, #2
    vdup.u8       q2, d1[6]             @ next odd row value (stored on the following pass)
    vst1.8        {q1}, [r1], r3        @ store the even row
    bne           loop_16x16_horz

    @ q2 already holds row 15 and q0 is not read again, so no further rotate
    @ of q0 is required before the last store.
    vst1.8        {q2}, [r1], r3        @ store row 15

    bx            lr                    @ nothing was pushed, plain return
@******************************************************************************

@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_dc
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3.
@*  One byte value is computed and written to all 16x16 destination positions:
@*    - bits 0x01 and 0x04 both set : (sum of pu1_src[0..15] and
@*                                     pu1_src[17..32], + 16) >> 5
@*    - only bit 0x04 set           : (sum of pu1_src[17..32] + 8) >> 4
@*    - only bit 0x01 set           : (sum of pu1_src[0..15]  + 8) >> 4
@*    - neither set                 : 128
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels; bit 0x01 is tested as "left
@*  available" and bit 0x04 as "top available"
@*
@* @returns
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@                                       UWORD8 *pu1_dst,
@                                       WORD32 src_strd,
@                                       WORD32 dst_strd,
@                                       WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0 => *pu1_src
@   r1 => *pu1_dst
@   r2 => src_strd                  (unused)
@   r3 => dst_strd
@   r4 => ui_neighboravailability   (loaded from the stack)

    .global ih264_intra_pred_luma_16x16_mode_dc_a9q

ih264_intra_pred_luma_16x16_mode_dc_a9q:

    stmfd         sp!, {r4, r14}        @ save r4 and the return address (8 bytes)
    ldr           r4, [sp, #8]          @ r4 = ui_neighboravailability, first word above the 8 saved bytes

    tst           r4, #0x01             @ left available?  if not, try top only
    beq           top_available
    tst           r4, #0x04             @ top available?   if not, left only
    beq           left_available

    @ BOTH LEFT AND TOP AVAILABLE
    vld1.u8       {q0}, [r0]            @ q0 = pu1_src[0..15]
    add           r0, r0, #17
    vpaddl.u8     q0, q0                @ 16 x u8 -> 8 x u16 pairwise sums
    vld1.u8       {q1}, [r0]            @ q1 = pu1_src[17..32]
    vpaddl.u8     q1, q1
    vadd.u16      q0, q0, q1            @ combine both groups
    vadd.u16      d0, d0, d1            @ 8 -> 4 partial sums
    vpaddl.u16    d0, d0                @ 4 -> 2
    vpaddl.u32    d0, d0                @ 2 -> 1: total of the 32 bytes in the low lane
    vqrshrun.s16  d0, q0, #5            @ d0[0] = (total + 16) >> 5, saturated to u8
    vdup.u8       q0, d0[0]             @ replicate across the row
    b             str_pred

top_available:                          @ LEFT NOT AVAILABLE
    tst           r4, #0x04             @ top available?  if not, nothing is available
    beq           none_available
    add           r0, r0, #17           @ r0 = pu1_src + 17, then share the 16-byte average below

left_available:                         @ ONLY ONE SIDE AVAILABLE; r0 -> its 16 bytes
    vld1.u8       {q0}, [r0]
    vpaddl.u8     q0, q0                @ 16 x u8 -> 8 x u16 pairwise sums
    vadd.u16      d0, d0, d1            @ 8 -> 4
    vpaddl.u16    d0, d0                @ 4 -> 2
    vpaddl.u32    d0, d0                @ 2 -> 1: total of the 16 bytes in the low lane
    vqrshrun.s16  d0, q0, #4            @ d0[0] = (total + 8) >> 4, saturated to u8
    vdup.u8       q0, d0[0]
    b             str_pred

none_available:                         @ NONE AVAILABLE
    vmov.u8       q0, #128              @ fixed value 128

str_pred:
    vst1.8        {q0}, [r1], r3        @ row 0; r1 += dst_strd after each store
    vst1.8        {q0}, [r1], r3        @ row 1
    vst1.8        {q0}, [r1], r3        @ row 2
    vst1.8        {q0}, [r1], r3        @ row 3
    vst1.8        {q0}, [r1], r3        @ row 4
    vst1.8        {q0}, [r1], r3        @ row 5
    vst1.8        {q0}, [r1], r3        @ row 6
    vst1.8        {q0}, [r1], r3        @ row 7
    vst1.8        {q0}, [r1], r3        @ row 8
    vst1.8        {q0}, [r1], r3        @ row 9
    vst1.8        {q0}, [r1], r3        @ row 10
    vst1.8        {q0}, [r1], r3        @ row 11
    vst1.8        {q0}, [r1], r3        @ row 12
    vst1.8        {q0}, [r1], r3        @ row 13
    vst1.8        {q0}, [r1], r3        @ row 14
    vst1.8        {q0}, [r1], r3        @ row 15

    ldmfd         sp!, {r4, pc}         @ restore r4 and return

@******************************************************************************

@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_plane
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:PLANE
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:PLANE, described in
@*  sec 8.3.3.4.  With s[] = pu1_src[], the code computes
@*    H   = sum over i = 0..7 of coeff[i] * (s[25 + i] - s[23 - i])
@*    V   = sum over k = 1..8 of k * (s[8 - k] - s[8 + k])
@*    i_b = (5 * H + 32) >> 6
@*    i_c = (5 * V + 32) >> 6
@*    i_a = 16 * (s[0] + s[32])
@*  and writes, for row y and column x,
@*    sat_u8((i_a - 8 * i_b - 7 * i_c + i_b * coeff[x] + y * i_c + 16) >> 5)
@*  where coeff[] is the 16-byte table ih264_gai1_intrapred_luma_plane_coeffs
@*  (defined elsewhere; presumably 1..16 - TODO confirm against its definition).
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read; r2 is reused as the destination pointer)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*
@* @remarks
@*  The scalar (V, i_a) and NEON (H, i_b) computations are interleaved
@*  instruction by instruction; keep the ordering when editing.
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0      => *pu1_src  (later: ascending byte pointer, then loop counter)
@   r1      => *pu1_dst  (moved to r2; r1 is reused as a source pointer)
@   r2      => src_strd  (overwritten with pu1_dst)
@   r3      => dst_strd
@   (stack) => ui_neighboravailability  (unused, never loaded)

    .global ih264_intra_pred_luma_16x16_mode_plane_a9q
ih264_intra_pred_luma_16x16_mode_plane_a9q:

    stmfd         sp!, {r4-r10, r12, lr}

    mov           r2, r1                @ r2 = pu1_dst
    add           r1, r0, #17
    add           r0, r0, #15           @ r0 = pu1_src + 15

    mov           r8, #9
    sub           r1, r1, #1            @ r1 = pu1_src + 16
    mov           r10, r1               @top_left
    mov           r4, #-1               @ step for the descending byte pointer
    vld1.32       d2, [r1], r8          @ d2 = s[16..23]; r1 = pu1_src + 25
    ldr           r7, scratch_intrapred_addr1
scrlbl1:
    add           r7, r7, pc            @ r7 = &ih264_gai1_intrapred_luma_plane_coeffs

    vld1.32       d0, [r1]              @ d0 = s[25..32]
    vrev64.8      d2, d2                @ d2 = s[23], s[22], ..., s[16]
    vld1.32       {q3}, [r7]            @ q3 = 16 coefficient bytes
    vsubl.u8      q0, d0, d2            @ q0[i] = s[25 + i] - s[23 - i]
    vmovl.u8      q8, d6                @ q8 = coeff[0..7] widened to 16 bits
    vmul.s16      q0, q0, q8            @ weight the differences
    vmovl.u8      q9, d7                @ q9 = coeff[8..15] widened to 16 bits

    add           r7, r0, r4, lsl #3    @ r7 = pu1_src + 7 (walks downwards)
    sub           r0, r7, r4, lsl #1    @ r0 = pu1_src + 9 (walks upwards)
    rsb           lr, r4, #0x0          @ lr = +1, step for the ascending pointer

    vpadd.s16     d0, d0, d1            @ start reducing the 8 products to H

    ldrb          r8, [r7], r4          @ s[7]
    ldrb          r9, [r0], lr          @ s[9]

    vpaddl.s16    d0, d0
    sub           r12, r8, r9           @ V  = 1 * (s[7] - s[9])

    ldrb          r8, [r7], r4          @ s[6]

    vpaddl.s32    d0, d0                @ d0 = H
    ldrb          r9, [r0], lr          @ s[10]
    sub           r8, r8, r9
    vshl.s32      d2, d0, #2            @ 4 * H
    add           r12, r12, r8, lsl #1  @ V += 2 * (s[6] - s[10])

    vadd.s32      d0, d0, d2            @ 5 * H
    ldrb          r8, [r7], r4          @ s[5]
    ldrb          r9, [r0], lr          @ s[11]
    vrshr.s32     d0, d0, #6            @ i_b = D0[0]
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ s[4]
    add           r8, r8, r8, lsl #1    @ 3 * (s[5] - s[11])

    vdup.16       q2, d0[0]             @ q2 = i_b in every 16-bit lane
    add           r12, r12, r8          @ V += 3 * (s[5] - s[11])
    ldrb          r9, [r0], lr          @ s[12]
    vmul.s16      q0, q2, q8            @ q0 = i_b * coeff[0..7]
    sub           r5, r5, r9
    vmul.s16      q1, q2, q9            @ q1 = i_b * coeff[8..15]
    add           r12, r12, r5, lsl #2  @ V += 4 * (s[4] - s[12])

    ldrb          r8, [r7], r4          @ s[3]
    ldrb          r9, [r0], lr          @ s[13]
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ s[2]
    add           r8, r8, r8, lsl #2    @ 5 * (s[3] - s[13])
    ldrb          r6, [r0], lr          @ s[14]
    add           r12, r12, r8          @ V += 5 * (s[3] - s[13])
    ldrb          r8, [r7], r4          @ s[1]
    ldrb          r9, [r0], lr          @ s[15]

    sub           r5, r5, r6
    sub           r8, r8, r9
    add           r5, r5, r5, lsl #1    @ 3 * (s[2] - s[14])
    rsb           r8, r8, r8, lsl #3    @ 7 * (s[1] - s[15])
    add           r12, r12, r5, lsl #1  @ V += 6 * (s[2] - s[14])
    ldrb          r5, [r7], r4          @ s[0]
    ldrb          r6, [r10]             @top_left
    add           r12, r12, r8          @ V += 7 * (s[1] - s[15])
    sub           r9, r5, r6
    ldrb          r6, [r1, #7]          @ s[32]
    add           r12, r12, r9, lsl #3  @ i_c = r12
    add           r8, r5, r6            @ s[0] + s[32]

    add           r12, r12, r12, lsl #2 @ 5 * V
    lsl           r8, r8, #4            @ i_a = r8

    add           r12, r12, #0x20       @ rounding term for the >> 6
    lsr           r12, r12, #6          @ only the low 16 bits are consumed by the vdup.16 below

    vshl.s16      q14, q2, #3           @ 8 * i_b
    vdup.16       q3, r12               @ q3 = i_c in every lane

    vdup.16       q15, r8               @ q15 = i_a in every lane
    vshl.s16      q13, q3, #3           @ 8 * i_c
    vsub.s16      q15, q15, q14
    vsub.s16      q15, q15, q13         @ i_a - 8 * i_b - 8 * i_c
    vadd.s16      q14, q15, q3          @ i_a - 8 * i_b - 7 * i_c  (row 0 base)

    mov           r0, #14               @ 14 rows are stored inside the loop, 2 per pass
    vadd.s16      q13, q14, q0          @ row 0 accumulators, columns 0..7
    vadd.s16      q14, q14, q1          @ row 0 accumulators, columns 8..15
    vqrshrun.s16  d20, q13, #5          @ q10 = row 0 pixels: (acc + 16) >> 5, saturated to u8
    vqrshrun.s16  d21, q14, #5

    @ Each pass adds i_c to the accumulators to step one row down, narrowing
    @ alternately into q11 and q10 while the previously finished row is stored.
loop_16x16_plane:

    vadd.s16      q13, q13, q3
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ store the row held in q10
    vqrshrun.s16  d23, q14, #5

    vadd.s16      q13, q13, q3
    subs          r0, #2
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d20, q13, #5
    vst1.32       {q11}, [r2], r3       @ store the row held in q11
    vqrshrun.s16  d21, q14, #5
    bne           loop_16x16_plane

    vadd.s16      q13, q13, q3          @ final row
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ row 14
    vqrshrun.s16  d23, q14, #5
    vst1.32       {q11}, [r2], r3       @ row 15

    ldmfd         sp!, {r4-r10, r12, pc}