//******************************************************************************
//*
//* Copyright (C) 2015 The Android Open Source Project
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************
//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
///**
//******************************************************************************
//* @file
//*  ih264_intra_pred_luma_4x4_av8.s
//*
//* @brief
//*  Contains function definitions for intra 4x4 Luma prediction.
//*
//* @author
//*  Ittiam
//*
//* @par List of Functions:
//*
//*  - ih264_intra_pred_luma_4x4_mode_vert_av8
//*  - ih264_intra_pred_luma_4x4_mode_horz_av8
//*  - ih264_intra_pred_luma_4x4_mode_dc_av8
//*  - ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//*  - ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//*  - ih264_intra_pred_luma_4x4_mode_vert_r_av8
//*  - ih264_intra_pred_luma_4x4_mode_horz_d_av8
//*  - ih264_intra_pred_luma_4x4_mode_vert_l_av8
//*  - ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/

// All the functions here are replicated from ih264_intra_pred_filters.c

.text
.p2align 2
.include "ih264_neon_macros.s"

///**
//*******************************************************************************
//*
//* Common interface of every function in this file
//*
//*   void ih264_intra_pred_luma_4x4_mode_<mode>_av8(UWORD8 *pu1_src,
//*                                                  UWORD8 *pu1_dst,
//*                                                  WORD32 src_strd,
//*                                                  WORD32 dst_strd,
//*                                                  WORD32 ui_neighboravailability)
//*
//*   x0 => *pu1_src                  neighbour samples (layout below)
//*   x1 => *pu1_dst                  top-left of the 4x4 destination block
//*   x2 => src_strd                  not read by any function in this file
//*   x3 => dst_strd                  byte distance between destination rows
//*   x4 => ui_neighboravailability   only read by the DC mode
//*
//* Neighbour layout, as used by the code below:
//*   pu1_src[0..3]   left column, stored bottom-to-top (the horizontal mode
//*                   writes byte 3 to the first row and byte 0 to the last)
//*   pu1_src[4]      presumably the top-left sample - TODO confirm against
//*                   ih264_intra_pred_filters.c
//*   pu1_src[5..8]   top row, left-to-right (the vertical mode copies it)
//*   pu1_src[9..12]  read by diag_dl and vert_l; presumably the top-right
//*                   samples - TODO confirm
//*
//* Filters used by the directional modes:
//*   avg2(a, b)     = (a + b + 1) >> 1          (sqrshrun #1 on a + b)
//*   filt3(a, b, c) = (a + 2*b + c + 2) >> 2    (sqrshrun #2 on a + 2*b + c)
//*
//* push_v_regs / pop_v_regs are macros from ih264_neon_macros.s, which is not
//* visible from this file; presumably they save / restore the callee-saved
//* SIMD registers. They are kept around every function body.
//*
//*******************************************************************************
//*/

///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Vertical (sec 8.3.1.2.1)
//*
//* @par Description:
//*  Copies the four bytes pu1_src[5..8] into each of the four destination rows.
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_vert_av8

ih264_intra_pred_luma_4x4_mode_vert_av8:

    push_v_regs

    add       x0, x0, #5                    // x0 = &pu1_src[5]
    ld1       {v0.s}[0], [x0]               // v0.s[0] = pu1_src[5..8]

    st1       {v0.s}[0], [x1], x3           // row 0
    st1       {v0.s}[0], [x1], x3           // row 1
    st1       {v0.s}[0], [x1], x3           // row 2
    st1       {v0.s}[0], [x1], x3           // row 3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Horizontal (sec 8.3.1.2.2)
//*
//* @par Description:
//*  Fills destination row r (r = 0..3) with the single byte pu1_src[3 - r].
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_horz_av8

ih264_intra_pred_luma_4x4_mode_horz_av8:

    push_v_regs

    ld1       {v1.s}[0], [x0]               // v1.b[0..3] = pu1_src[0..3]

    dup       v0.8b, v1.b[3]                // row 0 value
    dup       v2.8b, v1.b[2]                // row 1 value
    dup       v3.8b, v1.b[1]                // row 2 value
    dup       v4.8b, v1.b[0]                // row 3 value

    st1       {v0.s}[0], [x1], x3
    st1       {v2.s}[0], [x1], x3
    st1       {v3.s}[0], [x1], x3
    st1       {v4.s}[0], [x1], x3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_dc_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: DC (sec 8.3.1.2.3)
//*
//* @par Description:
//*  Fills the 4x4 block with one value chosen from ui_neighboravailability:
//*    bit 0 and bit 2 set : (sum(pu1_src[0..3]) + sum(pu1_src[5..8]) + 4) >> 3
//*    only bit 0 set      : (sum(pu1_src[0..3]) + 2) >> 2     (left only)
//*    only bit 2 set      : (sum(pu1_src[5..8]) + 2) >> 2     (top only)
//*    neither set         : 128
//*  src_strd is not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_dc_av8

ih264_intra_pred_luma_4x4_mode_dc_av8:

    push_v_regs

    tbz       w4, #0, .Ldc_left_unavailable // bit 0 clear: no left column

    // Left column available: w5 = sum of pu1_src[0..3].
    // ldrb zero-extends, so no further extension is needed.
    ldrb      w5, [x0, #3]
    ldrb      w6, [x0, #2]
    ldrb      w7, [x0, #1]
    ldrb      w8, [x0]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w7

    tbz       w4, #2, .Ldc_left_only        // bit 2 clear: no top row

    // Both available: add pu1_src[5..8], then average over 8 samples.
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w5, w5, w6
    add       w7, w7, w8
    add       w5, w5, w9
    add       w5, w5, w7
    add       w5, w5, #4
    lsr       w5, w5, #3
    b         .Ldc_fill

.Ldc_left_only:
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         .Ldc_fill

.Ldc_left_unavailable:
    tbz       w4, #2, .Ldc_none_available   // neither left nor top

    // Only top available: average pu1_src[5..8].
    ldrb      w6, [x0, #5]
    ldrb      w7, [x0, #6]
    ldrb      w8, [x0, #7]
    ldrb      w9, [x0, #8]
    add       w5, w6, w7
    add       w8, w8, w9
    add       w5, w5, w8
    add       w5, w5, #2
    lsr       w5, w5, #2
    b         .Ldc_fill

.Ldc_none_available:
    mov       w5, #128

.Ldc_fill:
    // Single shared tail: broadcast the DC value and write four rows.
    dup       v0.8b, w5
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3
    st1       {v0.s}[0], [x1], x3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dl_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Diagonal_Down_Left (sec 8.3.1.2.4)
//*
//* @par Description:
//*  With t[i] = pu1_src[5 + i] (i = 0..7), computes
//*    p[k] = filt3(t[k], t[k+1], t[k+2])   for k = 0..5
//*    p[6] = filt3(t[6], t[7], t[7])
//*  and writes destination row r as p[r .. r+3].
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_diag_dl_av8

ih264_intra_pred_luma_4x4_mode_diag_dl_av8:

    push_v_regs

    add       x0, x0, #5                    // x0 = &t[0]
    sub       x5, x3, #2                    // row step after a 2-byte advance
    add       x6, x0, #7                    // x6 = &t[7]

    ld1       {v0.8b}, [x0]                 // v0 = t[0..7]
    ext       v1.8b, v0.8b, v0.8b, #1       // v1 = t[1..7], t[0]
    ext       v2.8b, v0.8b, v0.8b, #2       // v2 = t[2..7], t[0], t[1]
    ld1       {v2.b}[6], [x6]               // v2.b[6] = t[7] (repeat last sample)

    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v3.8b, v24.8h, #2             // v3 = p[0..]
    ext       v4.8b, v3.8b, v3.8b, #1       // v4 = p[1..]

    st1       {v3.s}[0], [x1], x3           // row 0 = p[0..3]
    st1       {v4.s}[0], [x1], x3           // row 1 = p[1..4]
    st1       {v3.h}[1], [x1], #2           // row 2 = p[2..3],
    st1       {v3.h}[2], [x1], x5           //         p[4..5]
    st1       {v4.h}[1], [x1], #2           // row 3 = p[3..4],
    st1       {v4.h}[2], [x1]               //         p[5..6]

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_diag_dr_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Diagonal_Down_Right (sec 8.3.1.2.5)
//*
//* @par Description:
//*  With s[i] = pu1_src[i], computes p[k] = filt3(s[k], s[k+1], s[k+2]) for
//*  k = 0..6 and writes destination row r as p[3-r .. 6-r].
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_diag_dr_av8

ih264_intra_pred_luma_4x4_mode_diag_dr_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s[2..8], s[1]

    uaddl     v20.8h, v0.8b, v1.8b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v3.8b, v24.8h, #2             // v3 = p[0..]
    ext       v4.8b, v3.8b, v3.8b, #1       // v4 = p[1..]

    sub       x5, x3, #2                    // row step after a 2-byte advance

    st1       {v4.h}[1], [x1], #2           // row 0 = p[3..4],
    st1       {v4.h}[2], [x1], x5           //         p[5..6]
    st1       {v3.h}[1], [x1], #2           // row 1 = p[2..3],
    st1       {v3.h}[2], [x1], x5           //         p[4..5]
    st1       {v4.s}[0], [x1], x3           // row 2 = p[1..4]
    st1       {v3.s}[0], [x1], x3           // row 3 = p[0..3]

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_r_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Vertical_Right (sec 8.3.1.2.6)
//*
//* @par Description:
//*  With s[i] = pu1_src[i], a[k] = avg2(s[k], s[k+1]) and
//*  p[k] = filt3(s[k], s[k+1], s[k+2]):
//*    row 0 = a[4], a[5], a[6], a[7]
//*    row 1 = p[3], p[4], p[5], p[6]
//*    row 2 = p[2], a[4], a[5], a[6]
//*    row 3 = p[1], p[3], p[4], p[5]
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_vert_r_av8

ih264_intra_pred_luma_4x4_mode_vert_r_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s[1..8]
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s[2..8], s[1]

    uaddl     v20.8h, v0.8b, v1.8b          // a + b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a[0..]
    sqrshrun  v3.8b, v24.8h, #2             // v3 = p[0..]
    ext       v5.8b, v3.8b, v3.8b, #3       // v5 = p[3..]

    sub       x8, x3, #3                    // row step after a 3-byte advance

    st1       {v4.s}[1], [x1], x3           // row 0 = a[4..7]
    st1       {v5.s}[0], [x1], x3           // row 1 = p[3..6]

    st1       {v3.b}[2], [x1], #1           // row 2 = p[2],
    st1       {v4.h}[2], [x1], #2           //         a[4..5],
    st1       {v4.b}[6], [x1], x8           //         a[6]

    st1       {v3.b}[1], [x1], #1           // row 3 = p[1],
    st1       {v5.h}[0], [x1], #2           //         p[3..4],
    st1       {v5.b}[2], [x1]               //         p[5]

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_d_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Horizontal_Down (sec 8.3.1.2.7)
//*
//* @par Description:
//*  With s[i] = pu1_src[i], a[k] = avg2(s[k], s[k+1]) and
//*  p[k] = filt3(s[k], s[k+1], s[k+2]):
//*    row 0 = a[3], p[3], p[4], p[5]
//*    row 1 = a[2], p[2], a[3], p[3]
//*    row 2 = a[1], p[1], a[2], p[2]
//*    row 3 = a[0], p[0], a[1], p[1]
//*  The (a[k], p[k]) byte pairs are formed by interleaving the two filter
//*  outputs with trn1 / trn2.
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_horz_d_av8

ih264_intra_pred_luma_4x4_mode_horz_d_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = s[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1       // v2 = s[2..8], s[0]

    uaddl     v20.8h, v0.8b, v1.8b          // a + b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a[0..]
    sqrshrun  v5.8b, v24.8h, #2             // v5 = p[0..]

    sub       x5, x3, #2                    // row step after a 2-byte advance

    mov       v6.8b, v5.8b                  // keep un-interleaved p[] for row 0
    trn1      v10.8b, v4.8b, v5.8b          // a[0],p[0],a[2],p[2],...
    trn2      v5.8b, v4.8b, v5.8b           // v5 = a[1],p[1],a[3],p[3],...
    mov       v4.8b, v10.8b                 // v4 = a[0],p[0],a[2],p[2],...

    st1       {v5.h}[1], [x1], #2           // row 0 = a[3],p[3],
    st1       {v6.h}[2], [x1], x5           //         p[4],p[5]
    st1       {v4.h}[1], [x1], #2           // row 1 = a[2],p[2],
    st1       {v5.h}[1], [x1], x5           //         a[3],p[3]
    st1       {v5.h}[0], [x1], #2           // row 2 = a[1],p[1],
    st1       {v4.h}[1], [x1], x5           //         a[2],p[2]
    st1       {v4.h}[0], [x1], #2           // row 3 = a[0],p[0],
    st1       {v5.h}[0], [x1], x5           //         a[1],p[1]

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_vert_l_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Vertical_Left (sec 8.3.1.2.8)
//*
//* @par Description:
//*  With u[i] = pu1_src[4 + i], a[k] = avg2(u[k], u[k+1]) and
//*  p[k] = filt3(u[k], u[k+1], u[k+2]):
//*    row 0 = a[1..4]
//*    row 1 = p[1..4]
//*    row 2 = a[2..5]
//*    row 3 = p[2..5]
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_vert_l_av8

ih264_intra_pred_luma_4x4_mode_vert_l_av8:

    push_v_regs

    add       x0, x0, #4
    ld1       {v0.8b}, [x0]                 // v0 = u[0..7]
    add       x0, x0, #1
    ld1       {v1.8b}, [x0]                 // v1 = u[1..8]
    ext       v2.8b, v1.8b, v0.8b, #1       // v2 = u[2..8], u[0]

    uaddl     v20.8h, v0.8b, v1.8b          // a + b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v4.8b, v20.8h, #1             // v4 = a[0..]
    sqrshrun  v5.8b, v24.8h, #2             // v5 = p[0..]

    ext       v6.8b, v4.8b, v4.8b, #1       // a[1..]
    ext       v7.8b, v5.8b, v5.8b, #1       // p[1..]
    ext       v8.8b, v4.8b, v4.8b, #2       // a[2..]
    ext       v9.8b, v5.8b, v5.8b, #2       // p[2..]

    st1       {v6.s}[0], [x1], x3           // row 0
    st1       {v7.s}[0], [x1], x3           // row 1
    st1       {v8.s}[0], [x1], x3           // row 2
    st1       {v9.s}[0], [x1], x3           // row 3

    pop_v_regs
    ret


///**
//*******************************************************************************
//*
//* ih264_intra_pred_luma_4x4_mode_horz_u_av8
//*
//* @brief
//*  Intra prediction for luma_4x4 mode: Horizontal_Up (sec 8.3.1.2.9)
//*
//* @par Description:
//*  With s[i] = pu1_src[i] (only s[0..3] reach the output),
//*  a(x, y) = avg2 and f(x, y, z) = filt3:
//*    row 0 = a(s3,s2), f(s3,s2,s1), a(s2,s1), f(s2,s1,s0)
//*    row 1 = a(s2,s1), f(s2,s1,s0), a(s1,s0), f(s1,s0,s0)
//*    row 2 = a(s1,s0), f(s1,s0,s0), s0,       s0
//*    row 3 = s0,       s0,          s0,       s0
//*  src_strd and ui_neighboravailability are not used.
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
    .global ih264_intra_pred_luma_4x4_mode_horz_u_av8

ih264_intra_pred_luma_4x4_mode_horz_u_av8:

    push_v_regs

    ld1       {v0.8b}, [x0]                 // v0 = s[0..7]
    ldrb      w9, [x0]                      // w9 = s[0], replicated later
    ext       v1.8b, v0.8b, v0.8b, #1       // v1 = s[1..7], s[0]
    ld1       {v0.b}[7], [x0]               // v0.b[7] = s[0] (after v1 is formed)
    ext       v2.8b, v1.8b, v1.8b, #1       // v2 = s[2..7], s[0], s[1]

    uaddl     v20.8h, v0.8b, v1.8b          // a + b
    uaddl     v22.8h, v1.8b, v2.8b
    add       v24.8h, v20.8h, v22.8h        // a + 2*b + c
    sqrshrun  v4.8b, v20.8h, #1             // v4 = avg2 results
    sqrshrun  v5.8b, v24.8h, #2             // v5 = filt3 results;
                                            // lane 7 = f(s1,s0,s0)

    ext       v6.8b, v5.8b, v4.8b, #1       // v6 = v5[1..7], v4[0]

    st1       {v4.b}[2], [x1], #1           // row 0, col 0 = a(s3,s2)
    st1       {v6.b}[0], [x1], #1           // row 0, col 1 = f(s3,s2,s1)

    trn1      v10.8b, v6.8b, v5.8b
    trn2      v5.8b, v6.8b, v5.8b           // v5.h[3] = a(s1,s0), f(s1,s0,s0)
    mov       v6.8b, v10.8b

    sub       x5, x3, #2                    // row step after a 2-byte advance

    trn2      v6.8b, v4.8b, v6.8b           // v6.h[0] = a(s2,s1), f(s2,s1,s0)
    dup       v7.8b, w9                     // v7 = s0 in every lane

    st1       {v6.h}[0], [x1], x5           // row 0, cols 2..3
    st1       {v6.h}[0], [x1], #2           // row 1, cols 0..1
    st1       {v5.h}[3], [x1], x5           // row 1, cols 2..3
    st1       {v5.h}[3], [x1], #2           // row 2, cols 0..1
    st1       {v7.h}[3], [x1], x5           // row 2, cols 2..3
    st1       {v7.s}[0], [x1], x3           // row 3

    pop_v_regs
    ret