///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_horz.s
//*
//* @brief
//*  contains the function definition for deblocking a horizontal chroma edge.
//* the function is coded in neon assembly and can be compiled using
//* rvct
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*
//*
//* @remarks
//*  none
//*
//void ihevc_deblk_chroma_horz(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)
//

// Everything below is emitted into the code section. With the GNU assembler
// on ARM/AArch64 the .align operand is a power of two, so this requests
// 2^4 = 16-byte alignment.
.text
.align 4
// Shared macro file. push_v_regs / pop_v_regs, used by the function below, are
// not defined in this file, so they are presumably provided here.
.include "ihevc_neon_macros.s"



// Lookup tables defined in another translation unit. The function below reaches
// them through the GOT and reads 32-bit entries from them.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
// Export the entry point so that it can be called from other objects.
.globl ihevc_deblk_chroma_horz_av8

// Mark the symbol as a function in the ELF symbol table.
.type ihevc_deblk_chroma_horz_av8, %function

//******************************************************************************
//* ihevc_deblk_chroma_horz_av8
//*
//* Deblocks 8 bytes of the two rows adjacent to a horizontal chroma edge.
//*
//* Arguments (AAPCS64):
//*   x0   pu1_src         address of the first row below the edge (q0 row)
//*   w1   src_strd        distance in bytes between consecutive rows
//*   w2   quant_param_p   QP of the P side
//*   w3   quant_param_q   QP of the Q side
//*   w4   qp_offset_u     QP offset used for even byte lanes
//*   w5   qp_offset_v     QP offset used for odd byte lanes
//*   w6   tc_offset_div2  half of the tc offset
//*   w7   filter_flag_p   non-zero: write back the filtered p0 row
//*   [sp] filter_flag_q   non-zero: write back the filtered q0 row
//*
//* Rows touched (8 bytes each):
//*   p1 = pu1_src - 2*src_strd (read)     p0 = pu1_src - src_strd (read/write)
//*   q0 = pu1_src              (read/write) q1 = pu1_src + src_strd (read)
//*
//* Computation, once with qp_offset_u and once with qp_offset_v:
//*   idx = qp_offset + ((quant_param_p + quant_param_q + 1) >> 1)
//*   qp  = idx                        if idx < 0
//*       = idx - 6                    if idx > 57
//*       = gai4_ihevc_qp_table[idx]   otherwise
//*   tc  = gai4_ihevc_tc_table[clip(qp + 2 + 2*tc_offset_div2, 0, 53)]
//* Per 16-bit lane (even lanes use the U tc, odd lanes the V tc, which
//* presumably matches interleaved U/V samples):
//*   delta = clip((((q0 - p0) << 2) + p1 - q1 + 4) >> 3, -tc, tc)
//*   p0'   = saturate_u8(p0 + delta)
//*   q0'   = saturate_u8(q0 - delta)
//*
//* Both tables are read as arrays of 32-bit entries.
//* Scratch: x1-x10, x12, x20 (saved/restored), v0, v2, v4, v6, v16, v18,
//*          v30, v31, condition flags.
//******************************************************************************
.equ DEBLK_CHROMA_QP_TABLE_LAST_IDX, 57     // largest index looked up in gai4_ihevc_qp_table
.equ DEBLK_CHROMA_QP_ABOVE_TABLE_ADJ, 6     // subtracted from indices above the table range
.equ DEBLK_CHROMA_TC_IDX_MAX, 53            // upper clamp of the gai4_ihevc_tc_table index

ihevc_deblk_chroma_horz_av8:
    // AAPCS64 leaves bits [63:32] of a register holding a 32-bit argument
    // unspecified. Every argument below is consumed through its 64-bit X
    // view, so all WORD32 arguments are sign-extended up front.
    sxtw        x1, w1                      // src_strd
    sxtw        x2, w2                      // quant_param_p
    sxtw        x3, w3                      // quant_param_q
    sxtw        x4, w4                      // qp_offset_u
    sxtw        x5, w5                      // qp_offset_v
    sxtw        x6, w6                      // tc_offset_div2
    sxtw        x7, w7                      // filter_flag_p
    ldrsw       x9, [sp]                    // filter_flag_q: read before sp is moved
    push_v_regs
    stp         x19, x20, [sp, #-16]!       // x20 is used as scratch below

    mov         x10, x4                     // x10 = qp_offset_u
    mov         x8, x7                      // x8  = filter_flag_p
    mov         x7, x5                      // x7  = qp_offset_v
    mov         x4, x6                      // x4  = tc_offset_div2

    // Row addresses and pixel loads, widened to 16 bits per lane.
    sub         x12, x0, x1                 // x12 -> p0 row
    sub         x5, x12, x1                 // x5  -> p1 row
    add         x6, x0, x1                  // x6  -> q1 row
    ld1         {v0.8b}, [x0]               // q0
    ld1         {v2.8b}, [x12]              // p0
    ld1         {v4.8b}, [x5]               // p1
    ld1         {v16.8b}, [x6]              // q1
    uxtl        v0.8h, v0.8b
    uxtl        v2.8h, v2.8b
    uxtl        v4.8h, v4.8b
    uxtl        v16.8h, v16.8b

    // x2 = quant_param_p + quant_param_q + 1 (halved when the offsets are added)
    add         x2, x2, x3
    add         x2, x2, #1
    adrp        x3, :got:gai4_ihevc_qp_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_qp_table]

    // x1 = chroma QP derived with qp_offset_u
    adds        x1, x10, x2, asr #1
    b.mi        .Ldeblk_chroma_horz_qp_u_done   // negative index is used as is
    cmp         x1, #DEBLK_CHROMA_QP_TABLE_LAST_IDX
    b.gt        .Ldeblk_chroma_horz_qp_u_above
    ldr         w1, [x3, x1, lsl #2]
    b           .Ldeblk_chroma_horz_qp_u_done
.Ldeblk_chroma_horz_qp_u_above:
    sub         x1, x1, #DEBLK_CHROMA_QP_ABOVE_TABLE_ADJ
.Ldeblk_chroma_horz_qp_u_done:

    // x2 = chroma QP derived with qp_offset_v
    adds        x2, x7, x2, asr #1
    b.mi        .Ldeblk_chroma_horz_qp_v_done   // negative index is used as is
    cmp         x2, #DEBLK_CHROMA_QP_TABLE_LAST_IDX
    b.gt        .Ldeblk_chroma_horz_qp_v_above
    ldr         w2, [x3, x2, lsl #2]
    b           .Ldeblk_chroma_horz_qp_v_done
.Ldeblk_chroma_horz_qp_v_above:
    sub         x2, x2, #DEBLK_CHROMA_QP_ABOVE_TABLE_ADJ
.Ldeblk_chroma_horz_qp_v_done:

    // tc table index = clip(qp + 2*tc_offset_div2 + 2, 0, DEBLK_CHROMA_TC_IDX_MAX)
    mov         x20, #DEBLK_CHROMA_TC_IDX_MAX
    add         x1, x1, x4, lsl #1
    add         x1, x1, #2
    cmp         x1, x20
    csel        x1, x20, x1, gt
    cmp         x1, #0
    csel        x1, xzr, x1, lt

    add         x2, x2, x4, lsl #1
    add         x2, x2, #2
    cmp         x2, x20
    csel        x2, x20, x2, gt
    cmp         x2, #0
    csel        x2, xzr, x2, lt

    adrp        x3, :got:gai4_ihevc_tc_table
    ldr         x3, [x3, #:got_lo12:gai4_ihevc_tc_table]
    ldr         w1, [x3, x1, lsl #2]        // tc for even (U) lanes
    ldr         w2, [x3, x2, lsl #2]        // tc for odd (V) lanes

    // delta = (((q0 - p0) << 2) + p1 - q1 + 4) >> 3   (srshr supplies the +4 rounding)
    sub         v6.8h, v0.8h, v2.8h
    shl         v6.8h, v6.8h, #2
    add         v4.8h, v6.8h, v4.8h
    sub         v6.8h, v4.8h, v16.8h
    srshr       v6.8h, v6.8h, #3

    // Per-lane limits: +tc interleaved as U,V,U,V,... and its negation.
    dup         v30.8h, w1
    dup         v31.8h, w2
    zip1        v4.8h, v30.8h, v31.8h       // +tc
    neg         v18.8h, v4.8h               // -tc

    // Clamp delta to [-tc, +tc] and apply it with unsigned 8-bit saturation.
    smin        v16.8h, v6.8h, v4.8h
    smax        v4.8h, v18.8h, v16.8h
    add         v2.8h, v2.8h, v4.8h         // p0 + delta
    sub         v0.8h, v0.8h, v4.8h         // q0 - delta
    sqxtun      v2.8b, v2.8h
    sqxtun      v0.8b, v0.8h

    // Each side is written back only when its filter flag is non-zero.
    cbz         x8, .Ldeblk_chroma_horz_skip_p
    st1         {v2.8b}, [x12]
.Ldeblk_chroma_horz_skip_p:
    cbz         x9, .Ldeblk_chroma_horz_skip_q
    st1         {v0.8b}, [x0]
.Ldeblk_chroma_horz_skip_q:
    ldp         x19, x20, [sp], #16
    pop_v_regs
    ret