@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/*******************************************************************************
@* @file
@*  ihevc_deblk_chroma_horz.s
@*
@* @brief
@*  contains the function definition for chroma deblocking of a horizontal
@*  edge. the function is hand-written arm neon assembly in gnu assembler
@*  syntax.
@*
@* @author
@*  anand s
@*
@* @par list of functions:
@*  - ihevc_deblk_chroma_horz_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

@ Byte offsets from sp to the stack-passed arguments of
@ ihevc_deblk_chroma_horz_a9q, valid only AFTER its prologue
@ "push {r4-r12,lr}" (10 registers = 40 bytes), so the first stack
@ argument sits at sp+40 and each following word is 4 bytes further on.
.equ    qp_offset_u_offset,     40
.equ    qp_offset_v_offset,     44
.equ    tc_offset_div2_offset,  48
.equ    filter_p_offset,        52
.equ    filter_q_offset,        56

.text
.align 4




@ Lookup tables defined in another object file; both are indexed below
@ with "lsl #2", i.e. as arrays of 32-bit words.
.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_horz_a9q

@ Position-independent table addresses. Each word holds the distance from
@ the matching "add r3, r3, pc" (labels ulbl1 / ulbl2) to the table. The
@ extra -8 cancels the ARM-state rule that reading pc yields the address
@ of the current instruction + 8.
gai4_ihevc_qp_table_addr:
.long gai4_ihevc_qp_table - ulbl1 - 8

gai4_ihevc_tc_table_addr:
.long gai4_ihevc_tc_table - ulbl2 - 8

@-----------------------------------------------------------------------------
@ ihevc_deblk_chroma_horz_a9q
@
@ Filters 8 bytes on each side of a horizontal edge. With "row k" meaning the
@ 8 bytes at r0 + k*r1, the code computes per 16-bit lane
@
@     delta  = clip( (4*(row0 - row-1) + row-2 - row+1 + 4) >> 3, -tc, +tc )
@     row-1' = sat_u8(row-1 + delta)     stored only if filter_p != 0
@     row0'  = sat_u8(row0  - delta)     stored only if filter_q != 0
@
@ Even lanes use the tc derived from qp_offset_u, odd lanes the tc derived
@ from qp_offset_v (presumably the row holds interleaved U,V samples --
@ confirm against the caller).
@
@ In (names of the stack arguments follow the .equ offsets of this file):
@   r0        pointer to row0 (first row below the edge)
@   r1        byte distance between rows
@   r2, r3    two values that are summed; presumably the quantisation
@             parameters of the two sides of the edge -- confirm
@   [sp+0]    qp_offset_u      (sp+40 after the push)
@   [sp+4]    qp_offset_v
@   [sp+8]    tc_offset_div2
@   [sp+12]   filter_p flag
@   [sp+16]   filter_q flag
@ Out:   none (rows at r0-r1 and r0 are conditionally rewritten in place)
@ Saves: r4-r12, lr.  NEON registers written: q0-q3, q8-q10.
@
@ Scalar (index / table lookup) and NEON (pixel) instructions are interleaved;
@ several conditional instructions consume flags set many lines earlier, so
@ do not insert flag-setting instructions without checking those ranges.
@-----------------------------------------------------------------------------
.type ihevc_deblk_chroma_horz_a9q, %function

ihevc_deblk_chroma_horz_a9q:
    push        {r4-r12,lr}                 @ 40 bytes pushed; stack args now start at sp+40
    sub         r12,r0,r1                   @ r12 = &row-1
    vld1.8      {d0},[r0]                   @ d0  = row0 (8 bytes)
    sub         r5,r12,r1                   @ r5  = &row-2
    add         r6,r0,r1                    @ r6  = &row+1
    add         r1,r2,r3                    @ r1  = r2 + r3 (stride in r1 no longer needed)
    vmovl.u8    q0,d0                       @ q0  = row0 widened to 16 bit
    ldr         r10,[sp,#qp_offset_u_offset] @ r10 = qp_offset_u
    vld1.8      {d2},[r12]                  @ d2  = row-1
    add         r2,r1,#1                    @ r2  = r2 + r3 + 1 (rounding term for the >>1 below)
    ldr         r4,[sp,#tc_offset_div2_offset] @ r4 = tc_offset_div2
    vld1.8      {d4},[r5]                   @ d4  = row-2
    ldr         r8,[sp,#filter_p_offset]    @ r8  = filter_p flag
    vld1.8      {d16},[r6]                  @ d16 = row+1
    ldr         r9,[sp,#filter_q_offset]    @ r9  = filter_q flag
    adds        r1,r10,r2,asr #1            @ r1 = qp_offset_u + (r2 >> 1); N flag consumed by bmi below
    vmovl.u8    q1,d2                       @ q1  = row-1 widened to 16 bit
    ldr         r7,[sp,#qp_offset_v_offset] @ r7  = qp_offset_v
    ldr         r3,gai4_ihevc_qp_table_addr @ r3  = pc-relative offset of the qp table
ulbl1:
    add         r3, r3, pc                  @ r3  = &gai4_ihevc_qp_table (pc reads as ulbl1+8)
    bmi         l1.3312                     @ negative index: keep r1 unchanged, no table lookup
    cmp         r1,#0x39                    @ 0x39 = 57
    ldrle       r1,[r3,r1,lsl #2]           @ 0 <= index <= 57: r1 = gai4_ihevc_qp_table[r1]
    subgt       r1,r1,#6                    @ index > 57:       r1 = r1 - 6
l1.3312:
    adds        r2,r7,r2,asr #1             @ r2 = qp_offset_v + (r2 >> 1); N flag consumed by bmi below
    vmovl.u8    q2,d4                       @ q2  = row-2 widened to 16 bit
    bmi         l1.3332                     @ negative index: keep r2 unchanged
    cmp         r2,#0x39                    @ same mapping as for r1 above
    ldrle       r2,[r3,r2,lsl #2]           @ r2 = gai4_ihevc_qp_table[r2]
    subgt       r2,r2,#6                    @ r2 = r2 - 6
l1.3332:
    add         r1,r1,r4,lsl #1             @ r1 += 2 * tc_offset_div2
    vsub.i16    q3,q0,q1                    @ q3  = row0 - row-1
    add         r3,r1,#2                    @ r3 = candidate tc index (qp table base in r3 is dead now)
    cmp         r3,#0x35                    @ 0x35 = 53, upper clamp of the tc index
    movgt       r1,#0x35                    @ index > 53: clamp to 53
    vshl.i16    q3,q3,#2                    @ q3  = 4 * (row0 - row-1)
    vmovl.u8    q8,d16                      @ q8  = row+1 widened to 16 bit
    bgt         l1.3368                     @ still the flags of "cmp r3,#0x35": already clamped
    adds        r3,r1,#2                    @ recompute r1+2 only to obtain its sign
    addpl       r1,r1,#2                    @ non-negative: index = r1 + 2
    movmi       r1,#0                       @ negative: clamp index to 0
l1.3368:
    ldr         r3,gai4_ihevc_tc_table_addr @ r3  = pc-relative offset of the tc table
ulbl2:
    add         r3, r3, pc                  @ r3  = &gai4_ihevc_tc_table (pc reads as ulbl2+8)
    vadd.i16    q2,q3,q2                    @ q2  = 4*(row0 - row-1) + row-2
    add         r2,r2,r4,lsl #1             @ r2 += 2 * tc_offset_div2
    vsub.i16    q3,q2,q8                    @ q3  = 4*(row0 - row-1) + row-2 - row+1
    add         r4,r2,#2                    @ r4 = candidate tc index for the second value (r4 is free now)
    ldr         r1,[r3,r1,lsl #2]           @ r1 = tc from the qp_offset_u path
    cmp         r4,#0x35                    @ clamp second index to [0, 53], same scheme as above
    movgt       r2,#0x35
    bgt         l1.3412
    adds        r4,r2,#2
    addpl       r2,r2,#2
    movmi       r2,#0
l1.3412:


    ldr         r2,[r3,r2,lsl #2]           @ r2 = tc from the qp_offset_v path
    cmp         r8,#0                       @ filter_p == 0 ?  flags are kept until "beq l1.3528"
    vdup.16     q8,r2                       @ q8  = tc(v path) in every lane
    vdup.16     q2,r1                       @ q2  = tc(u path) in every lane
    rsb         r1,r1,#0                    @ r1  = -tc(u path)   (rsb without S: flags untouched)
    vrshr.s16   q3,q3,#3                    @ q3  = (q3 + 4) >> 3, rounding shift -> unclipped delta
    vdup.16     q9,r1                       @ q9  = -tc(u path) in every lane
    rsb         r1,r2,#0                    @ r1  = -tc(v path)
    vzip.16     q2,q8                       @ q2  = +tc interleaved: even lanes u path, odd lanes v path
    vdup.16     q10,r1                      @ q10 = -tc(v path) in every lane

    vzip.16     q9,q10                      @ q9  = -tc interleaved the same way

    vmin.s16    q8,q3,q2                    @ q8  = min(delta, +tc)
    vmax.s16    q2,q9,q8                    @ q2  = max(-tc, q8) -> delta clipped to [-tc, +tc]
    vadd.i16    q1,q1,q2                    @ q1  = row-1 + delta
    vsub.i16    q0,q0,q2                    @ q0  = row0  - delta
    vqmovun.s16 d2,q1                       @ d2  = q1 saturated to unsigned 8 bit
    vqmovun.s16 d0,q0                       @ d0  = q0 saturated to unsigned 8 bit
    beq         l1.3528                     @ filter_p == 0 (cmp r8 above): leave row-1 untouched
    vst1.8      {d2},[r12]                  @ write filtered row-1
l1.3528:
    cmp         r9,#0                       @ filter_q == 0 ?
    beq         l1.3540                     @ yes: leave row0 untouched
    vst1.8      {d0},[r0]                   @ write filtered row0
l1.3540:
    pop         {r4-r12,pc}                 @ restore saved registers and return