///***************************************************************************** //* //* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore //* //* Licensed under the Apache License, Version 2.0 (the "License"); //* you may not use this file except in compliance with the License. //* You may obtain a copy of the License at: //* //* http://www.apache.org/licenses/LICENSE-2.0 //* //* Unless required by applicable law or agreed to in writing, software //* distributed under the License is distributed on an "AS IS" BASIS, //* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //* See the License for the specific language governing permissions and //* limitations under the License. //* //*****************************************************************************/ ///** // ******************************************************************************* // * ,:file // * ihevc_mem_fns_neon.s // * // * ,:brief // * Contains function definitions for memory manipulation // * // * ,:author // * Naveen SR // * // * ,:par List of Functions: // * - ihevc_memcpy() // * - ihevc_memset_mul_8() // * - ihevc_memset_16bit_mul_8() // * // * ,:remarks // * None // * // ******************************************************************************* //*/ ///** //******************************************************************************* //* //* ,:brief //* memcpy of a 1d array //* //* ,:par Description: //* Does memcpy of 8bit data from source to destination for 8,16 or 32 number of bytes //* //* ,:param[in] pu1_dst //* UWORD8 pointer to the destination //* //* ,:param[in] pu1_src //* UWORD8 pointer to the source //* //* ,:param[in] num_bytes //* number of bytes to copy //* ,:returns //* //* ,:remarks //* None //* //******************************************************************************* //*/ //void ihevc_memcpy_mul_8(UWORD8 *pu1_dst, // UWORD8 *pu1_src, // UWORD8 num_bytes) //**************Variables Vs Registers************************* // x0 => *pu1_dst // x1 => *pu1_src // x2 => num_bytes .text .p2align 2 .global ihevc_memcpy_mul_8_av8 .type ihevc_memcpy_mul_8_av8, %function ihevc_memcpy_mul_8_av8: LOOP_NEON_MEMCPY_MUL_8: // Memcpy 8 bytes LD1 {v0.8b},[x1],#8 ST1 {v0.8b},[x0],#8 SUBS x2,x2,#8 BNE LOOP_NEON_MEMCPY_MUL_8 ret //******************************************************************************* //*/ //void ihevc_memcpy(UWORD8 *pu1_dst, // UWORD8 *pu1_src, // UWORD8 num_bytes) //**************Variables Vs Registers************************* // x0 => *pu1_dst // x1 => *pu1_src // x2 => num_bytes .global ihevc_memcpy_av8 .type ihevc_memcpy_av8, %function ihevc_memcpy_av8: SUBS x2,x2,#8 BLT ARM_MEMCPY LOOP_NEON_MEMCPY: // Memcpy 8 bytes LD1 {v0.8b},[x1],#8 ST1 {v0.8b},[x0],#8 SUBS x2,x2,#8 BGE LOOP_NEON_MEMCPY CMN x2,#8 BEQ MEMCPY_RETURN ARM_MEMCPY: ADD x2,x2,#8 LOOP_ARM_MEMCPY: LDRB w3,[x1],#1 STRB w3,[x0],#1 SUBS x2,x2,#1 BNE LOOP_ARM_MEMCPY MEMCPY_RETURN: ret //void ihevc_memset_mul_8(UWORD8 *pu1_dst, // UWORD8 value, // UWORD8 num_bytes) //**************Variables Vs Registers************************* // x0 => *pu1_dst // x1 => value // x2 => num_bytes .text .p2align 2 .global ihevc_memset_mul_8_av8 .type ihevc_memset_mul_8_av8, %function ihevc_memset_mul_8_av8: // Assumptions: numbytes is either 8, 16 or 32 dup v0.8b,w1 LOOP_MEMSET_MUL_8: // Memset 8 bytes ST1 {v0.8b},[x0],#8 SUBS x2,x2,#8 BNE LOOP_MEMSET_MUL_8 ret //void ihevc_memset(UWORD8 *pu1_dst, // UWORD8 value, // UWORD8 num_bytes) //**************Variables Vs Registers************************* // x0 => *pu1_dst // x1 => value // x2 => num_bytes .global ihevc_memset_av8 .type ihevc_memset_av8, %function ihevc_memset_av8: SUBS x2,x2,#8 BLT ARM_MEMSET dup v0.8b,w1 LOOP_NEON_MEMSET: // Memcpy 8 bytes ST1 {v0.8b},[x0],#8 SUBS x2,x2,#8 BGE LOOP_NEON_MEMSET CMN x2,#8 BEQ MEMSET_RETURN ARM_MEMSET: ADD x2,x2,#8 LOOP_ARM_MEMSET: STRB w1,[x0],#1 SUBS x2,x2,#1 BNE LOOP_ARM_MEMSET MEMSET_RETURN: ret //void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst, // UWORD16 value, // UWORD8 num_words) //**************Variables Vs Registers************************* // x0 => *pu2_dst // x1 => value // x2 => num_words .text .p2align 2 .global ihevc_memset_16bit_mul_8_av8 .type ihevc_memset_16bit_mul_8_av8, %function ihevc_memset_16bit_mul_8_av8: // Assumptions: num_words is either 8, 16 or 32 // Memset 8 words dup v0.8h,w1 LOOP_MEMSET_16BIT_MUL_8: ST1 {v0.8h},[x0],#16 SUBS x2,x2,#8 BNE LOOP_MEMSET_16BIT_MUL_8 ret //void ihevc_memset_16bit(UWORD16 *pu2_dst, // UWORD16 value, // UWORD8 num_words) //**************Variables Vs Registers************************* // x0 => *pu2_dst // x1 => value // x2 => num_words .global ihevc_memset_16bit_av8 .type ihevc_memset_16bit_av8, %function ihevc_memset_16bit_av8: SUBS x2,x2,#8 BLT ARM_MEMSET_16BIT dup v0.8h,w1 LOOP_NEON_MEMSET_16BIT: // Memset 8 words ST1 {v0.8h},[x0],#16 SUBS x2,x2,#8 BGE LOOP_NEON_MEMSET_16BIT CMN x2,#8 BEQ MEMSET_16BIT_RETURN ARM_MEMSET_16BIT: ADD x2,x2,#8 LOOP_ARM_MEMSET_16BIT: STRH w1,[x0],#2 SUBS x2,x2,#1 BNE LOOP_ARM_MEMSET_16BIT MEMSET_16BIT_RETURN: ret .section .note.GNU-stack,"",%progbits