/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

        .text
        .align 4
        .syntax unified

        .global arm_memset32
        .type   arm_memset32, %function
        .global arm_memset16
        .type   arm_memset16, %function

/*
 * Optimized memset functions for ARM.
 *
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * Register use (both entry points):
 *   In:    r0 = dst, r1 = value, r2 = count (in elements, signed)
 *   Out:   nothing; r0 is left pointing just past the last byte written
 *   Clobb: r0-r3, ip, flags (lr is saved on entry and popped into pc)
 *
 * A count <= 0 returns immediately without storing anything.
 *
 * Alignment: arm_memset16 only tests bit 1 of dst and arm_memset32 only
 * looks at bits 2-4, so dst is assumed to be naturally aligned for its
 * element type (2 / 4 bytes respectively).
 *
 * arm_memset16 falls into the body of arm_memset32 at .Lwork_32 and both
 * share the .Lfinish epilogue, so the two functions must stay together
 * and keep the same stack layout (a single pushed lr).
 */

arm_memset16:
        .fnstart
        push        {lr}
        /* lr is reused as a data register below, so tell the unwinder
         * that the return address now lives on the stack. */
        .save       {lr}

        /* If count <= 0 there is nothing to do. This must be cmp, not teq:
         * teq leaves the V flag untouched, and "le" (Z set or N != V)
         * would then depend on whatever V the caller happened to leave. */
        cmp         r2, #0
        ble         .Lfinish

        /* Multiply count by 2 - go from the number of 16-bit shorts
         * to the number of bytes desired. */
        mov         r2, r2, lsl #1

        /* expand the data to 32 bits
         * (assumes the upper 16 bits of r1 are zero on entry - TODO confirm
         * against the calling convention in use) */
        orr         r1, r1, r1, lsl #16

        /* align to 32 bits: store one leading halfword if dst is only
         * 2-byte aligned, and account for it in the byte count */
        tst         r0, #2
        strhne      r1, [r0], #2
        subne       r2, r2, #2

        /* Now jump into the main loop below. */
        b           .Lwork_32
        .fnend

arm_memset32:
        .fnstart
        push        {lr}
        /* lr is reused as a data register below, so tell the unwinder
         * that the return address now lives on the stack. */
        .save       {lr}

        /* If count <= 0 there is nothing to do. cmp (not teq) so that V is
         * well defined for the signed "le" condition. */
        cmp         r2, #0
        ble         .Lfinish

        /* Multiply count by 4 - go from the number of 32-bit words to
         * the number of bytes desired. */
        mov         r2, r2, lsl #2

.Lwork_32:
        /* Here: r0 = 4-byte-aligned dst, r1 = 32-bit fill pattern,
         * r2 = number of bytes still to write. */

        /* Set up registers ready for writing them out. */
        mov         ip, r1
        mov         lr, r1

        /* Try to align the destination to a cache line. Assume 32
         * byte (8 word) cache lines, it's the common case. */
        rsb         r3, r0, #0
        ands        r3, r3, #0x1C           /* r3 = bytes up to the next 32-byte boundary */
        beq         .Laligned32
        cmp         r3, r2
        andhi       r3, r2, #0x1C           /* fewer bytes than that left: clamp to whole words */
        sub         r2, r2, r3

        /* (Optionally) write any unaligned leading bytes.
         * (0-28 bytes, length in r3)
         * The shifts move length bits into the flags: after lsl #28,
         * C = bit 4 (16 bytes) and N = bit 3 (8 bytes); after a further
         * lsl #2, C = bit 2 (4 bytes). */
        movs        r3, r3, lsl #28
        stmiacs     r0!, {r1, lr}
        stmiacs     r0!, {r1, lr}
        stmiami     r0!, {r1, lr}
        movs        r3, r3, lsl #2
        strcs       r1, [r0], #4

        /* Now quickly loop through the cache-aligned data. */
.Laligned32:
        mov         r3, r1
1:      subs        r2, r2, #32             /* "hs" while a full 32 bytes remained */
        stmiahs     r0!, {r1,r3,ip,lr}
        stmiahs     r0!, {r1,r3,ip,lr}
        bhs         1b
        add         r2, r2, #32             /* undo the final subtract: r2 = 0..31 */

        /* (Optionally) store any remaining trailing bytes.
         * (0-30 bytes, length in r2)
         * Same flag trick as above: first shift gives C = bit 4 (16 bytes)
         * and N = bit 3 (8 bytes); second shift gives C = bit 2 (4 bytes)
         * and N = bit 1 (2 bytes, only possible via arm_memset16). */
        movs        r2, r2, lsl #28
        stmiacs     r0!, {r1,r3,ip,lr}
        stmiami     r0!, {r1,lr}
        movs        r2, r2, lsl #2
        strcs       r1, [r0], #4
        strhmi      lr, [r0], #2

.Lfinish:
        pop         {pc}
        .fnend