// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build mips mipsle

#include "textflag.h"

#ifdef GOARCH_mips
#define MOVWHI  MOVWL
#define MOVWLO  MOVWR
#else
#define MOVWHI  MOVWR
#define MOVWLO  MOVWL
#endif

// void runtime·memmove(void*, void*, uintptr)
TEXT runtime·memmove(SB),NOSPLIT,$-0-12
	MOVW	n+8(FP), R3
	MOVW	from+4(FP), R2
	MOVW	to+0(FP), R1

	ADDU	R3, R2, R4	// end pointer for source
	ADDU	R3, R1, R5	// end pointer for destination

	// if destination is ahead of source, start at the end of the buffer and go backward.
	SGTU	R1, R2, R6
	BNE	R6, backward

	// if less than 4 bytes, use byte by byte copying
	SGTU	$4, R3, R6
	BNE	R6, f_small_copy

	// align destination to 4 bytes
	AND	$3, R1, R6
	BEQ	R6, f_dest_aligned
	SUBU	R1, R0, R6
	AND	$3, R6
	MOVWHI	0(R2), R7
	SUBU	R6, R3
	MOVWLO	3(R2), R7
	ADDU	R6, R2
	MOVWHI	R7, 0(R1)
	ADDU	R6, R1

f_dest_aligned:
	AND	$31, R3, R7
	AND	$3, R3, R6
	SUBU	R7, R5, R7	// end pointer for 32-byte chunks
	SUBU	R6, R5, R6	// end pointer for 4-byte chunks

	// if source is not aligned, use unaligned reads
	AND	$3, R2, R8
	BNE	R8, f_large_ua

f_large:
	BEQ	R1, R7, f_words
	ADDU	$32, R1
	MOVW	0(R2), R8
	MOVW	4(R2), R9
	MOVW	8(R2), R10
	MOVW	12(R2), R11
	MOVW	16(R2), R12
	MOVW	20(R2), R13
	MOVW	24(R2), R14
	MOVW	28(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large

f_words:
	BEQ	R1, R6, f_tail
	ADDU	$4, R1
	MOVW	0(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words

f_tail:
	BEQ	R1, R5, ret
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)

ret:
	RET

f_large_ua:
	BEQ	R1, R7, f_words_ua
	ADDU	$32, R1
	MOVWHI	0(R2), R8
	MOVWHI	4(R2), R9
	MOVWHI	8(R2), R10
	MOVWHI	12(R2), R11
	MOVWHI	16(R2), R12
	MOVWHI	20(R2), R13
	MOVWHI	24(R2), R14
	MOVWHI	28(R2), R15
	MOVWLO	3(R2), R8
	MOVWLO	7(R2), R9
	MOVWLO	11(R2), R10
	MOVWLO	15(R2), R11
	MOVWLO	19(R2), R12
	MOVWLO	23(R2), R13
	MOVWLO	27(R2), R14
	MOVWLO	31(R2), R15
	ADDU	$32, R2
	MOVW	R8, -32(R1)
	MOVW	R9, -28(R1)
	MOVW	R10, -24(R1)
	MOVW	R11, -20(R1)
	MOVW	R12, -16(R1)
	MOVW	R13, -12(R1)
	MOVW	R14, -8(R1)
	MOVW	R15, -4(R1)
	JMP	f_large_ua

f_words_ua:
	BEQ	R1, R6, f_tail_ua
	MOVWHI	0(R2), R8
	ADDU	$4, R1
	MOVWLO	3(R2), R8
	ADDU	$4, R2
	MOVW	R8, -4(R1)
	JMP	f_words_ua

f_tail_ua:
	BEQ	R1, R5, ret
	MOVWHI	-4(R4), R8
	MOVWLO	-1(R4), R8
	MOVWLO	R8, -1(R5)
	JMP	ret

f_small_copy:
	BEQ	R1, R5, ret
	ADDU	$1, R1
	MOVB	0(R2), R6
	ADDU	$1, R2
	MOVB	R6, -1(R1)
	JMP	f_small_copy

backward:
	SGTU	$4, R3, R6
	BNE	R6, b_small_copy

	AND	$3, R5, R6
	BEQ	R6, b_dest_aligned
	MOVWHI	-4(R4), R7
	SUBU	R6, R3
	MOVWLO	-1(R4), R7
	SUBU	R6, R4
	MOVWLO	R7, -1(R5)
	SUBU	R6, R5

b_dest_aligned:
	AND	$31, R3, R7
	AND	$3, R3, R6
	ADDU	R7, R1, R7
	ADDU	R6, R1, R6

	AND	$3, R4, R8
	BNE	R8, b_large_ua

b_large:
	BEQ	R5, R7, b_words
	ADDU	$-32, R5
	MOVW	-4(R4), R8
	MOVW	-8(R4), R9
	MOVW	-12(R4), R10
	MOVW	-16(R4), R11
	MOVW	-20(R4), R12
	MOVW	-24(R4), R13
	MOVW	-28(R4), R14
	MOVW	-32(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large

b_words:
	BEQ	R5, R6, b_tail
	ADDU	$-4, R5
	MOVW	-4(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words

b_tail:
	BEQ	R5, R1, ret
	MOVWHI	0(R2), R8	// R2 and R1 have the same alignment so we don't need to load a whole word
	MOVWHI	R8, 0(R1)
	JMP	ret

b_large_ua:
	BEQ	R5, R7, b_words_ua
	ADDU	$-32, R5
	MOVWHI	-4(R4), R8
	MOVWHI	-8(R4), R9
	MOVWHI	-12(R4), R10
	MOVWHI	-16(R4), R11
	MOVWHI	-20(R4), R12
	MOVWHI	-24(R4), R13
	MOVWHI	-28(R4), R14
	MOVWHI	-32(R4), R15
	MOVWLO	-1(R4), R8
	MOVWLO	-5(R4), R9
	MOVWLO	-9(R4), R10
	MOVWLO	-13(R4), R11
	MOVWLO	-17(R4), R12
	MOVWLO	-21(R4), R13
	MOVWLO	-25(R4), R14
	MOVWLO	-29(R4), R15
	ADDU	$-32, R4
	MOVW	R8, 28(R5)
	MOVW	R9, 24(R5)
	MOVW	R10, 20(R5)
	MOVW	R11, 16(R5)
	MOVW	R12, 12(R5)
	MOVW	R13, 8(R5)
	MOVW	R14, 4(R5)
	MOVW	R15, 0(R5)
	JMP	b_large_ua

b_words_ua:
	BEQ	R5, R6, b_tail_ua
	MOVWHI	-4(R4), R8
	ADDU	$-4, R5
	MOVWLO	-1(R4), R8
	ADDU	$-4, R4
	MOVW	R8, 0(R5)
	JMP	b_words_ua

b_tail_ua:
	BEQ	R5, R1, ret
	MOVWHI	(R2), R8
	MOVWLO	3(R2), R8
	MOVWHI	R8, 0(R1)
	JMP ret

b_small_copy:
	BEQ	R5, R1, ret
	ADDU	$-1, R5
	MOVB	-1(R4), R6
	ADDU	$-1, R4
	MOVB	R6, 0(R5)
	JMP	b_small_copy