; Choose the code section declaration that matches the assembler's output
; format: "obj", "win32", or anything else.
%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
; Not Yasm: define the @feat.00 symbol explicitly (see the SafeSEH link above).
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif
;-----------------------------------------------------------------------
; _ChaCha20_ctr32 -- XOR a buffer with a ChaCha20 key stream (32-bit x86).
;
; Arguments are read from the stack. Offsets below are relative to esp
; after the four register pushes in the prologue:
;   [20+esp]  arg0  destination pointer (written)
;   [24+esp]  arg1  source pointer (read)
;   [28+esp]  arg2  byte count; the routine returns immediately when 0
;   [32+esp]  arg3  pointer to 8 dwords, loaded as state words 4..11
;   [36+esp]  arg4  pointer to 4 dwords, loaded as state words 12..15
;                   (word 12 is the block counter, bumped once per block)
; NOTE(review): presumably the C prototype is
;   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
;                       const uint32_t key[8], const uint32_t counter[4]);
; confirm against the C header that declares it.
;
; Preserves ebp, ebx, esi, edi (pushed/popped). Clobbers eax, ecx, edx and
; flags; the SSSE3 path it may branch to also uses xmm0-xmm7.
;
; Scalar-path frame after "sub esp,132":
;   [0..60+esp]      working state words x0..x15 (word i at [4*i+esp]);
;                    several words live only in registers during the rounds
;   [80..108+esp]    copy of the 8 key dwords   (input words 4..11)
;   [112..124+esp]   copy of the 4 counter dwords (input words 12..15)
;   [128+esp]        double-round loop counter
;   [152+esp]        arg0 slot, reused as the running destination pointer
;   [156+esp]        arg1 slot, reused as the running source pointer
;   [160+esp]        arg2 slot, reused as the remaining byte count
;-----------------------------------------------------------------------
global	_ChaCha20_ctr32
align	16
_ChaCha20_ctr32:
L$_ChaCha20_ctr32_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
	; Nothing to do for a zero byte count.
	xor	eax,eax
	cmp	eax,DWORD [28+esp]
	je	NEAR L$000no_data
	; call/pop leaves the run-time address of L$pic_point in eax. The SSSE3
	; code uses it to locate L$ssse3_data relative to this label.
	call	L$pic_point
L$pic_point:
	pop	eax
	; Dispatch on two capability bits: bit 24 of dword 0 and bit 9 of
	; dword 1 of _OPENSSL_ia32cap_P. Both must be set to take the SSSE3 path.
	; NOTE(review): presumably these are the FXSR and SSSE3 CPUID bits --
	; confirm against the OPENSSL_ia32cap documentation.
	lea	ebp,[_OPENSSL_ia32cap_P]
	test	DWORD [ebp],16777216
	jz	NEAR L$001x86
	test	DWORD [4+ebp],512
	jz	NEAR L$001x86
	jmp	NEAR L$ssse3_shortcut
L$001x86:
	; Scalar path: esi = arg3 (key), edi = arg4 (counter block).
	mov	esi,DWORD [32+esp]
	mov	edi,DWORD [36+esp]
	sub	esp,132
	; Copy key dwords 0..3 to [80..92+esp].
	mov	eax,DWORD [esi]
	mov	ebx,DWORD [4+esi]
	mov	ecx,DWORD [8+esi]
	mov	edx,DWORD [12+esi]
	mov	DWORD [80+esp],eax
	mov	DWORD [84+esp],ebx
	mov	DWORD [88+esp],ecx
	mov	DWORD [92+esp],edx
	; Copy key dwords 4..7 to [96..108+esp].
	mov	eax,DWORD [16+esi]
	mov	ebx,DWORD [20+esi]
	mov	ecx,DWORD [24+esi]
	mov	edx,DWORD [28+esi]
	mov	DWORD [96+esp],eax
	mov	DWORD [100+esp],ebx
	mov	DWORD [104+esp],ecx
	mov	DWORD [108+esp],edx
	; Copy the counter block to [112..124+esp]. The first dword is stored
	; minus one because L$002entry adds one before every block, so the
	; first block ends up using the caller's value unchanged.
	mov	eax,DWORD [edi]
	mov	ebx,DWORD [4+edi]
	mov	ecx,DWORD [8+edi]
	mov	edx,DWORD [12+edi]
	sub	eax,1
	mov	DWORD [112+esp],eax
	mov	DWORD [116+esp],ebx
	mov	DWORD [120+esp],ecx
	mov	DWORD [124+esp],edx
	jmp	NEAR L$002entry
align	16
L$003outer_loop:
	; Reached after a full 64-byte block: save the advanced source pointer
	; (ebx), destination pointer (eax) and remaining length (ecx).
	mov	DWORD [156+esp],ebx
	mov	DWORD [152+esp],eax
	mov	DWORD [160+esp],ecx
L$002entry:
	; Build the initial state for one block. Words 0..3 are the constants
	; 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 ("expand 32-byte k").
	; x0 stays in eax; x1..x3 go to the working area.
	mov	eax,1634760805
	mov	DWORD [4+esp],857760878
	mov	DWORD [8+esp],2036477234
	mov	DWORD [12+esp],1797285236
	; Words 5, 6, 10, 11, 13, 14 are copied from the saved input block.
	mov	ebx,DWORD [84+esp]
	mov	ebp,DWORD [88+esp]
	mov	ecx,DWORD [104+esp]
	mov	esi,DWORD [108+esp]
	mov	edx,DWORD [116+esp]
	mov	edi,DWORD [120+esp]
	mov	DWORD [20+esp],ebx
	mov	DWORD [24+esp],ebp
	mov	DWORD [40+esp],ecx
	mov	DWORD [44+esp],esi
	mov	DWORD [52+esp],edx
	mov	DWORD [56+esp],edi
	; Words 7 and 15 go to the working area; x4 (ebp), x8 (ecx), x9 (esi)
	; and x12 (edx, the counter) enter the round loop in registers.
	mov	ebx,DWORD [92+esp]
	mov	edi,DWORD [124+esp]
	mov	edx,DWORD [112+esp]
	mov	ebp,DWORD [80+esp]
	mov	ecx,DWORD [96+esp]
	mov	esi,DWORD [100+esp]
	; Advance the block counter and write it back for the final addition.
	add	edx,1
	mov	DWORD [28+esp],ebx
	mov	DWORD [60+esp],edi
	mov	DWORD [112+esp],edx
	; ebx = number of double rounds (10 iterations = 20 rounds).
	mov	ebx,10
	jmp	NEAR L$004loop
align	16
L$004loop:
	; One iteration = one double round: four "column" quarter-rounds on
	; words (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), then four
	; "diagonal" quarter-rounds on (0,5,10,15) (1,6,11,12) (2,7,8,13)
	; (3,4,9,14). Each quarter-round is add/xor/rol with rotations
	; 16, 12, 8, 7. Loads and stores for neighbouring quarter-rounds are
	; interleaved, so statement order matters. The loop counter is spilled
	; to [128+esp] because ebx is needed as a working register.
	add	eax,ebp
	mov	DWORD [128+esp],ebx
	mov	ebx,ebp
	xor	edx,eax
	rol	edx,16
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [52+esp]
	rol	ebx,12
	mov	ebp,DWORD [20+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [48+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [32+esp],ecx
	rol	edi,16
	mov	DWORD [16+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [40+esp]
	xor	ebp,esi
	mov	edx,DWORD [56+esp]
	rol	ebp,12
	mov	ebx,DWORD [24+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [52+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [36+esp],esi
	rol	edx,16
	mov	DWORD [20+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [44+esp]
	xor	ebx,ecx
	mov	edi,DWORD [60+esp]
	rol	ebx,12
	mov	ebp,DWORD [28+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [56+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [24+esp],ebx
	add	esi,edi
	xor	ebp,esi
	rol	ebp,12
	mov	ebx,DWORD [20+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	; From here the diagonal quarter-rounds start (x0 reloaded into eax);
	; the tail of the last column quarter-round is still interleaved.
	; x10 (ecx), x11 (esi) and x15 (edi -> edx) are carried in registers.
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	edx,edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	rol	edx,16
	mov	DWORD [28+esp],ebp
	add	ecx,edx
	xor	ebx,ecx
	mov	edi,DWORD [48+esp]
	rol	ebx,12
	mov	ebp,DWORD [24+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [esp],eax
	rol	edx,8
	mov	eax,DWORD [4+esp]
	add	ecx,edx
	mov	DWORD [60+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	mov	DWORD [40+esp],ecx
	rol	edi,16
	mov	DWORD [20+esp],ebx
	add	esi,edi
	mov	ecx,DWORD [32+esp]
	xor	ebp,esi
	mov	edx,DWORD [52+esp]
	rol	ebp,12
	mov	ebx,DWORD [28+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [4+esp],eax
	rol	edi,8
	mov	eax,DWORD [8+esp]
	add	esi,edi
	mov	DWORD [48+esp],edi
	xor	ebp,esi
	add	eax,ebx
	rol	ebp,7
	xor	edx,eax
	mov	DWORD [44+esp],esi
	rol	edx,16
	mov	DWORD [24+esp],ebp
	add	ecx,edx
	mov	esi,DWORD [36+esp]
	xor	ebx,ecx
	mov	edi,DWORD [56+esp]
	rol	ebx,12
	mov	ebp,DWORD [16+esp]
	add	eax,ebx
	xor	edx,eax
	mov	DWORD [8+esp],eax
	rol	edx,8
	mov	eax,DWORD [12+esp]
	add	ecx,edx
	mov	DWORD [52+esp],edx
	xor	ebx,ecx
	add	eax,ebp
	rol	ebx,7
	xor	edi,eax
	rol	edi,16
	mov	DWORD [28+esp],ebx
	add	esi,edi
	xor	ebp,esi
	; Reload x12 into edx and the loop counter into ebx for the next pass.
	mov	edx,DWORD [48+esp]
	rol	ebp,12
	mov	ebx,DWORD [128+esp]
	add	eax,ebp
	xor	edi,eax
	mov	DWORD [12+esp],eax
	rol	edi,8
	mov	eax,DWORD [esp]
	add	esi,edi
	mov	DWORD [56+esp],edi
	xor	ebp,esi
	rol	ebp,7
	dec	ebx
	jnz	NEAR L$004loop
	; Rounds done. Registers hold x0 (eax), x4 (ebp), x8 (ecx), x9 (esi),
	; x12 (edx), x14 (edi). Add the initial state back in, starting with
	; the register-resident words; ebx = remaining byte count.
	mov	ebx,DWORD [160+esp]
	add	eax,1634760805
	add	ebp,DWORD [80+esp]
	add	ecx,DWORD [96+esp]
	add	esi,DWORD [100+esp]
	; Fewer than 64 bytes left: finish byte-by-byte in L$005tail.
	cmp	ebx,64
	jb	NEAR L$005tail
	; Full block: ebx = source pointer, eax (loaded below) = destination.
	mov	ebx,DWORD [156+esp]
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	; XOR words 0, 4, 8, 9, 12, 14 with the input. Word 0 is parked at
	; [esp] because eax is about to become the destination pointer.
	xor	eax,DWORD [ebx]
	xor	ebp,DWORD [16+ebx]
	mov	DWORD [esp],eax
	mov	eax,DWORD [152+esp]
	xor	ecx,DWORD [32+ebx]
	xor	esi,DWORD [36+ebx]
	xor	edx,DWORD [48+ebx]
	xor	edi,DWORD [56+ebx]
	mov	DWORD [16+eax],ebp
	mov	DWORD [32+eax],ecx
	mov	DWORD [36+eax],esi
	mov	DWORD [48+eax],edx
	mov	DWORD [56+eax],edi
	; Words 1, 2, 3, 5, 6: add initial value, XOR with input, store.
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	xor	ebp,DWORD [4+ebx]
	xor	ecx,DWORD [8+ebx]
	xor	esi,DWORD [12+ebx]
	xor	edx,DWORD [20+ebx]
	xor	edi,DWORD [24+ebx]
	mov	DWORD [4+eax],ebp
	mov	DWORD [8+eax],ecx
	mov	DWORD [12+eax],esi
	mov	DWORD [20+eax],edx
	mov	DWORD [24+eax],edi
	; Words 7, 10, 11, 13, 15: add initial value, XOR with input, store.
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	xor	ebp,DWORD [28+ebx]
	xor	ecx,DWORD [40+ebx]
	xor	esi,DWORD [44+ebx]
	xor	edx,DWORD [52+ebx]
	xor	edi,DWORD [60+ebx]
	; Advance the source pointer, store the last words (word 0 comes back
	; from [esp]), advance the destination pointer and the byte count.
	lea	ebx,[64+ebx]
	mov	DWORD [28+eax],ebp
	mov	ebp,DWORD [esp]
	mov	DWORD [40+eax],ecx
	mov	ecx,DWORD [160+esp]
	mov	DWORD [44+eax],esi
	mov	DWORD [52+eax],edx
	mov	DWORD [60+eax],edi
	mov	DWORD [eax],ebp
	lea	eax,[64+eax]
	sub	ecx,64
	jnz	NEAR L$003outer_loop
	jmp	NEAR L$006done
L$005tail:
	; Partial final block (ebx = 1..63 bytes left). Materialise the whole
	; 64-byte key stream block at [0..60+esp], then XOR it into the output
	; one byte at a time.
	add	edx,DWORD [112+esp]
	add	edi,DWORD [120+esp]
	mov	DWORD [esp],eax
	mov	DWORD [16+esp],ebp
	mov	DWORD [32+esp],ecx
	mov	DWORD [36+esp],esi
	mov	DWORD [48+esp],edx
	mov	DWORD [56+esp],edi
	mov	ebp,DWORD [4+esp]
	mov	ecx,DWORD [8+esp]
	mov	esi,DWORD [12+esp]
	mov	edx,DWORD [20+esp]
	mov	edi,DWORD [24+esp]
	add	ebp,857760878
	add	ecx,2036477234
	add	esi,1797285236
	add	edx,DWORD [84+esp]
	add	edi,DWORD [88+esp]
	mov	DWORD [4+esp],ebp
	mov	DWORD [8+esp],ecx
	mov	DWORD [12+esp],esi
	mov	DWORD [20+esp],edx
	mov	DWORD [24+esp],edi
	mov	ebp,DWORD [28+esp]
	mov	ecx,DWORD [40+esp]
	mov	esi,DWORD [44+esp]
	mov	edx,DWORD [52+esp]
	mov	edi,DWORD [60+esp]
	add	ebp,DWORD [92+esp]
	add	ecx,DWORD [104+esp]
	add	esi,DWORD [108+esp]
	add	edx,DWORD [116+esp]
	add	edi,DWORD [124+esp]
	; ebp = source pointer, ecx = destination pointer, esi = byte index 0.
	mov	DWORD [28+esp],ebp
	mov	ebp,DWORD [156+esp]
	mov	DWORD [40+esp],ecx
	mov	ecx,DWORD [152+esp]
	mov	DWORD [44+esp],esi
	xor	esi,esi
	mov	DWORD [52+esp],edx
	mov	DWORD [60+esp],edi
	xor	eax,eax
	xor	edx,edx
L$007tail_loop:
	; dst[i] = src[i] ^ keystream[i]; the index is incremented before the
	; store, hence the -1 displacement.
	mov	al,BYTE [ebp*1+esi]
	mov	dl,BYTE [esi*1+esp]
	lea	esi,[1+esi]
	xor	al,dl
	mov	BYTE [esi*1+ecx-1],al
	dec	ebx
	jnz	NEAR L$007tail_loop
L$006done:
	; Release the scalar-path frame.
	add	esp,132
L$000no_data:
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
;-----------------------------------------------------------------------
; _ChaCha20_ssse3 -- SSSE3 ChaCha20 path. It reads the same five stack
; arguments as _ChaCha20_ctr32, at the same offsets after four pushes:
; [20+esp] destination, [24+esp] source, [28+esp] byte count,
; [32+esp] key pointer, [36+esp] counter-block pointer.
;
; _ChaCha20_ctr32 jumps to L$ssse3_shortcut with eax holding the address
; of L$pic_point; the code uses it to find L$ssse3_data.
; NOTE(review): when entered through the global label, nothing before the
; "lea eax,[...+eax]" below sets eax, and there is no zero-length check on
; this path. Presumably the symbol is only meant to be reached via the
; shortcut -- confirm before calling it directly.
;
; Preserves ebp, ebx, esi, edi; clobbers eax, ecx, edx, flags, xmm0-xmm7.
;
; Frame (esp is lowered by 524 and rounded down to a multiple of 64):
;   [0..255+esp]    4-block path: working state, 16 rows of 16 bytes,
;                   addressed as [ebx-128 .. ebx+112] with ebx = esp+128
;                   1-block path: [0..63+esp] holds the input state block
;   [256..511+esp]  4-block path: per-block input state, 16 rows,
;                   addressed as [ebp-128 .. ebp+112] with ebp = esp+384
;   [512+esp]       caller's esp (value after the pushes), restored at exit
;   [516+esp]       key pointer        (saved on the 4-block path only)
;   [520+esp]       counter pointer    (saved on the 4-block path only)
;
; Constant table at eax after the lea (see L$ssse3_data):
;   [eax]     pshufb mask: rotate each dword left by 16
;   [16+eax]  pshufb mask: rotate each dword left by 8
;   [32+eax]  the four ChaCha constants (state words 0..3)
;   [48+eax]  0,1,2,3      [64+eax]  4,4,4,4     [80+eax]  1,0,0,0
;   [96+eax]  4,0,0,0      [112+eax] 0,-1,-1,-1
;-----------------------------------------------------------------------
global	_ChaCha20_ssse3
align	16
_ChaCha20_ssse3:
L$_ChaCha20_ssse3_begin:
	push	ebp
	push	ebx
	push	esi
	push	edi
L$ssse3_shortcut:
	; edi = dst, esi = src, ecx = length, edx = key, ebx = counter block.
	mov	edi,DWORD [20+esp]
	mov	esi,DWORD [24+esp]
	mov	ecx,DWORD [28+esp]
	mov	edx,DWORD [32+esp]
	mov	ebx,DWORD [36+esp]
	; Build a 64-byte aligned frame and remember the old esp inside it.
	mov	ebp,esp
	sub	esp,524
	and	esp,-64
	mov	DWORD [512+esp],ebp
	; eax: address of L$pic_point -> address of L$ssse3_data.
	lea	eax,[(L$ssse3_data-L$pic_point)+eax]
	; xmm3 = the 16-byte counter block.
	movdqu	xmm3,[ebx]
	; Less than 256 bytes: go straight to the one-block-at-a-time path.
	cmp	ecx,256
	jb	NEAR L$0081x
	; ---- 4-block path setup ----
	; Each 16-byte row holds one state word for four consecutive blocks,
	; one block per 32-bit lane (pshufd broadcasts a word to all lanes).
	mov	DWORD [516+esp],edx
	mov	DWORD [520+esp],ebx
	; ecx is kept biased by -256 while the 4-block loop runs.
	sub	ecx,256
	lea	ebp,[384+esp]
	movdqu	xmm7,[edx]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	; Word 12 row = counter + (0,1,2,3) - (4,4,4,4); the loop adds
	; (4,4,4,4) at the top of every iteration, so the first iteration
	; uses counter+0 .. counter+3.
	paddd	xmm0,[48+eax]
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	psubd	xmm0,[64+eax]
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	; Rows for words 12..15 at [64..112+ebp].
	movdqa	[64+ebp],xmm0
	movdqa	[80+ebp],xmm1
	movdqa	[96+ebp],xmm2
	movdqa	[112+ebp],xmm3
	movdqu	xmm3,[16+edx]
	; Rows for words 4..7 (first half of the key) at [ebp-64..ebp-16].
	movdqa	[ebp-64],xmm4
	movdqa	[ebp-48],xmm5
	movdqa	[ebp-32],xmm6
	movdqa	[ebp-16],xmm7
	movdqa	xmm7,[32+eax]
	lea	ebx,[128+esp]
	pshufd	xmm0,xmm3,0
	pshufd	xmm1,xmm3,85
	pshufd	xmm2,xmm3,170
	pshufd	xmm3,xmm3,255
	pshufd	xmm4,xmm7,0
	pshufd	xmm5,xmm7,85
	pshufd	xmm6,xmm7,170
	pshufd	xmm7,xmm7,255
	; Rows for words 8..11 (second half of the key) at [ebp..48+ebp].
	movdqa	[ebp],xmm0
	movdqa	[16+ebp],xmm1
	movdqa	[32+ebp],xmm2
	movdqa	[48+ebp],xmm3
	; Rows for words 0..3 (constants) at [ebp-128..ebp-80].
	movdqa	[ebp-128],xmm4
	movdqa	[ebp-112],xmm5
	movdqa	[ebp-96],xmm6
	movdqa	[ebp-80],xmm7
	; Bias src/dst by +128 so the four 64-byte blocks are addressed with
	; displacements -128, -64, 0 and +64.
	lea	esi,[128+esi]
	lea	edi,[128+edi]
	jmp	NEAR L$009outer_loop
align	16
L$009outer_loop:
	; Copy the input rows into the working area at ebx. Words 0, 4, 8, 9
	; and 12 are not copied; they are loaded straight into xmm0, xmm3,
	; xmm4, xmm5 and xmm6 at the end of this prologue.
	movdqa	xmm1,[ebp-112]
	movdqa	xmm2,[ebp-96]
	movdqa	xmm3,[ebp-80]
	movdqa	xmm5,[ebp-48]
	movdqa	xmm6,[ebp-32]
	movdqa	xmm7,[ebp-16]
	movdqa	[ebx-112],xmm1
	movdqa	[ebx-96],xmm2
	movdqa	[ebx-80],xmm3
	movdqa	[ebx-48],xmm5
	movdqa	[ebx-32],xmm6
	movdqa	[ebx-16],xmm7
	movdqa	xmm2,[32+ebp]
	movdqa	xmm3,[48+ebp]
	movdqa	xmm4,[64+ebp]
	movdqa	xmm5,[80+ebp]
	movdqa	xmm6,[96+ebp]
	movdqa	xmm7,[112+ebp]
	; Advance all four block counters by 4 and write the row back to the
	; input area as well, so the final addition sees the same value.
	paddd	xmm4,[64+eax]
	movdqa	[32+ebx],xmm2
	movdqa	[48+ebx],xmm3
	movdqa	[64+ebx],xmm4
	movdqa	[80+ebx],xmm5
	movdqa	[96+ebx],xmm6
	movdqa	[112+ebx],xmm7
	movdqa	[64+ebp],xmm4
	movdqa	xmm0,[ebp-128]
	movdqa	xmm6,xmm4
	movdqa	xmm3,[ebp-64]
	movdqa	xmm4,[ebp]
	movdqa	xmm5,[16+ebp]
	; edx = number of double rounds.
	mov	edx,10
	nop
align	16
L$010loop:
	; One double round on four blocks at once, in the same order as the
	; scalar code: column quarter-rounds (0,4,8,12) (1,5,9,13) (2,6,10,14)
	; (3,7,11,15), then diagonal ones (0,5,10,15) (1,6,11,12) (2,7,8,13)
	; (3,4,9,14). Rotations by 16 and 8 use pshufb with the masks at
	; [eax] and [16+eax]; rotations by 12 and 7 use pslld/psrld/por.
	; Loads and stores of neighbouring quarter-rounds are interleaved.
	paddd	xmm0,xmm3
	movdqa	xmm2,xmm3
	pxor	xmm6,xmm0
	pshufb	xmm6,[eax]
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-48]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[80+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[64+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-64],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[32+ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-32]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[96+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[80+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[16+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-48],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[48+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-16]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[112+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[96+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-32],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-48]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	; Diagonal half starts being interleaved here (x0 reloaded into xmm0).
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	xmm6,xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	pshufb	xmm6,[eax]
	movdqa	[ebx-16],xmm3
	paddd	xmm4,xmm6
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-32]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-112]
	paddd	xmm0,xmm2
	movdqa	xmm7,[64+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-128],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[112+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	movdqa	[32+ebx],xmm4
	pshufb	xmm7,[eax]
	movdqa	[ebx-48],xmm2
	paddd	xmm5,xmm7
	movdqa	xmm4,[ebx]
	pxor	xmm3,xmm5
	movdqa	xmm2,[ebx-16]
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	movdqa	xmm0,[ebx-96]
	paddd	xmm1,xmm3
	movdqa	xmm6,[80+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-112],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[64+ebx],xmm7
	pxor	xmm3,xmm5
	paddd	xmm0,xmm2
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	pxor	xmm6,xmm0
	por	xmm3,xmm1
	movdqa	[48+ebx],xmm5
	pshufb	xmm6,[eax]
	movdqa	[ebx-32],xmm3
	paddd	xmm4,xmm6
	movdqa	xmm5,[16+ebx]
	pxor	xmm2,xmm4
	movdqa	xmm3,[ebx-64]
	movdqa	xmm1,xmm2
	pslld	xmm2,12
	psrld	xmm1,20
	por	xmm2,xmm1
	movdqa	xmm1,[ebx-80]
	paddd	xmm0,xmm2
	movdqa	xmm7,[96+ebx]
	pxor	xmm6,xmm0
	movdqa	[ebx-96],xmm0
	pshufb	xmm6,[16+eax]
	paddd	xmm4,xmm6
	movdqa	[80+ebx],xmm6
	pxor	xmm2,xmm4
	paddd	xmm1,xmm3
	movdqa	xmm0,xmm2
	pslld	xmm2,7
	psrld	xmm0,25
	pxor	xmm7,xmm1
	por	xmm2,xmm0
	pshufb	xmm7,[eax]
	movdqa	[ebx-16],xmm2
	paddd	xmm5,xmm7
	pxor	xmm3,xmm5
	movdqa	xmm0,xmm3
	pslld	xmm3,12
	psrld	xmm0,20
	por	xmm3,xmm0
	; Reload x0 and x12 so the next iteration starts with them in xmm0/xmm6.
	movdqa	xmm0,[ebx-128]
	paddd	xmm1,xmm3
	movdqa	xmm6,[64+ebx]
	pxor	xmm7,xmm1
	movdqa	[ebx-80],xmm1
	pshufb	xmm7,[16+eax]
	paddd	xmm5,xmm7
	movdqa	[96+ebx],xmm7
	pxor	xmm3,xmm5
	movdqa	xmm1,xmm3
	pslld	xmm3,7
	psrld	xmm1,25
	por	xmm3,xmm1
	dec	edx
	jnz	NEAR L$010loop
	; Rounds done: flush the register-resident rows (x4, x8, x9, x12, x14)
	; back to the working area.
	movdqa	[ebx-64],xmm3
	movdqa	[ebx],xmm4
	movdqa	[16+ebx],xmm5
	movdqa	[64+ebx],xmm6
	movdqa	[96+ebx],xmm7
	; ---- Output stage, words 0..3 ----
	; Add the input rows, then transpose the 4x4 dword matrix so that each
	; register holds four consecutive words of ONE block: xmm0 = block 0,
	; xmm1 = block 1, xmm6 = block 2, xmm3 = block 3.
	movdqa	xmm1,[ebx-112]
	movdqa	xmm2,[ebx-96]
	movdqa	xmm3,[ebx-80]
	paddd	xmm0,[ebp-128]
	paddd	xmm1,[ebp-112]
	paddd	xmm2,[ebp-96]
	paddd	xmm3,[ebp-80]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	; Load 16 input bytes from each of the four blocks, XOR, store, and
	; step both pointers by 16; the next rows are loaded in between.
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx-64]
	pxor	xmm5,xmm1
	movdqa	xmm1,[ebx-48]
	pxor	xmm6,xmm2
	movdqa	xmm2,[ebx-32]
	pxor	xmm7,xmm3
	movdqa	xmm3,[ebx-16]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; ---- Output stage, words 4..7 ----
	paddd	xmm0,[ebp-64]
	paddd	xmm1,[ebp-48]
	paddd	xmm2,[ebp-32]
	paddd	xmm3,[ebp-16]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[16+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[32+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[48+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; ---- Output stage, words 8..11 ----
	paddd	xmm0,[ebp]
	paddd	xmm1,[16+ebp]
	paddd	xmm2,[32+ebp]
	paddd	xmm3,[48+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	lea	esi,[16+esi]
	pxor	xmm4,xmm0
	movdqa	xmm0,[64+ebx]
	pxor	xmm5,xmm1
	movdqa	xmm1,[80+ebx]
	pxor	xmm6,xmm2
	movdqa	xmm2,[96+ebx]
	pxor	xmm7,xmm3
	movdqa	xmm3,[112+ebx]
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[16+edi]
	; ---- Output stage, words 12..15 ----
	paddd	xmm0,[64+ebp]
	paddd	xmm1,[80+ebp]
	paddd	xmm2,[96+ebp]
	paddd	xmm3,[112+ebp]
	movdqa	xmm6,xmm0
	punpckldq	xmm0,xmm1
	movdqa	xmm7,xmm2
	punpckldq	xmm2,xmm3
	punpckhdq	xmm6,xmm1
	punpckhdq	xmm7,xmm3
	movdqa	xmm1,xmm0
	punpcklqdq	xmm0,xmm2
	movdqa	xmm3,xmm6
	punpcklqdq	xmm6,xmm7
	punpckhqdq	xmm1,xmm2
	punpckhqdq	xmm3,xmm7
	movdqu	xmm4,[esi-128]
	movdqu	xmm5,[esi-64]
	movdqu	xmm2,[esi]
	movdqu	xmm7,[64+esi]
	; 3*16 + 208 = 256: both pointers end up one 4-block group further on.
	lea	esi,[208+esi]
	pxor	xmm4,xmm0
	pxor	xmm5,xmm1
	pxor	xmm6,xmm2
	pxor	xmm7,xmm3
	movdqu	[edi-128],xmm4
	movdqu	[edi-64],xmm5
	movdqu	[edi],xmm6
	movdqu	[64+edi],xmm7
	lea	edi,[208+edi]
	; Loop while at least another 256 bytes remain (no borrow).
	sub	ecx,256
	jnc	NEAR L$009outer_loop
	; Undo the bias: ecx = leftover bytes (0..255). Zero means finished.
	add	ecx,256
	jz	NEAR L$011done
	; Leftover goes through the 1-block path: restore the counter and key
	; pointers and remove the +128 bias from src/dst.
	mov	ebx,DWORD [520+esp]
	lea	esi,[esi-128]
	mov	edx,DWORD [516+esp]
	lea	edi,[edi-128]
	; Rebuild the counter block in xmm3: lane 0 = (lane 0 of the word-12
	; row) + 4, i.e. the counter of the next unprocessed block; lanes 1..3
	; come from the caller's counter block (mask 0,-1,-1,-1).
	movd	xmm2,DWORD [64+ebp]
	movdqu	xmm3,[ebx]
	paddd	xmm2,[96+eax]
	pand	xmm3,[112+eax]
	por	xmm3,xmm2
L$0081x:
	; ---- 1-block path ----
	; xmm0..xmm3 = state rows 0..3 (constants, key lo, key hi, counter
	; block); xmm6/xmm7 = the rot16/rot8 pshufb masks. The input state is
	; kept at [0..63+esp] for the final addition.
	movdqa	xmm0,[32+eax]
	movdqu	xmm1,[edx]
	movdqu	xmm2,[16+edx]
	movdqa	xmm6,[eax]
	movdqa	xmm7,[16+eax]
	; NOTE(review): this dword store is overwritten by the 16-byte store
	; of xmm3 to the same address four instructions below.
	mov	DWORD [48+esp],ebp
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	mov	edx,10
	jmp	NEAR L$012loop1x
align	16
L$013outer1x:
	; Next block: reload rows 0..2, add (1,0,0,0) to the counter row and
	; store it back.
	movdqa	xmm3,[80+eax]
	movdqa	xmm0,[esp]
	movdqa	xmm1,[16+esp]
	movdqa	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	mov	edx,10
	movdqa	[48+esp],xmm3
	jmp	NEAR L$012loop1x
align	16
L$012loop1x:
	; One double round with a whole row per register. The "db" sequences
	; are hand-encoded pshufb instructions:
	;   102,15,56,0,222 = pshufb xmm3,xmm6  (rotate dwords left by 16)
	;   102,15,56,0,223 = pshufb xmm3,xmm7  (rotate dwords left by 8)
	; First half: column round.
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; Rotate the lanes of rows 1..3 so the diagonals line up as columns.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,57
	pshufd	xmm3,xmm3,147
	nop
	; Second half: diagonal round (same operations on the rotated rows).
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,222
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,20
	pslld	xmm4,12
	por	xmm1,xmm4
	paddd	xmm0,xmm1
	pxor	xmm3,xmm0
db	102,15,56,0,223
	paddd	xmm2,xmm3
	pxor	xmm1,xmm2
	movdqa	xmm4,xmm1
	psrld	xmm1,25
	pslld	xmm4,7
	por	xmm1,xmm4
	; Rotate the lanes back to column order.
	pshufd	xmm2,xmm2,78
	pshufd	xmm1,xmm1,147
	pshufd	xmm3,xmm3,57
	dec	edx
	jnz	NEAR L$012loop1x
	; Add the input state to get the key stream block.
	paddd	xmm0,[esp]
	paddd	xmm1,[16+esp]
	paddd	xmm2,[32+esp]
	paddd	xmm3,[48+esp]
	; Fewer than 64 bytes left: byte-wise tail.
	cmp	ecx,64
	jb	NEAR L$014tail
	; Full block: XOR 64 input bytes with the key stream and store.
	movdqu	xmm4,[esi]
	movdqu	xmm5,[16+esi]
	pxor	xmm0,xmm4
	movdqu	xmm4,[32+esi]
	pxor	xmm1,xmm5
	movdqu	xmm5,[48+esi]
	pxor	xmm2,xmm4
	pxor	xmm3,xmm5
	lea	esi,[64+esi]
	movdqu	[edi],xmm0
	movdqu	[16+edi],xmm1
	movdqu	[32+edi],xmm2
	movdqu	[48+edi],xmm3
	lea	edi,[64+edi]
	sub	ecx,64
	jnz	NEAR L$013outer1x
	jmp	NEAR L$011done
L$014tail:
	; Spill the key stream block to [0..63+esp] (overwriting the saved
	; input state) and XOR the remaining ecx bytes one at a time.
	; ebp = byte index; eax is no longer needed as the table pointer.
	movdqa	[esp],xmm0
	movdqa	[16+esp],xmm1
	movdqa	[32+esp],xmm2
	movdqa	[48+esp],xmm3
	xor	eax,eax
	xor	edx,edx
	xor	ebp,ebp
L$015tail_loop:
	; dst[i] = keystream[i] ^ src[i]; the index is incremented before the
	; store, hence the -1 displacement.
	mov	al,BYTE [ebp*1+esp]
	mov	dl,BYTE [ebp*1+esi]
	lea	ebp,[1+ebp]
	xor	al,dl
	mov	BYTE [ebp*1+edi-1],al
	dec	ecx
	jnz	NEAR L$015tail_loop
L$011done:
	; Restore the esp saved in the prologue, then the callee-saved registers.
	mov	esp,DWORD [512+esp]
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret
; Constant table for the SSSE3 code, which addresses it as [N+eax] after
; computing eax = address of L$ssse3_data. Offsets are given per row.
align	64
L$ssse3_data:
; +0:  pshufb mask, rotates every dword left by 16 bits.
db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
; +16: pshufb mask, rotates every dword left by 8 bits.
db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
; +32: ChaCha constants 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
;      ("expand 32-byte k"), i.e. state words 0..3.
dd	1634760805,857760878,2036477234,1797285236
; +48: per-lane counter offsets for the four parallel blocks.
dd	0,1,2,3
; +64: counter step per 4-block iteration (all lanes).
dd	4,4,4,4
; +80: counter step for the 1-block path (lane 0 only).
dd	1,0,0,0
; +96: added to lane 0 of the word-12 row when leaving the 4-block path.
dd	4,0,0,0
; +112: mask that keeps lanes 1..3 of the caller's counter block.
dd	0,-1,-1,-1
align	64
; NUL-terminated ASCII identification string:
; "ChaCha20 for x86, CRYPTOGAMS by <author e-mail address>".
db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
db	114,103,62,0
; 16-byte common symbol holding the capability words tested by
; _ChaCha20_ctr32 to choose between the scalar and SSSE3 paths.
segment	.bss
common	_OPENSSL_ia32cap_P 16