#if defined(__i386__)
.file	"ghash-x86.S"
.text
/*
 * _gcm_gmult_4bit_mmx(arg0, arg1)
 *
 * i386 routine taking two pointer arguments on the stack.
 *   arg0 (kept in %edi): 16-byte block. It is read one byte at a time from
 *                        offset 15 down to offset 0 and then overwritten
 *                        with the 16-byte result.
 *   arg1 (kept in %esi): table of 16-byte entries, indexed by nibble * 16.
 *
 * Presumably this is the GHASH "multiply by H" step using a 4-bit lookup
 * table (going by the symbol name) -- confirm against the C prototype.
 *
 * The running 128-bit value is held in mm1 (upper 64 bits) and mm0 (lower
 * 64 bits). For each nibble it is shifted right by four bits, the four bits
 * shifted out select an 8-byte entry of Lrem_4bit that is XORed into mm1,
 * and the table entry selected by the nibble is XORed in.
 *
 * Saves and restores ebp, ebx, esi and edi; overwrites eax, ecx, edx and
 * mm0-mm2; executes emms before returning.
 */
.globl	_gcm_gmult_4bit_mmx
.private_extern	_gcm_gmult_4bit_mmx
.align	4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Four registers were pushed, so the arguments sit at 20 and 24(%esp). */
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	/* Position-independent address of Lrem_4bit: call/pop yields the
	   address of L000pic_point, the lea adds the link-time distance. */
	call	L000pic_point
L000pic_point:
	popl	%eax
	leal	Lrem_4bit-L000pic_point(%eax),%eax
	/* Split byte 15: ecx = low nibble * 16, edx = high nibble * 16. */
	movzbl	15(%edi),%ebx
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	/* ebp = index of the next byte to fetch. */
	movl	$14,%ebp
	shlb	$4,%cl
	andl	$240,%edx
	/* Start from the table entry selected by the low nibble. */
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	/* ebx keeps the low 32 bits of mm0; its low four bits are the ones
	   about to be shifted out. */
	movd	%mm0,%ebx
	jmp	L001mmx_loop
.align	4,0x90
L001mmx_loop:
	/* First half: process the high nibble (edx) of the previous byte
	   while fetching the next byte into cl. */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl
	/* mm2 carries the four bits leaving mm1 into the top of mm0. */
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	decl	%ebp
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	/* The sign flag still comes from the decl above (the instructions in
	   between do not write flags): leave once byte 0 has been fetched. */
	js	L002mmx_break
	/* Second half: process the low nibble of the byte just fetched and
	   keep its high nibble in edx for the next iteration. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	L001mmx_loop
.align	4,0x90
L002mmx_break:
	/* Low nibble of byte 0. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	/* High nibble of byte 0: the final step, nothing left to fetch. */
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* Split mm1:mm0 into four dwords: ebp/edx = high/low half of mm1,
	   ecx/ebx = high/low half of mm0. */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	/* Byte-reverse each dword before storing it back. */
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	/* Leave MMX state so x87 code can run after the return. */
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _gcm_ghash_4bit_mmx(arg0, arg1, arg2, arg3)
 *
 * i386 routine taking four arguments on the stack.
 *   arg0 (%eax): 16-byte state block, read at the start and written at the end.
 *   arg1 (%ebx): table of sixteen 16-byte entries.
 *   arg2 (%ecx): input pointer, consumed 16 bytes per outer iteration.
 *   arg3 (%edx): input length in bytes; added to arg2 to form the end pointer.
 *
 * Presumably this folds the input into the GHASH state using the 4-bit
 * table (going by the symbol name) -- confirm against the C prototype.
 *
 * The outer loop tests for completion at the bottom with an equality
 * compare after advancing by 16, so it only terminates when the length is
 * a non-zero multiple of 16.
 *
 * Stack frame, relative to the realigned %esp:
 *     0..15    one byte per table entry: low byte of (dword at entry+8) << 4
 *    16..143   qword at offset 8 of each entry
 *   144..271   qword at offset 0 of each entry
 *   272..399   (qword at 8 >> 4) | (qword at 0 << 60) for each entry
 *   400..527   (qword at 0 >> 4) for each entry
 *   528..535   state bytes 0..7 XOR input bytes 0..7 for the current block
 *   536..539   state bytes 8..11 XOR input bytes 8..11
 *   544        arg0
 *   548        current input pointer
 *   552        end pointer (arg2 + arg3)
 *   556        caller %esp, restored before the final pops
 *
 * Saves and restores ebp, ebx, esi and edi; overwrites eax, ecx, edx and
 * mm0-mm7; executes emms before returning.
 */
.globl	_gcm_ghash_4bit_mmx
.private_extern	_gcm_ghash_4bit_mmx
.align	4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	movl	32(%esp),%edx
	/* Remember the incoming stack pointer; it is parked in the frame below. */
	movl	%esp,%ebp
	/* Position-independent address of Lrem_8bit, kept in esi throughout. */
	call	L003pic_point
L003pic_point:
	popl	%esi
	leal	Lrem_8bit-L003pic_point(%esi),%esi
	/* Carve out the frame: at least 544 bytes, rounded down to a 64-byte
	   boundary, then 16 more bytes for the per-entry byte table. */
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	/* edx = end-of-input pointer. */
	addl	%ecx,%edx
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	/* Bias the table pointer by 128 so all sixteen entries are reachable
	   with displacements in -128..+120. */
	addl	$128,%ebx
	/* edi -> frame offset 144 (so -128(%edi) is offset 16),
	   ebp -> frame offset 400 (so -128(%ebp) is offset 272). */
	leal	144(%esp),%edi
	leal	400(%esp),%ebp
	/* Build the five frame tables described in the header. The work for
	   consecutive entries is interleaved: each entry is loaded into an MMX
	   register pair, copied unshifted, then shifted right by four bits as
	   a 128-bit value (psllq $60 moves the carried bits across) and stored
	   again. */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Tables are complete. Load the state: mm6 = bytes 0..7,
	   ebx = bytes 8..11, edx = bytes 12..15. */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	4,0x90
L004outer:
	/* XOR the next 16 input bytes into the state and advance the input
	   pointer. The lower twelve bytes are parked in the frame and reloaded
	   four at a time into edx as they are needed. */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	/* Bytes are taken from edx most-significant first: each roll $8
	   rotates the next byte into dl. For a byte, al = low nibble and
	   ebp or edi (alternating) = high nibble. */
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	/* mm0/mm1/mm2 rotate as holders for the Lrem_8bit words inserted by
	   pinsrw; start them out as zero. */
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	/* Seed the accumulator (mm6 = upper half, mm7 = lower half) from the
	   unshifted tables using the low nibble of byte 15. */
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	/* Repeating per-byte step, unrolled below: shift mm6:mm7 right by
	   eight bits (mm3 carries bits from mm6 into mm7), XOR in the
	   pre-shifted tables (272/400) for the previous high nibble and the
	   unshifted tables (16/144) for the new low nibble, and turn the byte
	   shifted out (XORed with the byte table at 0(%esp)) into an index
	   into Lrem_8bit. */
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Bytes 12..15 are used up; continue with bytes 8..11. */
	movl	536(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Continue with bytes 4..7 (upper half of the qword saved at 528). */
	movl	532(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* Continue with bytes 0..3 (lower half of the qword saved at 528). */
	movl	528(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/* NOTE(review): the value loaded here is never consumed -- edx is not
	   read again before the movd below overwrites it, and offset 524 is
	   not written by this routine. Looks like a harmless by-product of the
	   unrolled pattern. */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	/* Tail for the last byte: its low nibble is folded in as before, but
	   the high nibble (edi) is handled with a final 4-bit shift of the
	   accumulator and the unshifted tables. */
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	/* Lower half of the result leaves the MMX unit: edx = low dword of
	   mm7, ebx = high dword of mm7. */
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	/* ecx was used as scratch above; restore the input pointer. */
	movl	548(%esp),%ecx
	movd	%mm7,%ebx
	/* Byte-reverse mm6: swap the two bytes inside every 16-bit word, then
	   pshufw $27 reverses the order of the four words. */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	/* Next block unless the input pointer has reached the end pointer. */
	cmpl	552(%esp),%ecx
	jne	L004outer
	/* Write the state back through arg0. */
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	/* Drop the aligned frame by restoring the caller stack pointer. */
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _gcm_init_clmul(arg0, arg1)
 *
 * i386 routine taking two pointer arguments on the stack.
 *   arg0 (%edx): output buffer; 48 bytes are written at offsets 0, 16 and 32.
 *   arg1 (%eax): 16-byte input, read with an unaligned load.
 *
 * Presumably this precomputes the key-dependent values used by the other
 * *_clmul routines in this file (going by the symbol name) -- confirm
 * against the C prototype.
 *
 * Steps visible in the code:
 *   1. load the input, swap its 64-bit halves and shift it left by one bit
 *      as a 128-bit value, XORing in the constant at Lbswap+16 when the
 *      top bit of the swapped value was set;
 *   2. multiply that value by itself with three carry-less multiplies and
 *      reduce the 256-bit product to 128 bits;
 *   3. store the value (offset 0), its reduced square (offset 16) and a
 *      third vector made of the XOR of the two 64-bit halves of each
 *      (offset 32).
 *
 * Pushes nothing; overwrites eax, ecx, edx and xmm0-xmm5. The .byte
 * sequences are hand-encoded PCLMULQDQ / PALIGNR instructions.
 */
.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul
.align	4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl	4(%esp),%edx
	movl	8(%esp),%eax
	/* Position-independent address of Lbswap. */
	call	L005pic
L005pic:
	popl	%ecx
	leal	Lbswap-L005pic(%ecx),%ecx
	movdqu	(%eax),%xmm2
	/* $78 swaps the two 64-bit halves. */
	pshufd	$78,%xmm2,%xmm2
	/* $255 broadcasts the top dword; its sign bit decides the mask below. */
	pshufd	$255,%xmm2,%xmm4
	/* 128-bit shift left by one: shift each half, then move the bit that
	   fell out of the low half into the high half. */
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	/* xmm5 = all ones if the broadcast dword is negative, else zero. */
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2
	/* Conditionally XOR in the second 16-byte row of Lbswap. */
	pand	16(%ecx),%xmm5
	pxor	%xmm5,%xmm2
	/* Square xmm2: three carry-less multiplies (low*low, high*high and
	   the product of the XORed halves). */
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	/* Recover the middle term and fold it into the low (xmm0) and high
	   (xmm1) halves of the 256-bit product. */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Reduce xmm1:xmm0 to 128 bits in xmm0 using shifts and XORs. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* xmm3 / xmm4 = XOR of the two halves of xmm2 / xmm0 respectively. */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 -- packs one qword from each into xmm4 */
	movdqu	%xmm4,32(%edx)
	ret
/*
 * _gcm_gmult_clmul(arg0, arg1)
 *
 * i386 routine taking two pointer arguments on the stack.
 *   arg0 (%eax): 16-byte block, read at the start and overwritten with the
 *                result.
 *   arg1 (%edx): table; 16 bytes are read at offset 0 and 16 bytes at
 *                offset 32 (the layout written by _gcm_init_clmul in this
 *                file).
 *
 * The block is byte-reversed with the Lbswap mask, multiplied by the value
 * at offset 0 of the table using three carry-less multiplies, reduced to
 * 128 bits, byte-reversed again and stored back.
 *
 * Pushes nothing; overwrites eax, ecx, edx and xmm0-xmm5. The .byte
 * sequences are hand-encoded PSHUFB / PCLMULQDQ instructions.
 */
.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul
.align	4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax
	movl	8(%esp),%edx
	/* Position-independent address of Lbswap. */
	call	L006pic
L006pic:
	popl	%ecx
	leal	Lbswap-L006pic(%ecx),%ecx
	movdqu	(%eax),%xmm0
	/* xmm5 = byte-reversal mask (aligned load; Lbswap is 64-byte aligned). */
	movdqa	(%ecx),%xmm5
	movups	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	/* xmm3 = XOR of the two halves of the block, for the middle product. */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	/* Recover the middle term and fold it into the low (xmm0) and high
	   (xmm1) halves of the 256-bit product. */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Reduce xmm1:xmm0 to 128 bits in xmm0 using shifts and XORs. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 -- back to memory byte order */
	movdqu	%xmm0,(%eax)
	ret
/*
 * _gcm_ghash_clmul(arg0, arg1, arg2, arg3)
 *
 * i386 routine taking four arguments on the stack.
 *   arg0 (%eax): 16-byte state block, read at the start and written at the end.
 *   arg1 (%edx): table; 16-byte values are read at offsets 0, 16 and 32
 *                (the layout written by _gcm_init_clmul in this file).
 *   arg2 (%esi): input pointer.
 *   arg3 (%ebx): input length in bytes.
 *
 * Presumably this folds the input into the GHASH state (going by the
 * symbol name) -- confirm against the C prototype.
 *
 * Control flow:
 *   - 16 is subtracted from the length first; if that leaves zero, only
 *     the single-block path at L008odd_tail runs;
 *   - otherwise blocks are consumed two at a time (L010mod_loop), with the
 *     multiply for the next pair interleaved with the reduction of the
 *     current one; L009even_tail finishes the last pair;
 *   - after L009even_tail, %ebx is zero exactly when one more block is
 *     left, which is then handled by falling into L008odd_tail.
 * The length is assumed to be a non-zero multiple of 16; nothing here
 * checks it.
 *
 * Saves and restores ebp, ebx, esi and edi (only ebx and esi are modified
 * in the body); overwrites ecx and xmm0-xmm7. The .byte sequences are
 * hand-encoded PSHUFB / PCLMULQDQ instructions.
 */
.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul
.align	4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax
	movl	24(%esp),%edx
	movl	28(%esp),%esi
	movl	32(%esp),%ebx
	/* Position-independent address of Lbswap. */
	call	L007pic
L007pic:
	popl	%ecx
	leal	Lbswap-L007pic(%ecx),%ecx
	/* xmm0 = state, xmm5 = byte-reversal mask, xmm2 = table value at 0. */
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	/* Exactly one block: go straight to the single-block path. */
	subl	$16,%ebx
	jz	L008odd_tail
	/* Load and byte-reverse the first two blocks. */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	/* From here xmm5 holds the table value at offset 32, not the mask. */
	movdqu	32(%edx),%xmm5
	/* First block is XORed into the state; the second block (xmm6/xmm7)
	   is multiplied by the table value at offset 0 right away. */
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	/* xmm2 = table value at offset 16, used for the state multiply. */
	movups	16(%edx),%xmm2
	nop
	/* Fewer than two further blocks remain: skip the loop. */
	subl	$32,%ebx
	jbe	L009even_tail
	jmp	L010mod_loop
.align	5,0x90
L010mod_loop:
	/* Multiply the state by the table value at offset 16 ... */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	/* ... add the partial products of the pending block (xmm6/xmm7/xmm3),
	   while reloading constants and fetching the next two blocks. */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	/* The first of the two new blocks is XORed into the high half of the
	   unreduced product; the second becomes the next pending block. */
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	/* Reduction of xmm1:xmm0, interleaved with the multiplies for the new
	   pending block. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	/* Unsigned test: keep looping while more than two blocks were left. */
	subl	$32,%ebx
	ja	L010mod_loop
L009even_tail:
	/* Finish the last pair: multiply the state, add the pending block
	   products, then reduce. No new input is fetched here. */
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	/* Restore the byte-reversal mask in xmm5 for the code below. */
	movdqa	(%ecx),%xmm5
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	/* Non-zero ebx means the pair was the end of the input; zero means
	   one more block is left for the single-block path. */
	testl	%ebx,%ebx
	jnz	L011done
	movups	(%edx),%xmm2
L008odd_tail:
	/* Single block: XOR it into the state and multiply by the table value
	   at offset 0 (xmm2); the XOR of its halves is computed on the fly. */
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Reduce xmm1:xmm0 to 128 bits in xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L011done:
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 -- back to memory byte order */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * Lbswap: two 16-byte constants used by the *_clmul routines.
 *   +0   PSHUFB mask 15,14,...,0, which reverses the byte order of a
 *        16-byte vector.
 *   +16  constant with byte 0 = 1 and byte 15 = 194 (0xC2); _gcm_init_clmul
 *        XORs it in conditionally through "pand 16(%ecx)".
 * Both rows are fetched with aligned loads / memory operands, hence the
 * alignment directive (power-of-two exponent 6 = 64 bytes on this target).
 */
.align	6,0x90
Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
/*
 * Lrem_8bit: 256 sixteen-bit entries (32 rows of 8).
 * _gcm_ghash_4bit_mmx indexes it with a byte value scaled by 2 and inserts
 * the selected word into an MMX register with pinsrw. Presumably these are
 * the reduction terms for the eight bits shifted out per step -- confirm
 * against the generator script.
 */
.align	6,0x90
Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
/*
 * Lrem_4bit: 16 eight-byte entries, each written as two dwords (the low
 * dword is always zero). _gcm_gmult_4bit_mmx indexes it with the four bits
 * shifted out of the accumulator, scaled by 8, and XORs the entry into the
 * upper half of the accumulator.
 */
.align	6,0x90
Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII identification string:
   "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif