#if defined(__i386__)
/*
 * 32-bit x86 routines, GNU as AT&T syntax (operands are "source,destination").
 *
 * The symbol names and the ASCII string at the end of the file
 * ("GHASH for x86, CRYPTOGAMS by <appro@openssl.org>") indicate that these
 * are GHASH (AES-GCM) primitives. The C prototypes are not visible here, so
 * the argument descriptions below state only what the code itself does with
 * each stack argument.
 *
 * Conventions used by every function in this file:
 *   - arguments are read from the stack relative to %esp (first argument at
 *     4(%esp) on entry, i.e. 20(%esp) after four register pushes);
 *   - %ebp, %ebx, %esi and %edi are pushed and popped by every function that
 *     modifies them;
 *   - the address of a local table is obtained position-independently with
 *     a "call next-instruction / popl" pair followed by a leal of
 *     (table - label).
 *
 * NOTE(review): the leading-underscore symbols and .private_extern suggest a
 * Mach-O target, where ".align N" means 2^N bytes -- confirm against the
 * build that consumes this file.
 */
.file	"ghash-x86.S"
.text

/*
 * _gcm_gmult_4bit_mmx(arg1, arg2)
 *
 *   arg1 (%edi): 16-byte buffer, read and then overwritten in place.
 *   arg2 (%esi): table read as 16 entries of 16 bytes; an entry is selected
 *                by a nibble scaled by 16 and read as two qwords (+0, +8).
 *
 * The 16 bytes of arg1 are consumed from offset 15 down to offset 0, four
 * bits at a time. The 128-bit accumulator is held in %mm1 (upper qword) and
 * %mm0 (lower qword). Each step shifts the accumulator right by 4 bits,
 * XORs an Lrem_4bit entry selected by the 4 bits shifted out into %mm1, and
 * XORs in the arg2 entry for the next nibble.
 * Uses MMX registers and executes emms before returning.
 */
.globl	_gcm_gmult_4bit_mmx
.private_extern	_gcm_gmult_4bit_mmx
.align	4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi	/* arg1 */
	movl	24(%esp),%esi	/* arg2 */
	call	L000pic_point
L000pic_point:
	popl	%eax
	leal	Lrem_4bit-L000pic_point(%eax),%eax	/* %eax = &Lrem_4bit */
	movzbl	15(%edi),%ebx	/* start with the last byte of arg1 */
	xorl	%ecx,%ecx
	movl	%ebx,%edx
	movb	%dl,%cl
	movl	$14,%ebp	/* index of the next byte to fetch */
	shlb	$4,%cl	/* %ecx = low nibble * 16 */
	andl	$240,%edx	/* %edx = high nibble * 16 */
	movq	8(%esi,%ecx,1),%mm0
	movq	(%esi,%ecx,1),%mm1
	movd	%mm0,%ebx
	jmp	L001mmx_loop
.align	4,0x90
L001mmx_loop:
	psrlq	$4,%mm0
	andl	$15,%ebx	/* the 4 bits about to be shifted out of %mm0 */
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	movb	(%edi,%ebp,1),%cl	/* fetch the next byte of arg1 */
	psllq	$60,%mm2	/* low 4 bits of %mm1 move to the top of %mm0 */
	pxor	(%eax,%ebx,8),%mm1	/* Lrem_4bit entry (8 bytes each) */
	decl	%ebp	/* sets SF for the js below; nothing in between touches flags */
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	movl	%ecx,%edx
	pxor	%mm2,%mm0
	js	L002mmx_break	/* byte 0 has just been fetched */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	jmp	L001mmx_loop
.align	4,0x90
L002mmx_break:
	/* Process both nibbles of the final byte outside the loop. */
	shlb	$4,%cl
	andl	$15,%ebx
	psrlq	$4,%mm0
	andl	$240,%edx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%ecx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%ecx,1),%mm1
	pxor	%mm2,%mm0
	psrlq	$4,%mm0
	andl	$15,%ebx
	movq	%mm1,%mm2
	psrlq	$4,%mm1
	pxor	8(%esi,%edx,1),%mm0
	psllq	$60,%mm2
	pxor	(%eax,%ebx,8),%mm1
	movd	%mm0,%ebx
	pxor	(%esi,%edx,1),%mm1
	pxor	%mm2,%mm0
	/* Split %mm1:%mm0 into four dwords and store them byte-swapped. */
	psrlq	$32,%mm0
	movd	%mm1,%edx
	psrlq	$32,%mm1
	movd	%mm0,%ecx
	movd	%mm1,%ebp
	bswap	%ebx
	bswap	%edx
	bswap	%ecx
	bswap	%ebp
	emms
	movl	%ebx,12(%edi)
	movl	%edx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%ebp,(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret

/*
 * _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)
 *
 *   arg1 (%eax): 16-byte state, loaded before the loop and stored after it.
 *   arg2 (%ebx): table read as 16 entries of 16 bytes (256 bytes in total).
 *   arg3 (%ecx): input pointer, advanced by 16 per iteration.
 *   arg4 (%edx): added to arg3 to form the end pointer, i.e. a byte count.
 *
 * The loop test sits at the bottom (cmpl/jne against the end pointer), so
 * the body always runs at least once and terminates only when the input
 * pointer equals the end pointer exactly.
 * NOTE(review): this presumably relies on the caller passing a non-zero
 * multiple of 16 as arg4 -- confirm against the caller.
 *
 * Stack frame, relative to the re-aligned %esp (64-byte aligned, minus 16):
 *     0 ..  15  one byte per table entry: low byte of the entry's +8 qword
 *               shifted left by 4 (the bits lost by the 4-bit shift below)
 *    16 .. 143  the +8 qword of each entry
 *   144 .. 271  the +0 qword of each entry
 *   272 .. 399  the +8 qword of each entry after the 128-bit entry has been
 *               shifted right by 4 bits
 *   400 .. 527  the +0 qword of each entry shifted right by 4 bits
 *   528 .. 539  bytes 0..11 of (state XOR input block) for this iteration
 *   544         saved arg1
 *   548         saved input pointer for the next iteration
 *   552         end pointer (arg3 + arg4)
 *   556         caller %esp as it was after the four pushes
 *
 * Uses MMX registers, including pinsrw/pshufw on %mm registers, and executes
 * emms before returning.
 */
.globl	_gcm_ghash_4bit_mmx
.private_extern	_gcm_ghash_4bit_mmx
.align	4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax	/* arg1 */
	movl	24(%esp),%ebx	/* arg2 */
	movl	28(%esp),%ecx	/* arg3 */
	movl	32(%esp),%edx	/* arg4 */
	movl	%esp,%ebp	/* remember %esp so it can be restored at exit */
	call	L003pic_point
L003pic_point:
	popl	%esi
	leal	Lrem_8bit-L003pic_point(%esi),%esi	/* %esi = &Lrem_8bit */
	subl	$544,%esp
	andl	$-64,%esp
	subl	$16,%esp
	addl	%ecx,%edx	/* end pointer = arg3 + arg4 */
	movl	%eax,544(%esp)
	movl	%edx,552(%esp)
	movl	%ebp,556(%esp)
	addl	$128,%ebx	/* bias so that -128..120 spans the 256-byte table */
	leal	144(%esp),%edi	/* -128(%edi) = 16(%esp), (%edi) = 144(%esp) */
	leal	400(%esp),%ebp	/* -128(%ebp) = 272(%esp), (%ebp) = 400(%esp) */
	/*
	 * Build the on-stack tables described above. The work for the 16
	 * entries is software-pipelined across %mm0-%mm7, so the loads,
	 * shifts and stores of neighbouring entries are interleaved.
	 */
	movl	-120(%ebx),%edx
	movq	-120(%ebx),%mm0
	movq	-128(%ebx),%mm3
	shll	$4,%edx
	movb	%dl,(%esp)
	movl	-104(%ebx),%edx
	movq	-104(%ebx),%mm2
	movq	-112(%ebx),%mm5
	movq	%mm0,-128(%edi)
	psrlq	$4,%mm0
	movq	%mm3,(%edi)
	movq	%mm3,%mm7
	psrlq	$4,%mm3
	shll	$4,%edx
	movb	%dl,1(%esp)
	movl	-88(%ebx),%edx
	movq	-88(%ebx),%mm1
	psllq	$60,%mm7
	movq	-96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-120(%edi)
	psrlq	$4,%mm2
	movq	%mm5,8(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-128(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,(%ebp)
	shll	$4,%edx
	movb	%dl,2(%esp)
	movl	-72(%ebx),%edx
	movq	-72(%ebx),%mm0
	psllq	$60,%mm6
	movq	-80(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-112(%edi)
	psrlq	$4,%mm1
	movq	%mm4,16(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-120(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,8(%ebp)
	shll	$4,%edx
	movb	%dl,3(%esp)
	movl	-56(%ebx),%edx
	movq	-56(%ebx),%mm2
	psllq	$60,%mm7
	movq	-64(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-104(%edi)
	psrlq	$4,%mm0
	movq	%mm3,24(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-112(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,16(%ebp)
	shll	$4,%edx
	movb	%dl,4(%esp)
	movl	-40(%ebx),%edx
	movq	-40(%ebx),%mm1
	psllq	$60,%mm6
	movq	-48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-96(%edi)
	psrlq	$4,%mm2
	movq	%mm5,32(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-104(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,24(%ebp)
	shll	$4,%edx
	movb	%dl,5(%esp)
	movl	-24(%ebx),%edx
	movq	-24(%ebx),%mm0
	psllq	$60,%mm7
	movq	-32(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-88(%edi)
	psrlq	$4,%mm1
	movq	%mm4,40(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-96(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,32(%ebp)
	shll	$4,%edx
	movb	%dl,6(%esp)
	movl	-8(%ebx),%edx
	movq	-8(%ebx),%mm2
	psllq	$60,%mm6
	movq	-16(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-80(%edi)
	psrlq	$4,%mm0
	movq	%mm3,48(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-88(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,40(%ebp)
	shll	$4,%edx
	movb	%dl,7(%esp)
	movl	8(%ebx),%edx
	movq	8(%ebx),%mm1
	psllq	$60,%mm7
	movq	(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-72(%edi)
	psrlq	$4,%mm2
	movq	%mm5,56(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-80(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,48(%ebp)
	shll	$4,%edx
	movb	%dl,8(%esp)
	movl	24(%ebx),%edx
	movq	24(%ebx),%mm0
	psllq	$60,%mm6
	movq	16(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-64(%edi)
	psrlq	$4,%mm1
	movq	%mm4,64(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-72(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,56(%ebp)
	shll	$4,%edx
	movb	%dl,9(%esp)
	movl	40(%ebx),%edx
	movq	40(%ebx),%mm2
	psllq	$60,%mm7
	movq	32(%ebx),%mm5
	por	%mm7,%mm1
	movq	%mm0,-56(%edi)
	psrlq	$4,%mm0
	movq	%mm3,72(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-64(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,64(%ebp)
	shll	$4,%edx
	movb	%dl,10(%esp)
	movl	56(%ebx),%edx
	movq	56(%ebx),%mm1
	psllq	$60,%mm6
	movq	48(%ebx),%mm4
	por	%mm6,%mm0
	movq	%mm2,-48(%edi)
	psrlq	$4,%mm2
	movq	%mm5,80(%edi)
	movq	%mm5,%mm7
	movq	%mm0,-56(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,72(%ebp)
	shll	$4,%edx
	movb	%dl,11(%esp)
	movl	72(%ebx),%edx
	movq	72(%ebx),%mm0
	psllq	$60,%mm7
	movq	64(%ebx),%mm3
	por	%mm7,%mm2
	movq	%mm1,-40(%edi)
	psrlq	$4,%mm1
	movq	%mm4,88(%edi)
	movq	%mm4,%mm6
	movq	%mm2,-48(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,80(%ebp)
	shll	$4,%edx
	movb	%dl,12(%esp)
	movl	88(%ebx),%edx
	movq	88(%ebx),%mm2
	psllq	$60,%mm6
	movq	80(%ebx),%mm5
	por	%mm6,%mm1
	movq	%mm0,-32(%edi)
	psrlq	$4,%mm0
	movq	%mm3,96(%edi)
	movq	%mm3,%mm7
	movq	%mm1,-40(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,88(%ebp)
	shll	$4,%edx
	movb	%dl,13(%esp)
	movl	104(%ebx),%edx
	movq	104(%ebx),%mm1
	psllq	$60,%mm7
	movq	96(%ebx),%mm4
	por	%mm7,%mm0
	movq	%mm2,-24(%edi)
	psrlq	$4,%mm2
	movq	%mm5,104(%edi)
	movq	%mm5,%mm6
	movq	%mm0,-32(%ebp)
	psrlq	$4,%mm5
	movq	%mm3,96(%ebp)
	shll	$4,%edx
	movb	%dl,14(%esp)
	movl	120(%ebx),%edx
	movq	120(%ebx),%mm0
	psllq	$60,%mm6
	movq	112(%ebx),%mm3
	por	%mm6,%mm2
	movq	%mm1,-16(%edi)
	psrlq	$4,%mm1
	movq	%mm4,112(%edi)
	movq	%mm4,%mm7
	movq	%mm2,-24(%ebp)
	psrlq	$4,%mm4
	movq	%mm5,104(%ebp)
	shll	$4,%edx
	movb	%dl,15(%esp)
	psllq	$60,%mm7
	por	%mm7,%mm1
	movq	%mm0,-8(%edi)
	psrlq	$4,%mm0
	movq	%mm3,120(%edi)
	movq	%mm3,%mm6
	movq	%mm1,-16(%ebp)
	psrlq	$4,%mm3
	movq	%mm4,112(%ebp)
	psllq	$60,%mm6
	por	%mm6,%mm0
	movq	%mm0,-8(%ebp)
	movq	%mm3,120(%ebp)
	/* Load the 16-byte state: bytes 0..7 in %mm6, 8..11 in %ebx, 12..15 in %edx. */
	movq	(%eax),%mm6
	movl	8(%eax),%ebx
	movl	12(%eax),%edx
.align	4,0x90
L004outer:
	/*
	 * One 16-byte input block per iteration. The block is XORed into the
	 * state, then the 16 resulting bytes are consumed from offset 15
	 * downward: "roll $8,%edx" rotates the highest-addressed byte of the
	 * current dword into %dl. For each byte the low nibble (andb $15)
	 * indexes the tables at 16/144(%esp), the high nibble (shrl $4)
	 * indexes the 4-bit-shifted tables at 272/400(%esp) and the byte
	 * table at (%esp). The accumulator in %mm6 (upper) : %mm7 (lower) is
	 * shifted right by 8 bits per byte, and the byte shifted out selects
	 * a 16-bit Lrem_8bit entry (pinsrw) that is later XORed into %mm6.
	 */
	xorl	12(%ecx),%edx
	xorl	8(%ecx),%ebx
	pxor	(%ecx),%mm6
	leal	16(%ecx),%ecx
	movl	%ebx,536(%esp)
	movq	%mm6,528(%esp)
	movl	%ecx,548(%esp)
	xorl	%eax,%eax
	roll	$8,%edx
	movb	%dl,%al
	movl	%eax,%ebp
	andb	$15,%al
	shrl	$4,%ebp
	pxor	%mm0,%mm0
	roll	$8,%edx
	pxor	%mm1,%mm1
	pxor	%mm2,%mm2
	movq	16(%esp,%eax,8),%mm7
	movq	144(%esp,%eax,8),%mm6
	movb	%dl,%al
	movd	%mm7,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%edi
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	536(%esp),%edx	/* next dword: bytes 8..11 */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	532(%esp),%edx	/* next dword: bytes 4..7 */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movl	528(%esp),%edx	/* last dword: bytes 0..3 */
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm1,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm0
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	movb	%dl,%al
	movd	%mm7,%ecx
	movzbl	%bl,%ebx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%ebp
	psrlq	$8,%mm6
	pxor	272(%esp,%edi,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm0,%mm6
	shrl	$4,%ebp
	pinsrw	$2,(%esi,%ebx,2),%mm2
	pxor	16(%esp,%eax,8),%mm7
	roll	$8,%edx
	pxor	144(%esp,%eax,8),%mm6
	pxor	%mm3,%mm7
	pxor	400(%esp,%edi,8),%mm6
	xorb	(%esp,%edi,1),%cl
	movb	%dl,%al
	/*
	 * NOTE(review): 524(%esp) is never written in this function, and
	 * %edx is overwritten by "movd %mm7,%edx" below before it is read
	 * again, so this load looks like a dead leftover of the unrolling
	 * pattern. It stays inside the frame, so it is harmless.
	 */
	movl	524(%esp),%edx
	movd	%mm7,%ebx
	movzbl	%cl,%ecx
	psrlq	$8,%mm7
	movq	%mm6,%mm3
	movl	%eax,%edi
	psrlq	$8,%mm6
	pxor	272(%esp,%ebp,8),%mm7
	andb	$15,%al
	psllq	$56,%mm3
	pxor	%mm2,%mm6
	shrl	$4,%edi
	pinsrw	$2,(%esi,%ecx,2),%mm1
	pxor	16(%esp,%eax,8),%mm7
	pxor	144(%esp,%eax,8),%mm6
	xorb	(%esp,%ebp,1),%bl
	pxor	%mm3,%mm7
	pxor	400(%esp,%ebp,8),%mm6
	movzbl	%bl,%ebx
	/* Final step: the last shift is by 4 bits rather than 8. */
	pxor	%mm2,%mm2
	psllq	$4,%mm1
	movd	%mm7,%ecx
	psrlq	$4,%mm7
	movq	%mm6,%mm3
	psrlq	$4,%mm6
	shll	$4,%ecx
	pxor	16(%esp,%edi,8),%mm7
	psllq	$60,%mm3
	movzbl	%cl,%ecx
	pxor	%mm3,%mm7
	pxor	144(%esp,%edi,8),%mm6
	pinsrw	$2,(%esi,%ebx,2),%mm0
	pxor	%mm1,%mm6
	movd	%mm7,%edx
	pinsrw	$3,(%esi,%ecx,2),%mm2
	psllq	$12,%mm0
	pxor	%mm0,%mm6
	psrlq	$32,%mm7
	pxor	%mm2,%mm6
	movl	548(%esp),%ecx	/* reload the advanced input pointer */
	movd	%mm7,%ebx
	/* Byte-reverse %mm6: swap the bytes in each word, then reverse the words. */
	movq	%mm6,%mm3
	psllw	$8,%mm6
	psrlw	$8,%mm3
	por	%mm3,%mm6
	bswap	%edx
	pshufw	$27,%mm6,%mm6
	bswap	%ebx
	cmpl	552(%esp),%ecx	/* reached the end pointer? */
	jne	L004outer
	/* Store the state back to arg1 and restore the caller stack pointer. */
	movl	544(%esp),%eax
	movl	%edx,12(%eax)
	movl	%ebx,8(%eax)
	movq	%mm6,(%eax)
	movl	556(%esp),%esp
	emms
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret

/*
 * _gcm_init_clmul(arg1, arg2)
 *
 *   arg1 (%edx): output, 48 bytes written as three 16-byte stores.
 *   arg2 (%eax): input, 16 bytes read with an unaligned load.
 *
 * Steps, as performed by the code:
 *   1. Load the input and swap its two qwords (pshufd $78).
 *   2. Shift the 128-bit value left by one bit. If its top bit was set,
 *      XOR in the 16-byte constant stored at Lbswap+16.
 *   3. Multiply that value by itself with three carry-less multiplies
 *      (low*low, high*high, (low^high)*(low^high)) and fold the 256-bit
 *      product in %xmm1:%xmm0 back to 128 bits in %xmm0 with a shift/XOR
 *      sequence (presumably reduction modulo the GHASH field polynomial).
 *   4. Store the shifted value at arg1+0, the product at arg1+16, and the
 *      (high^low) halves of both values packed together at arg1+32.
 *
 * PCLMULQDQ and PALIGNR are emitted as raw .byte sequences; each one is
 * decoded in the comment next to it. Leaf function: only %eax, %ecx, %edx
 * and xmm registers are modified.
 */
.globl	_gcm_init_clmul
.private_extern	_gcm_init_clmul
.align	4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
	movl	4(%esp),%edx	/* arg1 */
	movl	8(%esp),%eax	/* arg2 */
	call	L005pic
L005pic:
	popl	%ecx
	leal	Lbswap-L005pic(%ecx),%ecx	/* %ecx = &Lbswap */
	movdqu	(%eax),%xmm2
	pshufd	$78,%xmm2,%xmm2	/* swap the two qwords */
	pshufd	$255,%xmm2,%xmm4	/* broadcast the top dword */
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3	/* carry bit of each qword */
	pcmpgtd	%xmm4,%xmm5	/* all-ones if the top dword is negative */
	pslldq	$8,%xmm3	/* carry from the low qword into the high qword */
	por	%xmm3,%xmm2
	pand	16(%ecx),%xmm5	/* keep the constant only if the top bit was set */
	pxor	%xmm5,%xmm2
	movdqa	%xmm2,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Fold %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,(%edx)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%edx)
.byte	102,15,58,15,227,8	/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%edx)
	ret

/*
 * _gcm_gmult_clmul(arg1, arg2)
 *
 *   arg1 (%eax): 16-byte state, read and overwritten in place.
 *   arg2 (%edx): table; the 16 bytes at +0 and the 16 bytes at +32 are read
 *                (the same layout that _gcm_init_clmul writes).
 *
 * Byte-reverses the state with the Lbswap mask, multiplies it by the value
 * at arg2+0 with three carry-less multiplies, folds the 256-bit product to
 * 128 bits, byte-reverses it again and stores it back to arg1.
 * Leaf function: only %eax, %ecx, %edx and xmm registers are modified.
 */
.globl	_gcm_gmult_clmul
.private_extern	_gcm_gmult_clmul
.align	4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
	movl	4(%esp),%eax	/* arg1 */
	movl	8(%esp),%edx	/* arg2 */
	call	L006pic
L006pic:
	popl	%ecx
	leal	Lbswap-L006pic(%ecx),%ecx	/* %ecx = &Lbswap */
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5	/* byte-reversal mask; aligned load, see .align before Lbswap */
	movups	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movups	32(%edx),%xmm4
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Fold %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	ret

/*
 * _gcm_ghash_clmul(arg1, arg2, arg3, arg4)
 *
 *   arg1 (%eax): 16-byte state, loaded at entry and stored at L011done.
 *   arg2 (%edx): table; 16-byte entries at +0, +16 and +32 are read.
 *   arg3 (%esi): input pointer.
 *   arg4 (%ebx): byte count.
 *
 * Control flow, for a byte count that is a multiple of 16:
 *   - count == 16: a single block is handled at L008odd_tail.
 *   - otherwise blocks are consumed in pairs (L010mod_loop, finished by
 *     L009even_tail); if exactly one block is left afterwards (%ebx == 0)
 *     execution falls through into L008odd_tail.
 * A count of 0 is not special-cased: 0 - 16 is non-zero, so the two-block
 * path is taken and 32 bytes are read from arg3.
 * NOTE(review): presumably callers never pass 0 or a count that is not a
 * multiple of 16 -- confirm against the caller.
 *
 * Input blocks and the state are byte-reversed with the Lbswap mask
 * (pshufb) before being multiplied. Inside the loop the carry-less
 * multiplies for the next pair of blocks are interleaved with the folding
 * of the current product.
 */
.globl	_gcm_ghash_clmul
.private_extern	_gcm_ghash_clmul
.align	4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%eax	/* arg1 */
	movl	24(%esp),%edx	/* arg2 */
	movl	28(%esp),%esi	/* arg3 */
	movl	32(%esp),%ebx	/* arg4 */
	call	L007pic
L007pic:
	popl	%ecx
	leal	Lbswap-L007pic(%ecx),%ecx	/* %ecx = &Lbswap */
	movdqu	(%eax),%xmm0
	movdqa	(%ecx),%xmm5	/* byte-reversal mask */
	movdqu	(%edx),%xmm2
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	subl	$16,%ebx
	jz	L008odd_tail	/* exactly one block */
	movdqu	(%esi),%xmm3
	movdqu	16(%esi),%xmm6
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	movdqu	32(%edx),%xmm5	/* %xmm5 now holds the arg2+32 entry, not the mask */
	pxor	%xmm3,%xmm0
	pshufd	$78,%xmm6,%xmm3
	movdqa	%xmm6,%xmm7
	pxor	%xmm6,%xmm3
	leal	32(%esi),%esi
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	movups	16(%edx),%xmm2
	nop
	subl	$32,%ebx
	jbe	L009even_tail	/* no further pair of blocks to prefetch */
	jmp	L010mod_loop
.align	5,0x90
L010mod_loop:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
	nop
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movups	(%edx),%xmm2
	xorps	%xmm6,%xmm0
	movdqa	(%ecx),%xmm5	/* reload the byte-reversal mask */
	xorps	%xmm7,%xmm1
	movdqu	(%esi),%xmm7
	pxor	%xmm0,%xmm3
	movdqu	16(%esi),%xmm6
	pxor	%xmm1,%xmm3
.byte	102,15,56,0,253	/* pshufb %xmm5,%xmm7 */
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
.byte	102,15,56,0,245	/* pshufb %xmm5,%xmm6 */
	pxor	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
.byte	102,15,58,68,242,0	/* pclmulqdq $0x00,%xmm2,%xmm6 */
	movups	32(%edx),%xmm5
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	pshufd	$78,%xmm7,%xmm3
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm7,%xmm3
	pxor	%xmm4,%xmm1
.byte	102,15,58,68,250,17	/* pclmulqdq $0x11,%xmm2,%xmm7 */
	movups	16(%edx),%xmm2
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,58,68,221,0	/* pclmulqdq $0x00,%xmm5,%xmm3 */
	leal	32(%esi),%esi
	subl	$32,%ebx
	ja	L010mod_loop
L009even_tail:
	pshufd	$78,%xmm0,%xmm4
	movdqa	%xmm0,%xmm1
	pxor	%xmm0,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,229,16	/* pclmulqdq $0x10,%xmm5,%xmm4 */
	movdqa	(%ecx),%xmm5	/* restore the byte-reversal mask */
	xorps	%xmm6,%xmm0
	xorps	%xmm7,%xmm1
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3
	pxor	%xmm3,%xmm4
	movdqa	%xmm4,%xmm3
	psrldq	$8,%xmm4
	pslldq	$8,%xmm3
	pxor	%xmm4,%xmm1
	pxor	%xmm3,%xmm0
	/* Fold %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testl	%ebx,%ebx
	jnz	L011done	/* non-zero: nothing left; zero: one block remains */
	movups	(%edx),%xmm2
L008odd_tail:
	movdqu	(%esi),%xmm3
.byte	102,15,56,0,221	/* pshufb %xmm5,%xmm3 */
	pxor	%xmm3,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pshufd	$78,%xmm2,%xmm4
	pxor	%xmm0,%xmm3
	pxor	%xmm2,%xmm4
.byte	102,15,58,68,194,0	/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17	/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0	/* pclmulqdq $0x00,%xmm4,%xmm3 */
	xorps	%xmm0,%xmm3
	xorps	%xmm1,%xmm3
	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0
	/* Fold %xmm1:%xmm0 down to 128 bits in %xmm0. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
L011done:
.byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
	movdqu	%xmm0,(%eax)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret

/*
 * Constant data. It follows the code directly; no section directive other
 * than the initial .text appears in this file.
 */
.align	6,0x90
/*
 * Lbswap+0 : pshufb mask 15,14,...,0, which reverses the 16 bytes of an xmm
 *            register. It is loaded with movdqa, so it must stay 16-byte
 *            aligned.
 * Lbswap+16: 16-byte constant (byte 0 = 1, byte 15 = 194 = 0xC2) that
 *            _gcm_init_clmul conditionally XORs into its shifted input.
 */
Lbswap:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align	6,0x90
/*
 * 256 16-bit entries, indexed by a byte value in _gcm_ghash_4bit_mmx
 * (pinsrw from (%esi,index,2)).
 */
Lrem_8bit:
.value	0,450,900,582,1800,1738,1164,1358
.value	3600,4050,3476,3158,2328,2266,2716,2910
.value	7200,7650,8100,7782,6952,6890,6316,6510
.value	4656,5106,4532,4214,5432,5370,5820,6014
.value	14400,14722,15300,14854,16200,16010,15564,15630
.value	13904,14226,13780,13334,12632,12442,13020,13086
.value	9312,9634,10212,9766,9064,8874,8428,8494
.value	10864,11186,10740,10294,11640,11450,12028,12094
.value	28800,28994,29444,29382,30600,30282,29708,30158
.value	32400,32594,32020,31958,31128,30810,31260,31710
.value	27808,28002,28452,28390,27560,27242,26668,27118
.value	25264,25458,24884,24822,26040,25722,26172,26622
.value	18624,18690,19268,19078,20424,19978,19532,19854
.value	18128,18194,17748,17558,16856,16410,16988,17310
.value	21728,21794,22372,22182,21480,21034,20588,20910
.value	23280,23346,22900,22710,24056,23610,24188,24510
.value	57600,57538,57988,58182,58888,59338,58764,58446
.value	61200,61138,60564,60758,59416,59866,60316,59998
.value	64800,64738,65188,65382,64040,64490,63916,63598
.value	62256,62194,61620,61814,62520,62970,63420,63102
.value	55616,55426,56004,56070,56904,57226,56780,56334
.value	55120,54930,54484,54550,53336,53658,54236,53790
.value	50528,50338,50916,50982,49768,50090,49644,49198
.value	52080,51890,51444,51510,52344,52666,53244,52798
.value	37248,36930,37380,37830,38536,38730,38156,38094
.value	40848,40530,39956,40406,39064,39258,39708,39646
.value	36256,35938,36388,36838,35496,35690,35116,35054
.value	33712,33394,32820,33270,33976,34170,34620,34558
.value	43456,43010,43588,43910,44744,44810,44364,44174
.value	42960,42514,42068,42390,41176,41242,41820,41630
.value	46560,46114,46692,47014,45800,45866,45420,45230
.value	48112,47666,47220,47542,48376,48442,49020,48830
.align	6,0x90
/*
 * 16 eight-byte entries (low dword always 0), indexed by a 4-bit value in
 * _gcm_gmult_4bit_mmx (pxor from (%eax,index,8)).
 */
Lrem_4bit:
.long	0,0,0,471859200,0,943718400,0,610271232
.long	0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long	0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long	0,2441084928,0,2376073216,0,2847932416,0,3051356160
/* NUL-terminated ASCII: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte	0
#endif