;=======================================================================
; AES-NI GCM module for x86_64 (the ASCII banner stored after the
; constant table reads "AES-NI GCM module for x86_64, CRYPTOGAMS by
; <appro@openssl.org>").
;
; Syntax : NASM (Intel operand order, `default rel` => RIP-relative data).
; ABI    : Microsoft x64 for the two exported entry points
;          (aesni_gcm_decrypt / aesni_gcm_encrypt).  Each entry point
;          immediately remaps its arguments into rdi/rsi/rdx/rcx/r8/r9,
;          and the internal helpers use that private register contract.
;
; Exported : aesni_gcm_decrypt, aesni_gcm_encrypt
; Internal : _aesni_ctr32_ghash_6x, _aesni_ctr32_6x, gcm_se_handler
;
; XMMWORD/YMMWORD/ZMMWORD are defined empty so that MASM-style size
; keywords in memory operands expand to nothing under NASM.
;=======================================================================
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64

;-----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x  (internal; reached only via `call` from the two
; entry points below, so every stack offset carries "+8" for the return
; address: callee (N+8)+rsp == caller N+rsp).
;
; Each loop iteration encrypts six counter blocks and, interleaved with
; the AES rounds, carry-less-multiplies six previously staged blocks
; into the running hash.
;
; Register contract as used by the code below:
;   rdi  : source pointer, XORed with the key stream; +96 per iteration
;   rsi  : destination pointer; +96 per iteration
;   rdx  : remaining count in 16-byte blocks (callers do `shr rdx,4`)
;   rcx  : round-key base biased by +128 (keys read at (N-128)+rcx)
;   r8   : 16-byte counter block; the next counter is written back here
;   r9   : table base biased by +64 by the callers, read at (N-32)+r9
;          -- presumably the precomputed hash-key powers; TODO confirm
;          the table layout against whoever fills it
;   r10  : bytes processed so far (+0x60 per iteration)
;   r11  : address of $L$bswap_mask; +16 = $L$poly, +32 = $L$one_msb,
;          +48 = $L$two_lsb, +64 = $L$one_lsb
;   r14  : pointer to the data that is byte-reversed onto the stack for
;          the NEXT iteration's hashing; r15 is its upper limit
;   ebx  : copy of the last dword of the counter block (wrap detection)
;   ebp  : round-count value read from offset 240 of the key structure
;   xmm1 : current counter block
;   xmm7 + (48+8..112+8)+rsp : the six byte-reversed blocks to hash
;   xmm8 : running hash value
; Working registers: xmm9-xmm14 hold the six AES states; xmm4 / xmm6 /
; xmm7 accumulate the low / middle / high partial products.
; On return xmm9-xmm14 hold the last six output blocks (NOT yet stored;
; the caller stores them) and xmm8 holds the hash value.
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_ghash_6x:
    vmovdqu xmm2,XMMWORD[32+r11]
    sub rdx,6
    vpxor xmm4,xmm4,xmm4
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]
    ; Build counters +1..+5 with byte-wise adds of $L$one_msb (only byte 15
    ; changes, no carry propagation); a wrap of that byte is detected
    ; separately through ebx at the top of the loop.
    vpaddb xmm10,xmm1,xmm2
    vpaddb xmm11,xmm10,xmm2
    vpaddb xmm12,xmm11,xmm2
    vpaddb xmm13,xmm12,xmm2
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm9,xmm1,xmm15
    ; Zero the deferred-term slot that the loop XORs into xmm8.
    vmovdqu XMMWORD[(16+8)+rsp],xmm4
    jmp NEAR $L$oop6x

ALIGN 32
$L$oop6x:
    ; 100663296 = 0x06000000: adds 6 to the byte of ebx that mirrors byte 15
    ; of the counter block.  Carry out means the byte-wise increments above
    ; wrapped, so the counters are rebuilt with 32-bit adds instead.
    add ebx,100663296
    jc NEAR $L$handle_ctr32
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddb xmm1,xmm14,xmm2
    vpxor xmm10,xmm10,xmm15
    vpxor xmm11,xmm11,xmm15

$L$resume_ctr32:
    ; Store the counter for the next batch, then start round 1 interleaved
    ; with the first multiplication (xmm7 times the table entry in xmm3).
    vmovdqu XMMWORD[r8],xmm1
    vpclmulqdq xmm5,xmm7,xmm3,0x10
    vpxor xmm12,xmm12,xmm15
    vmovups xmm2,XMMWORD[((16-128))+rcx]
    vpclmulqdq xmm6,xmm7,xmm3,0x01
    ; r12 = (r15 >= r14) ? 0x60 : 0, computed branch-free.  The flags from
    ; this cmp are consumed by `setnc` a few lines below; the vector
    ; instructions in between do not modify EFLAGS.
    xor r12,r12
    cmp r15,r14

    vaesenc xmm9,xmm9,xmm2
    vmovdqu xmm0,XMMWORD[((48+8))+rsp]
    vpxor xmm13,xmm13,xmm15
    vpclmulqdq xmm1,xmm7,xmm3,0x00
    vaesenc xmm10,xmm10,xmm2
    vpxor xmm14,xmm14,xmm15
    setnc r12b
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vaesenc xmm11,xmm11,xmm2
    vmovdqu xmm3,XMMWORD[((16-32))+r9]
    neg r12
    vaesenc xmm12,xmm12,xmm2
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm3,0x00
    vpxor xmm8,xmm8,xmm4
    vaesenc xmm13,xmm13,xmm2
    vpxor xmm4,xmm1,xmm5
    and r12,0x60
    vmovups xmm15,XMMWORD[((32-128))+rcx]
    vpclmulqdq xmm1,xmm0,xmm3,0x10
    vaesenc xmm14,xmm14,xmm2

    ; Round 2.  r14 advances by 96 only while it has not passed r15, so
    ; the movbe loads below stay inside the caller-defined window.
    vpclmulqdq xmm2,xmm0,xmm3,0x01
    lea r14,[r12*1+r14]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpclmulqdq xmm3,xmm0,xmm3,0x11
    vmovdqu xmm0,XMMWORD[((64+8))+rsp]
    vaesenc xmm10,xmm10,xmm15
    ; movbe pairs: load the two quadwords of a block byte-swapped and
    ; store them in swapped order, i.e. a full 16-byte reversal into the
    ; stack slot that the next iteration hashes.
    movbe r13,QWORD[88+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[80+r14]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((32+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((40+8))+rsp],r12
    vmovdqu xmm5,XMMWORD[((48-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    ; Round 3.
    vmovups xmm15,XMMWORD[((48-128))+rcx]
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm0,xmm5,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm5,0x10
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm7,xmm7,xmm3
    vpclmulqdq xmm3,xmm0,xmm5,0x01
    vaesenc xmm11,xmm11,xmm15
    vpclmulqdq xmm5,xmm0,xmm5,0x11
    vmovdqu xmm0,XMMWORD[((80+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm4,xmm4,xmm1
    vmovdqu xmm1,XMMWORD[((64-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    ; Round 4.
    vmovups xmm15,XMMWORD[((64-128))+rcx]
    vpxor xmm6,xmm6,xmm2
    vpclmulqdq xmm2,xmm0,xmm1,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm1,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[72+r14]
    vpxor xmm7,xmm7,xmm5
    vpclmulqdq xmm5,xmm0,xmm1,0x01
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[64+r14]
    vpclmulqdq xmm1,xmm0,xmm1,0x11
    vmovdqu xmm0,XMMWORD[((96+8))+rsp]
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((48+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((56+8))+rsp],r12
    vpxor xmm4,xmm4,xmm2
    vmovdqu xmm2,XMMWORD[((96-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    ; Round 5.  The block at (112+8)+rsp is folded into the running hash
    ; xmm8 before xmm8 itself is multiplied in round 6.
    vmovups xmm15,XMMWORD[((80-128))+rcx]
    vpxor xmm6,xmm6,xmm3
    vpclmulqdq xmm3,xmm0,xmm2,0x00
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm0,xmm2,0x10
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[56+r14]
    vpxor xmm7,xmm7,xmm1
    vpclmulqdq xmm1,xmm0,xmm2,0x01
    vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[48+r14]
    vpclmulqdq xmm2,xmm0,xmm2,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((64+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((72+8))+rsp],r12
    vpxor xmm4,xmm4,xmm3
    vmovdqu xmm3,XMMWORD[((112-32))+r9]
    vaesenc xmm14,xmm14,xmm15

    ; Round 6: multiply the running hash xmm8 by the table entry in xmm3.
    vmovups xmm15,XMMWORD[((96-128))+rcx]
    vpxor xmm6,xmm6,xmm5
    vpclmulqdq xmm5,xmm8,xmm3,0x10
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm6,xmm6,xmm1
    vpclmulqdq xmm1,xmm8,xmm3,0x01
    vaesenc xmm10,xmm10,xmm15
    movbe r13,QWORD[40+r14]
    vpxor xmm7,xmm7,xmm2
    vpclmulqdq xmm2,xmm8,xmm3,0x00
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[32+r14]
    vpclmulqdq xmm8,xmm8,xmm3,0x11
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((80+8))+rsp],r13
    vaesenc xmm13,xmm13,xmm15
    mov QWORD[((88+8))+rsp],r12
    vpxor xmm6,xmm6,xmm5
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm6,xmm6,xmm1

    ; Round 7: split the middle term xmm6 across the low (xmm4) and high
    ; (xmm7) halves and begin reducing with the constant at 16+r11
    ; ($L$poly).
    vmovups xmm15,XMMWORD[((112-128))+rcx]
    vpslldq xmm5,xmm6,8
    vpxor xmm4,xmm4,xmm2
    vmovdqu xmm3,XMMWORD[16+r11]

    vaesenc xmm9,xmm9,xmm15
    vpxor xmm7,xmm7,xmm8
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm4,xmm4,xmm5
    movbe r13,QWORD[24+r14]
    vaesenc xmm11,xmm11,xmm15
    movbe r12,QWORD[16+r14]
    vpalignr xmm0,xmm4,xmm4,8
    vpclmulqdq xmm4,xmm4,xmm3,0x10
    mov QWORD[((96+8))+rsp],r13
    vaesenc xmm12,xmm12,xmm15
    mov QWORD[((104+8))+rsp],r12
    vaesenc xmm13,xmm13,xmm15
    vmovups xmm1,XMMWORD[((128-128))+rcx]
    vaesenc xmm14,xmm14,xmm15

    ; Round 8.
    vaesenc xmm9,xmm9,xmm1
    vmovups xmm15,XMMWORD[((144-128))+rcx]
    vaesenc xmm10,xmm10,xmm1
    vpsrldq xmm6,xmm6,8
    vaesenc xmm11,xmm11,xmm1
    vpxor xmm7,xmm7,xmm6
    vaesenc xmm12,xmm12,xmm1
    vpxor xmm4,xmm4,xmm0
    movbe r13,QWORD[8+r14]
    vaesenc xmm13,xmm13,xmm1
    movbe r12,QWORD[r14]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((160-128))+rcx]
    ; ebp selects how many more round pairs run before the tail:
    ; below 11 -> none, equal to 11 -> one pair, above 11 -> two pairs.
    ; The `je` further down reuses the flags of this cmp (only vector
    ; instructions, which leave EFLAGS alone, execute in between).
    cmp ebp,11
    jb NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((176-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((192-128))+rcx]
    je NEAR $L$enc_tail

    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15

    vaesenc xmm9,xmm9,xmm1
    vaesenc xmm10,xmm10,xmm1
    vaesenc xmm11,xmm11,xmm1
    vaesenc xmm12,xmm12,xmm1
    vaesenc xmm13,xmm13,xmm1
    vmovups xmm15,XMMWORD[((208-128))+rcx]
    vaesenc xmm14,xmm14,xmm1
    vmovups xmm1,XMMWORD[((224-128))+rcx]
    jmp NEAR $L$enc_tail

    ; Slow path taken when byte 15 of the counter would wrap: byte-reverse
    ; the counter with $L$bswap_mask, add 1..6 with 32-bit lane adds
    ; ($L$one_lsb at 64+r11, $L$two_lsb at 48+r11), and reverse back.
    ; It also performs the work the fast path does before
    ; $L$resume_ctr32: loads xmm3 and XORs round key 0 into xmm10/xmm11.
ALIGN 32
$L$handle_ctr32:
    vmovdqu xmm0,XMMWORD[r11]
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]
    vpaddd xmm10,xmm6,XMMWORD[64+r11]
    vpaddd xmm11,xmm6,xmm5
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm15
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm15
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpshufb xmm14,xmm14,xmm0
    vpshufb xmm1,xmm1,xmm0
    jmp NEAR $L$resume_ctr32

    ; Tail of one iteration: second-to-last round with xmm15, then the
    ; last round.  The final round key (xmm1) is pre-XORed with the six
    ; source blocks so that vaesenclast produces the output directly.
ALIGN 32
$L$enc_tail:
    vaesenc xmm9,xmm9,xmm15
    vmovdqu XMMWORD[(16+8)+rsp],xmm7
    vpalignr xmm8,xmm4,xmm4,8
    vaesenc xmm10,xmm10,xmm15
    vpclmulqdq xmm4,xmm4,xmm3,0x10
    vpxor xmm2,xmm1,XMMWORD[rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm0,xmm1,XMMWORD[16+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm5,xmm1,XMMWORD[32+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm6,xmm1,XMMWORD[48+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm7,xmm1,XMMWORD[64+rdi]
    vpxor xmm3,xmm1,XMMWORD[80+rdi]
    ; Reload the counter saved at $L$resume_ctr32 and pre-compute the next
    ; batch of counters (+1..+5) while the last round retires.
    vmovdqu xmm1,XMMWORD[r8]

    vaesenclast xmm9,xmm9,xmm2
    vmovdqu xmm2,XMMWORD[32+r11]
    vaesenclast xmm10,xmm10,xmm0
    vpaddb xmm0,xmm1,xmm2
    mov QWORD[((112+8))+rsp],r13
    lea rdi,[96+rdi]
    vaesenclast xmm11,xmm11,xmm5
    vpaddb xmm5,xmm0,xmm2
    mov QWORD[((120+8))+rsp],r12
    lea rsi,[96+rsi]
    vmovdqu xmm15,XMMWORD[((0-128))+rcx]
    vaesenclast xmm12,xmm12,xmm6
    vpaddb xmm6,xmm5,xmm2
    vaesenclast xmm13,xmm13,xmm7
    vpaddb xmm7,xmm6,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vpaddb xmm3,xmm7,xmm2

    ; Account for 96 processed bytes; leave the loop once fewer than six
    ; blocks remain (borrow out of the block counter).
    add r10,0x60
    sub rdx,0x6
    jc NEAR $L$6x_done

    ; Store this batch's output (rsi was already advanced, hence the
    ; negative offsets) and rotate the pre-computed counters into
    ; xmm9-xmm14 for the next iteration.
    vmovups XMMWORD[(-96)+rsi],xmm9
    vpxor xmm9,xmm1,xmm15
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovdqa xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovdqa xmm11,xmm5
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovdqa xmm12,xmm6
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovdqa xmm13,xmm7
    vmovups XMMWORD[(-16)+rsi],xmm14
    vmovdqa xmm14,xmm3
    vmovdqu xmm7,XMMWORD[((32+8))+rsp]
    jmp NEAR $L$oop6x

$L$6x_done:
    ; Fold the two deferred terms into the hash value before returning.
    vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
    vpxor xmm8,xmm8,xmm4

    DB 0F3h,0C3h ;repret

;-----------------------------------------------------------------------
; aesni_gcm_decrypt  (exported, Microsoft x64 ABI)
;
; In  : rcx, rdx, r8, r9 and two stack arguments at 40+rsp / 48+rsp,
;       remapped on entry to rdi, rsi, rdx, rcx, r8, r9.  From their use
;       below: rdi = source, rsi = destination, rdx = length in bytes,
;       rcx = key structure (round count read at offset 240),
;       r8 = 16-byte counter block, r9 = hash state followed by a table
;       starting 32 bytes in.
;       NOTE(review): presumably (in, out, len, key, ivec, Xi) of the
;       C caller -- confirm against the prototype.
; Out : rax = number of bytes processed (a multiple of 0x60); 0 when
;       rdx < 0x60, in which case nothing is touched.
; Saves/restores rbx, rbp, r12-r15, xmm6-xmm15; rdi/rsi are parked in
; the caller's home space at 8+rsp / 16+rsp.
;-----------------------------------------------------------------------
global aesni_gcm_decrypt

ALIGN 32
aesni_gcm_decrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    ; Return 0 immediately if there is less than one 6-block batch.
    xor r10,r10
    cmp rdx,0x60
    jb NEAR $L$gcm_dec_abort

    ; rax keeps the entry rsp for the whole body: the epilogue and
    ; gcm_se_handler both locate the save area relative to it.
    ; 6 pushes (48 bytes) + 168 = 216 bytes below rax.
    lea rax,[rsp]
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
    vzeroupper

    ; Load counter (xmm1, ebx = its last dword) and hash state (xmm8),
    ; carve a 128-byte-aligned scratch frame, and bias rcx by +128 and
    ; r9 by +64 as the helper expects.
    vmovdqu xmm1,XMMWORD[r8]
    add rsp,-128
    mov ebx,DWORD[12+r8]
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    vmovdqu xmm8,XMMWORD[r9]
    and rsp,-128
    vmovdqu xmm0,XMMWORD[r11]
    lea rcx,[128+rcx]
    lea r9,[((32+32))+r9]
    mov ebp,DWORD[((240-128))+rcx]
    vpshufb xmm8,xmm8,xmm0

    ; Compare bits 7..11 of the frame address with those of (key-128).
    ; If the frame lies 0..767 bytes above the key in that window, move
    ; the frame down by the difference.
    ; NOTE(review): presumably avoids cache/4K aliasing between the
    ; scratch frame and the key schedule -- confirm.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$dec_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$dec_no_key_aliasing
    sub rsp,r15
$L$dec_no_key_aliasing:

    ; Stage the first six source blocks, byte-reversed, for hashing:
    ; block 5 stays in xmm7, blocks 4..0 go to 48..112+rsp.
    ; r14 = source, r15 = source + len - 192 (limit for the look-ahead
    ; loads in the helper), rdx = length in 16-byte blocks.
    vmovdqu xmm7,XMMWORD[80+rdi]
    lea r14,[rdi]
    vmovdqu xmm4,XMMWORD[64+rdi]
    lea r15,[((-192))+rdx*1+rdi]
    vmovdqu xmm5,XMMWORD[48+rdi]
    shr rdx,4
    xor r10,r10
    vmovdqu xmm6,XMMWORD[32+rdi]
    vpshufb xmm7,xmm7,xmm0
    vmovdqu xmm2,XMMWORD[16+rdi]
    vpshufb xmm4,xmm4,xmm0
    vmovdqu xmm3,XMMWORD[rdi]
    vpshufb xmm5,xmm5,xmm0
    vmovdqu XMMWORD[48+rsp],xmm4
    vpshufb xmm6,xmm6,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm2,xmm2,xmm0
    vmovdqu XMMWORD[80+rsp],xmm6
    vpshufb xmm3,xmm3,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vmovdqu XMMWORD[112+rsp],xmm3

    call _aesni_ctr32_ghash_6x

    ; The helper returns with its last six output blocks still in
    ; registers; store them, then write the hash value back (byte-
    ; reversed again) to the original r9 address.
    vmovups XMMWORD[(-96)+rsi],xmm9
    vmovups XMMWORD[(-80)+rsi],xmm10
    vmovups XMMWORD[(-64)+rsi],xmm11
    vmovups XMMWORD[(-48)+rsi],xmm12
    vmovups XMMWORD[(-32)+rsi],xmm13
    vmovups XMMWORD[(-16)+rsi],xmm14

    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    lea rsp,[rax]
$L$gcm_dec_abort:
    mov rax,r10
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_gcm_decrypt:

;-----------------------------------------------------------------------
; _aesni_ctr32_6x  (internal; called twice from aesni_gcm_encrypt)
;
; Encrypts six counter blocks and XORs them with 96 bytes at rdi,
; storing 96 bytes at rsi; no hashing.
; In  : rdi/rsi = source/destination, rcx = key base + 128,
;       ebp = round-count value, r11 = $L$bswap_mask, xmm0 = byte-swap
;       mask (used on the wrap path), xmm1 = counter block,
;       ebx = last dword of the counter block.
; Out : rdi += 96, rsi += 96, xmm1 = counter advanced by six, ebx
;       updated, xmm9-xmm14 = the six output blocks just stored.
; Clobbers r12, r13, xmm2-xmm6, xmm8, xmm15, flags.
;-----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_6x:
    vmovdqu xmm4,XMMWORD[((0-128))+rcx]
    vmovdqu xmm2,XMMWORD[32+r11]
    ; r13 = ebp - 1 iterations of the round loop below; r12 walks the
    ; round keys starting with the one at offset 32.
    lea r13,[((-1))+rbp]
    vmovups xmm15,XMMWORD[((16-128))+rcx]
    lea r12,[((32-128))+rcx]
    vpxor xmm9,xmm1,xmm4
    ; Same wrap test as in _aesni_ctr32_ghash_6x (0x06000000 added to
    ; the dword mirroring the counter's last four bytes).
    add ebx,100663296
    jc NEAR $L$handle_ctr32_2
    vpaddb xmm10,xmm1,xmm2
    vpaddb xmm11,xmm10,xmm2
    vpxor xmm10,xmm10,xmm4
    vpaddb xmm12,xmm11,xmm2
    vpxor xmm11,xmm11,xmm4
    vpaddb xmm13,xmm12,xmm2
    vpxor xmm12,xmm12,xmm4
    vpaddb xmm14,xmm13,xmm2
    vpxor xmm13,xmm13,xmm4
    vpaddb xmm1,xmm14,xmm2
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32

ALIGN 16
$L$oop_ctr32:
    ; One AES round on all six states, then fetch the next round key.
    vaesenc xmm9,xmm9,xmm15
    vaesenc xmm10,xmm10,xmm15
    vaesenc xmm11,xmm11,xmm15
    vaesenc xmm12,xmm12,xmm15
    vaesenc xmm13,xmm13,xmm15
    vaesenc xmm14,xmm14,xmm15
    vmovups xmm15,XMMWORD[r12]
    lea r12,[16+r12]
    dec r13d
    jnz NEAR $L$oop_ctr32

    ; xmm3 = final round key, pre-XORed with the source blocks so that
    ; vaesenclast yields the output directly.
    vmovdqu xmm3,XMMWORD[r12]
    vaesenc xmm9,xmm9,xmm15
    vpxor xmm4,xmm3,XMMWORD[rdi]
    vaesenc xmm10,xmm10,xmm15
    vpxor xmm5,xmm3,XMMWORD[16+rdi]
    vaesenc xmm11,xmm11,xmm15
    vpxor xmm6,xmm3,XMMWORD[32+rdi]
    vaesenc xmm12,xmm12,xmm15
    vpxor xmm8,xmm3,XMMWORD[48+rdi]
    vaesenc xmm13,xmm13,xmm15
    vpxor xmm2,xmm3,XMMWORD[64+rdi]
    vaesenc xmm14,xmm14,xmm15
    vpxor xmm3,xmm3,XMMWORD[80+rdi]
    lea rdi,[96+rdi]

    vaesenclast xmm9,xmm9,xmm4
    vaesenclast xmm10,xmm10,xmm5
    vaesenclast xmm11,xmm11,xmm6
    vaesenclast xmm12,xmm12,xmm8
    vaesenclast xmm13,xmm13,xmm2
    vaesenclast xmm14,xmm14,xmm3
    vmovups XMMWORD[rsi],xmm9
    vmovups XMMWORD[16+rsi],xmm10
    vmovups XMMWORD[32+rsi],xmm11
    vmovups XMMWORD[48+rsi],xmm12
    vmovups XMMWORD[64+rsi],xmm13
    vmovups XMMWORD[80+rsi],xmm14
    lea rsi,[96+rsi]

    DB 0F3h,0C3h ;repret
    ; Wrap path: rebuild the counters with 32-bit adds on the byte-
    ; reversed counter (mask expected in xmm0), XORing round key 0
    ; (xmm4) into each state on the way.
ALIGN 32
$L$handle_ctr32_2:
    vpshufb xmm6,xmm1,xmm0
    vmovdqu xmm5,XMMWORD[48+r11]
    vpaddd xmm10,xmm6,XMMWORD[64+r11]
    vpaddd xmm11,xmm6,xmm5
    vpaddd xmm12,xmm10,xmm5
    vpshufb xmm10,xmm10,xmm0
    vpaddd xmm13,xmm11,xmm5
    vpshufb xmm11,xmm11,xmm0
    vpxor xmm10,xmm10,xmm4
    vpaddd xmm14,xmm12,xmm5
    vpshufb xmm12,xmm12,xmm0
    vpxor xmm11,xmm11,xmm4
    vpaddd xmm1,xmm13,xmm5
    vpshufb xmm13,xmm13,xmm0
    vpxor xmm12,xmm12,xmm4
    vpshufb xmm14,xmm14,xmm0
    vpxor xmm13,xmm13,xmm4
    vpshufb xmm1,xmm1,xmm0
    vpxor xmm14,xmm14,xmm4
    jmp NEAR $L$oop_ctr32

;-----------------------------------------------------------------------
; aesni_gcm_encrypt  (exported, Microsoft x64 ABI)
;
; Same argument mapping and register save/restore as aesni_gcm_decrypt.
; Out : rax = number of bytes processed; 0 when rdx < 0x60*3 (288), in
;       which case nothing is touched.
;
; Flow: two plain _aesni_ctr32_6x batches are encrypted first so that
; output exists to hash; _aesni_ctr32_ghash_6x then hashes output from
; earlier batches while encrypting new ones; the straight-line code
; after it hashes the twelve output blocks that are still pending.
;-----------------------------------------------------------------------
global aesni_gcm_encrypt

ALIGN 32
aesni_gcm_encrypt:
    mov QWORD[8+rsp],rdi ;WIN64 prologue
    mov QWORD[16+rsp],rsi
    mov rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
    mov rdi,rcx
    mov rsi,rdx
    mov rdx,r8
    mov rcx,r9
    mov r8,QWORD[40+rsp]
    mov r9,QWORD[48+rsp]

    ; Needs at least three 6-block batches; otherwise return 0.
    xor r10,r10
    cmp rdx,0x60*3
    jb NEAR $L$gcm_enc_abort

    ; rax keeps the entry rsp (see aesni_gcm_decrypt).
    lea rax,[rsp]
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    lea rsp,[((-168))+rsp]
    movaps XMMWORD[(-216)+rax],xmm6
    movaps XMMWORD[(-200)+rax],xmm7
    movaps XMMWORD[(-184)+rax],xmm8
    movaps XMMWORD[(-168)+rax],xmm9
    movaps XMMWORD[(-152)+rax],xmm10
    movaps XMMWORD[(-136)+rax],xmm11
    movaps XMMWORD[(-120)+rax],xmm12
    movaps XMMWORD[(-104)+rax],xmm13
    movaps XMMWORD[(-88)+rax],xmm14
    movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
    vzeroupper

    vmovdqu xmm1,XMMWORD[r8]
    add rsp,-128
    mov ebx,DWORD[12+r8]
    lea r11,[$L$bswap_mask]
    lea r14,[((-128))+rcx]
    mov r15,0xf80
    lea rcx,[128+rcx]
    vmovdqu xmm0,XMMWORD[r11]
    and rsp,-128
    mov ebp,DWORD[((240-128))+rcx]

    ; Same frame-vs-key placement adjustment as in aesni_gcm_decrypt.
    and r14,r15
    and r15,rsp
    sub r15,r14
    jc NEAR $L$enc_no_key_aliasing
    cmp r15,768
    jnc NEAR $L$enc_no_key_aliasing
    sub rsp,r15
$L$enc_no_key_aliasing:

    ; Here the hashed data is the OUTPUT: r14 = destination,
    ; r15 = destination + len - 192, rdx = length in blocks.
    lea r14,[rsi]
    lea r15,[((-192))+rdx*1+rsi]
    shr rdx,4

    ; Batch 1: encrypt, then stage its output byte-reversed for hashing
    ; (block 5 in xmm7, blocks 4..0 at 48..112+rsp).
    call _aesni_ctr32_6x
    vpshufb xmm8,xmm9,xmm0
    vpshufb xmm2,xmm10,xmm0
    vmovdqu XMMWORD[112+rsp],xmm8
    vpshufb xmm4,xmm11,xmm0
    vmovdqu XMMWORD[96+rsp],xmm2
    vpshufb xmm5,xmm12,xmm0
    vmovdqu XMMWORD[80+rsp],xmm4
    vpshufb xmm6,xmm13,xmm0
    vmovdqu XMMWORD[64+rsp],xmm5
    vpshufb xmm7,xmm14,xmm0
    vmovdqu XMMWORD[48+rsp],xmm6

    ; Batch 2: encrypt only.
    call _aesni_ctr32_6x

    ; Load the hash state, bias r9 by +64, and account for the twelve
    ; blocks (0x60*2 bytes) already produced.
    vmovdqu xmm8,XMMWORD[r9]
    lea r9,[((32+32))+r9]
    sub rdx,12
    mov r10,0x60*2
    vpshufb xmm8,xmm8,xmm0

    call _aesni_ctr32_ghash_6x

    ; --- Hash the pending twelve blocks --------------------------------
    ; First six: xmm7 <- 32+rsp, then 48..112+rsp (staged by the helper).
    ; Each block uses three multiplications: lo*lo (0x00), hi*hi (0x11)
    ; and (hi^lo) times a separate table entry (vpunpckhqdq + vpxor build
    ; the hi^lo operand).  Meanwhile the helper's last six output blocks
    ; (xmm9-xmm14) are stored and byte-reversed for the second half.
    vmovdqu xmm7,XMMWORD[32+rsp]
    vmovdqu xmm0,XMMWORD[r11]
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm7,xmm7
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vmovups XMMWORD[(-96)+rsi],xmm9
    vpshufb xmm9,xmm9,xmm0
    vpxor xmm1,xmm1,xmm7
    vmovups XMMWORD[(-80)+rsi],xmm10
    vpshufb xmm10,xmm10,xmm0
    vmovups XMMWORD[(-64)+rsi],xmm11
    vpshufb xmm11,xmm11,xmm0
    vmovups XMMWORD[(-48)+rsi],xmm12
    vpshufb xmm12,xmm12,xmm0
    vmovups XMMWORD[(-32)+rsi],xmm13
    vpshufb xmm13,xmm13,xmm0
    vmovups XMMWORD[(-16)+rsi],xmm14
    vpshufb xmm14,xmm14,xmm0
    ; Park reversed block 0 of the final batch; it is XORed in below
    ; (vxorps xmm7,xmm7,[16+rsp]).
    vmovdqu XMMWORD[16+rsp],xmm9
    vmovdqu xmm6,XMMWORD[48+rsp]
    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpunpckhqdq xmm2,xmm6,xmm6
    vpclmulqdq xmm5,xmm7,xmm3,0x00
    vpxor xmm2,xmm2,xmm6
    vpclmulqdq xmm7,xmm7,xmm3,0x11
    vpclmulqdq xmm1,xmm1,xmm15,0x00

    vmovdqu xmm9,XMMWORD[64+rsp]
    vpclmulqdq xmm4,xmm6,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm5,xmm9,xmm9
    vpclmulqdq xmm6,xmm6,xmm0,0x11
    vpxor xmm5,xmm5,xmm9
    vpxor xmm6,xmm6,xmm7
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vmovdqu xmm1,XMMWORD[80+rsp]
    vpclmulqdq xmm7,xmm9,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm7,xmm7,xmm4
    vpunpckhqdq xmm4,xmm1,xmm1
    vpclmulqdq xmm9,xmm9,xmm3,0x11
    vpxor xmm4,xmm4,xmm1
    vpxor xmm9,xmm9,xmm6
    vpclmulqdq xmm5,xmm5,xmm15,0x00
    vpxor xmm5,xmm5,xmm2

    vmovdqu xmm2,XMMWORD[96+rsp]
    vpclmulqdq xmm6,xmm1,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm6,xmm6,xmm7
    vpunpckhqdq xmm7,xmm2,xmm2
    vpclmulqdq xmm1,xmm1,xmm0,0x11
    vpxor xmm7,xmm7,xmm2
    vpxor xmm1,xmm1,xmm9
    vpclmulqdq xmm4,xmm4,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm4,xmm4,xmm5

    ; Last block of the first half is combined with the running hash
    ; xmm8 before being multiplied.
    vpxor xmm8,xmm8,XMMWORD[112+rsp]
    vpclmulqdq xmm5,xmm2,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpunpckhqdq xmm9,xmm8,xmm8
    vpxor xmm5,xmm5,xmm6
    vpclmulqdq xmm2,xmm2,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm2,xmm2,xmm1
    vpclmulqdq xmm7,xmm7,xmm15,0x00
    vpxor xmm4,xmm7,xmm4

    ; Second half starts here (xmm14 first, working down to xmm10),
    ; interleaved with combining and reducing the first half's result.
    vpclmulqdq xmm6,xmm8,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((0-32))+r9]
    vpunpckhqdq xmm1,xmm14,xmm14
    vpclmulqdq xmm8,xmm8,xmm0,0x11
    vpxor xmm1,xmm1,xmm14
    vpxor xmm5,xmm6,xmm5
    vpclmulqdq xmm9,xmm9,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((32-32))+r9]
    vpxor xmm7,xmm8,xmm2
    vpxor xmm6,xmm9,xmm4

    vmovdqu xmm0,XMMWORD[((16-32))+r9]
    vpxor xmm9,xmm7,xmm5
    vpclmulqdq xmm4,xmm14,xmm3,0x00
    vpxor xmm6,xmm6,xmm9
    vpunpckhqdq xmm2,xmm13,xmm13
    vpclmulqdq xmm14,xmm14,xmm3,0x11
    vpxor xmm2,xmm2,xmm13
    vpslldq xmm9,xmm6,8
    vpclmulqdq xmm1,xmm1,xmm15,0x00
    vpxor xmm8,xmm5,xmm9
    vpsrldq xmm6,xmm6,8
    vpxor xmm7,xmm7,xmm6

    vpclmulqdq xmm5,xmm13,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((48-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm9,xmm12,xmm12
    vpclmulqdq xmm13,xmm13,xmm0,0x11
    vpxor xmm9,xmm9,xmm12
    vpxor xmm13,xmm13,xmm14
    vpalignr xmm14,xmm8,xmm8,8
    vpclmulqdq xmm2,xmm2,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((80-32))+r9]
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm4,xmm12,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((64-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm1,xmm11,xmm11
    vpclmulqdq xmm12,xmm12,xmm3,0x11
    vpxor xmm1,xmm1,xmm11
    vpxor xmm12,xmm12,xmm13
    vxorps xmm7,xmm7,XMMWORD[16+rsp]
    vpclmulqdq xmm9,xmm9,xmm15,0x00
    vpxor xmm9,xmm9,xmm2

    ; Reduction step 1 of 2 with the constant at 16+r11 ($L$poly).
    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm5,xmm11,xmm0,0x00
    vmovdqu xmm3,XMMWORD[((96-32))+r9]
    vpxor xmm5,xmm5,xmm4
    vpunpckhqdq xmm2,xmm10,xmm10
    vpclmulqdq xmm11,xmm11,xmm0,0x11
    vpxor xmm2,xmm2,xmm10
    vpalignr xmm14,xmm8,xmm8,8
    vpxor xmm11,xmm11,xmm12
    vpclmulqdq xmm1,xmm1,xmm15,0x10
    vmovdqu xmm15,XMMWORD[((128-32))+r9]
    vpxor xmm1,xmm1,xmm9

    ; Reduction step 2 of 2; xmm8 then feeds the last multiplication.
    vxorps xmm14,xmm14,xmm7
    vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
    vxorps xmm8,xmm8,xmm14

    vpclmulqdq xmm4,xmm10,xmm3,0x00
    vmovdqu xmm0,XMMWORD[((112-32))+r9]
    vpxor xmm4,xmm4,xmm5
    vpunpckhqdq xmm9,xmm8,xmm8
    vpclmulqdq xmm10,xmm10,xmm3,0x11
    vpxor xmm9,xmm9,xmm8
    vpxor xmm10,xmm10,xmm11
    vpclmulqdq xmm2,xmm2,xmm15,0x00
    vpxor xmm2,xmm2,xmm1

    vpclmulqdq xmm5,xmm8,xmm0,0x00
    vpclmulqdq xmm7,xmm8,xmm0,0x11
    vpxor xmm5,xmm5,xmm4
    vpclmulqdq xmm6,xmm9,xmm15,0x10
    vpxor xmm7,xmm7,xmm10
    vpxor xmm6,xmm6,xmm2

    ; Recover the true middle term, split it across low/high halves,
    ; and run the final two-step reduction.
    vpxor xmm4,xmm7,xmm5
    vpxor xmm6,xmm6,xmm4
    vpslldq xmm1,xmm6,8
    vmovdqu xmm3,XMMWORD[16+r11]
    vpsrldq xmm6,xmm6,8
    vpxor xmm8,xmm5,xmm1
    vpxor xmm7,xmm7,xmm6

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm8,xmm8,xmm2

    vpalignr xmm2,xmm8,xmm8,8
    vpclmulqdq xmm8,xmm8,xmm3,0x10
    vpxor xmm2,xmm2,xmm7
    vpxor xmm8,xmm8,xmm2
    ; Byte-reverse the hash value and write it back to the original r9.
    vpshufb xmm8,xmm8,XMMWORD[r11]
    vmovdqu XMMWORD[(-64)+r9],xmm8

    vzeroupper
    movaps xmm6,XMMWORD[((-216))+rax]
    movaps xmm7,XMMWORD[((-200))+rax]
    movaps xmm8,XMMWORD[((-184))+rax]
    movaps xmm9,XMMWORD[((-168))+rax]
    movaps xmm10,XMMWORD[((-152))+rax]
    movaps xmm11,XMMWORD[((-136))+rax]
    movaps xmm12,XMMWORD[((-120))+rax]
    movaps xmm13,XMMWORD[((-104))+rax]
    movaps xmm14,XMMWORD[((-88))+rax]
    movaps xmm15,XMMWORD[((-72))+rax]
    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    lea rsp,[rax]
$L$gcm_enc_abort:
    mov rax,r10
    mov rdi,QWORD[8+rsp] ;WIN64 epilogue
    mov rsi,QWORD[16+rsp]
    DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_gcm_encrypt:

;-----------------------------------------------------------------------
; Constants.  All code addresses these relative to r11 = $L$bswap_mask,
; so the order and the 16-byte size of each entry must not change:
;   +0  $L$bswap_mask : vpshufb mask that reverses all 16 bytes
;   +16 $L$poly       : reduction constant (0xc2 in the top byte)
;   +32 $L$one_msb    : 1 in byte 15 (byte-wise counter increment)
;   +48 $L$two_lsb    : 2 in byte 0  (32-bit add on a reversed counter)
;   +64 $L$one_lsb    : 1 in byte 0
; The trailing DB lines are the NUL-terminated ASCII banner
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>".
;-----------------------------------------------------------------------
ALIGN 64
$L$bswap_mask:
    DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
    DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
    DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
    DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
    DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
    DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
    DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
    DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; gcm_se_handler -- exception/unwind handler referenced from the .xdata
; records of both entry points.
;
; In  : r8 and r9 are the third and fourth handler arguments.
;       NOTE(review): presumably r8 = CONTEXT* and r9 =
;       DISPATCHER_CONTEXT*; the numeric offsets below (120 = Rax,
;       152 = Rsp, 248 = Rip, 512 = Xmm6, ...) assume the Win64
;       structure layouts -- verify against winnt.h.
; The handler data ([56+r9]) holds two image-relative addresses: the
; body label and the abort label of the faulting function.  If the
; faulting address lies in [body, abort), the callee-saved registers
; are recovered from the save area below the frame pointer kept in the
; context's rax slot, mirroring the function epilogue.
; Out : eax = 1 after chaining to RtlVirtualUnwind.
;-----------------------------------------------------------------------
ALIGN 16
gcm_se_handler:
    push rsi
    push rdi
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15
    pushfq
    sub rsp,64

    mov rax,QWORD[120+r8]
    mov rbx,QWORD[248+r8]

    mov rsi,QWORD[8+r9]
    mov r11,QWORD[56+r9]

    ; Fault before the body label: prologue has not saved anything yet.
    mov r10d,DWORD[r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jb NEAR $L$common_seh_tail

    mov rax,QWORD[152+r8]

    ; Fault at or after the abort label: frame already torn down.
    mov r10d,DWORD[4+r11]
    lea r10,[r10*1+rsi]
    cmp rbx,r10
    jae NEAR $L$common_seh_tail

    ; Inside the body: rax slot holds the entry rsp; reload the saved
    ; general-purpose registers from the same offsets the epilogue uses.
    mov rax,QWORD[120+r8]

    mov r15,QWORD[((-48))+rax]
    mov r14,QWORD[((-40))+rax]
    mov r13,QWORD[((-32))+rax]
    mov r12,QWORD[((-24))+rax]
    mov rbp,QWORD[((-16))+rax]
    mov rbx,QWORD[((-8))+rax]
    mov QWORD[240+r8],r15
    mov QWORD[232+r8],r14
    mov QWORD[224+r8],r13
    mov QWORD[216+r8],r12
    mov QWORD[160+r8],rbp
    mov QWORD[144+r8],rbx

    ; Copy the ten saved XMM registers (20 qwords) into the context.
    ; The DD is the raw encoding FC F3 48 A5 = cld ; rep movsq.
    lea rsi,[((-216))+rax]
    lea rdi,[512+r8]
    mov ecx,20
    DD 0xa548f3fc

$L$common_seh_tail:
    ; Recover rdi/rsi from the slots the prologue parked them in
    ; (8 and 16 above the frame's rsp) and publish rsp/rsi/rdi.
    mov rdi,QWORD[8+rax]
    mov rsi,QWORD[16+rax]
    mov QWORD[152+r8],rax
    mov QWORD[168+r8],rsi
    mov QWORD[176+r8],rdi

    ; Copy 154 qwords from r8 to the buffer at [40+r9]
    ; (again cld ; rep movsq, raw-encoded).
    mov rdi,QWORD[40+r9]
    mov rsi,r8
    mov ecx,154
    DD 0xa548f3fc

    ; Call RtlVirtualUnwind: four register arguments (rcx = 0, rdx, r8,
    ; r9) plus four stack arguments placed above the 32-byte home space.
    mov rsi,r9
    xor rcx,rcx
    mov rdx,QWORD[8+rsi]
    mov r8,QWORD[rsi]
    mov r9,QWORD[16+rsi]
    mov r10,QWORD[40+rsi]
    lea r11,[56+rsi]
    lea r12,[24+rsi]
    mov QWORD[32+rsp],r10
    mov QWORD[40+rsp],r11
    mov QWORD[48+rsp],r12
    mov QWORD[56+rsp],rcx
    call QWORD[__imp_RtlVirtualUnwind]

    mov eax,1
    add rsp,64
    popfq
    pop r15
    pop r14
    pop r13
    pop r12
    pop rbp
    pop rbx
    pop rdi
    pop rsi
    DB 0F3h,0C3h ;repret

; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per exported function.
section .pdata rdata align=4
ALIGN 4
    DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
    DD $L$SEH_gcm_dec_info wrt ..imagebase

    DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
    DD $L$SEH_gcm_enc_info wrt ..imagebase
; Unwind records: a 4-byte header, the handler address, then the two
; handler-data words (body label, abort label) that gcm_se_handler reads
; at [r11] and [4+r11].
; NOTE(review): header byte 9 presumably encodes UNWIND_INFO version 1
; with both handler flags set -- confirm against the UNWIND_INFO docs.
section .xdata rdata align=8
ALIGN 8
$L$SEH_gcm_dec_info:
    DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
    DB 9,0,0,0
    DD gcm_se_handler wrt ..imagebase
    DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase