#!/usr/bin/env perl

# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# SHA1 block procedure for Alpha.
#
# On 21264 performance is 33% better than code generated by the vendor
# compiler, 75% better than GCC [3.4], and in absolute terms amounts to
# 8.7 cycles per processed byte. Implementation features vectorized
# byte swap, but not Xupdate.

@X=(	"\$0",	"\$1",	"\$2",	"\$3",	"\$4",	"\$5",	"\$6",	"\$7",
	"\$8",	"\$9",	"\$10",	"\$11",	"\$12",	"\$13",	"\$14",	"\$15");
$ctx="a0";	# $16
$inp="a1";
$num="a2";
$A="a3";
$B="a4";	# 20
$C="a5";
$D="t8";
$E="t9";	@V=($A,$B,$C,$D,$E);
$t0="t10";	# 24
$t1="t11";
$t2="ra";
$t3="t12";
$K="AT";	# 28

sub BODY_00_19 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;

$code.=<<___ if ($i==0);
	ldq_u	@X[0],0+0($inp)
	ldq_u	@X[1],0+7($inp)
___
$code.=<<___ if (!($i&1) && $i<14);
	ldq_u	@X[$i+2],($i+2)*4+0($inp)
	ldq_u	@X[$i+3],($i+2)*4+7($inp)
___
$code.=<<___ if (!($i&1) && $i<15);
	extql	@X[$i],$inp,@X[$i]
	extqh	@X[$i+1],$inp,@X[$i+1]

	or	@X[$i+1],@X[$i],@X[$i]	# pair of 32-bit values is fetched

	srl	@X[$i],24,$t0		# vectorized byte swap
	srl	@X[$i],8,$t2

	sll	@X[$i],8,$t3
	sll	@X[$i],24,@X[$i]
	zapnot	$t0,0x11,$t0
	zapnot	$t2,0x22,$t2

	zapnot	@X[$i],0x88,@X[$i]
	or	$t0,$t2,$t0
	zapnot	$t3,0x44,$t3
	sll	$a,5,$t1

	or	@X[$i],$t0,@X[$i]
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a		# zap upper 32 bits

	or	@X[$i],$t3,@X[$i]
	srl	$a,27,$t0
	bic	$d,$b,$t3
	sll	$b,30,$b

	extll	@X[$i],4,@X[$i+1]	# extract upper half
	or	$t2,$t3,$t2
	addl	@X[$i],$e,$e

	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if (($i&1) && $i<15);
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	zapnot	$a,0xf,$a

	srl	$a,27,$t0
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	sll	$b,30,$b

	or	$t2,$t3,$t2
	addl	$t1,$e,$e
	srl	$b,32,$t3
	zapnot	@X[$i],0xf,@X[$i]

	addl	$t0,$e,$e
	addl	$t2,$e,$e
	or	$t3,$b,$b
___
$code.=<<___ if ($i>=15);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	and	$b,$c,$t2
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	zapnot	$a,0xf,$a
	addl	@X[$i%16],$e,$e
	bic	$d,$b,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	addl	$t1,$e,$e
	or	$t2,$t3,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t0,$e,$e
	srl	@X[$j%16],31,$t1

	addl	$t2,$e,$e
	srl	$b,32,$t3
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	zapnot	@X[$i%16],0xf,@X[$i%16]
	or	$t1,@X[$j%16],@X[$j%16]
___
}

sub BODY_20_39 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___ if ($i<79);	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t2,$e,$e
	srl	$a,27,$t0
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t0,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
___
$code.=<<___ if ($i<77);
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
$code.=<<___ if ($i==79);	# with context fetch
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	ldl	@X[0],0($ctx)

	sll	$b,30,$t3
	addl	$t1,$e,$e
	xor	$b,$c,$t2
	ldl	@X[1],4($ctx)

	srl	$b,2,$b
	addl	@X[$i%16],$e,$e
	xor	$d,$t2,$t2
	ldl	@X[2],8($ctx)

	srl	$a,27,$t0
	addl	$t2,$e,$e
	ldl	@X[3],12($ctx)

	or	$t3,$b,$b
	addl	$t0,$e,$e
	ldl	@X[4],16($ctx)
___
}
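
# Documentation aid (not part of the original CRYPTOGAMS code and never
# called by the generator): a minimal pure-Perl sketch of the boolean
# round functions that the BODY_* generators above and below schedule on
# Alpha -- Ch for rounds 0-19 (and/bic/or), Parity for rounds 20-39 and
# 60-79 (xor/xor), Maj for rounds 40-59 (and/and/or/and/or).  The sub
# names are illustrative only.
sub ref_Ch	{ my ($b,$c,$d)=@_; (($b&$c)|(~$b&$d))&0xffffffff; }
sub ref_Parity	{ my ($b,$c,$d)=@_; ($b^$c^$d)&0xffffffff; }
sub ref_Maj	{ my ($b,$c,$d)=@_; (($b&$c)|($b&$d)|($c&$d))&0xffffffff; }
# e.g. ref_Ch(0xffffffff,0x11111111,0x22222222) == 0x11111111
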
sub BODY_40_59 {
my ($i,$a,$b,$c,$d,$e)=@_;
my $j=$i+1;
$code.=<<___;	# with forward Xupdate
	sll	$a,5,$t1
	addl	$K,$e,$e
	zapnot	$a,0xf,$a
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]

	srl	$a,27,$t0
	and	$b,$c,$t2
	and	$b,$d,$t3
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]

	sll	$b,30,$b
	addl	$t1,$e,$e
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]

	srl	@X[$j%16],31,$t1
	addl	$t0,$e,$e
	or	$t2,$t3,$t2
	and	$c,$d,$t3

	or	$t2,$t3,$t2
	srl	$b,32,$t3
	addl	@X[$i%16],$e,$e
	addl	@X[$j%16],@X[$j%16],@X[$j%16]

	or	$t3,$b,$b
	addl	$t2,$e,$e
	or	$t1,@X[$j%16],@X[$j%16]
	zapnot	@X[$i%16],0xf,@X[$i%16]
___
}

$code=<<___;
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>
#include <regdef.h>
#endif

.text

.set	noat
.set	noreorder
.globl	sha1_block_data_order
.align	5
.ent	sha1_block_data_order
sha1_block_data_order:
	lda	sp,-64(sp)
	stq	ra,0(sp)
	stq	s0,8(sp)
	stq	s1,16(sp)
	stq	s2,24(sp)
	stq	s3,32(sp)
	stq	s4,40(sp)
	stq	s5,48(sp)
	stq	fp,56(sp)
	.mask	0x0400fe00,-64
	.frame	sp,64,ra
	.prologue 0

	ldl	$A,0($ctx)
	ldl	$B,4($ctx)
	sll	$num,6,$num
	ldl	$C,8($ctx)
	ldl	$D,12($ctx)
	ldl	$E,16($ctx)
	addq	$inp,$num,$num

.Lloop:
	.set	noreorder
	ldah	$K,23170(zero)
	zapnot	$B,0xf,$B
	lda	$K,31129($K)	# K_00_19
___
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,28378(zero)
	lda	$K,-5215($K)	# K_20_39
___
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-28900(zero)
	lda	$K,-17188($K)	# K_40_59
___
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	ldah	$K,-13725(zero)
	lda	$K,-15914($K)	# K_60_79
___
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stl	$A,0($ctx)
	stl	$B,4($ctx)
	addq	$inp,64,$inp
	stl	$C,8($ctx)
	stl	$D,12($ctx)
	stl	$E,16($ctx)
	cmpult	$inp,$num,$t1
	bne	$t1,.Lloop

	.set	noreorder
	ldq	ra,0(sp)
	ldq	s0,8(sp)
	ldq	s1,16(sp)
	ldq	s2,24(sp)
	ldq	s3,32(sp)
	ldq	s4,40(sp)
	ldq	s5,48(sp)
	ldq	fp,56(sp)
	lda	sp,64(sp)
	ret	(ra)
.end	sha1_block_data_order
.ascii	"SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
.align	2
___
$output=shift and open STDOUT,">$output";
print $code;
close STDOUT;
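
# Documentation aid (not part of the original script and never invoked):
# the ldah/lda pairs in the main loop compose each 32-bit SHA-1 round
# constant as (high<<16)+low with sign-extended halves, i.e.
#	K_00_19: ( 23170<<16)+31129 = 0x5a827999
#	K_20_39: ( 28378<<16)-5215  = 0x6ed9eba1
#	K_40_59: (-28900<<16)-17188 = 0x8f1bbcdc (mod 2^32)
#	K_60_79: (-13725<<16)-15914 = 0xca62c1d6 (mod 2^32)
# The helper below (name is illustrative) redoes that arithmetic in Perl.
sub ref_sha1_round_constants {
	my @imm = ([23170,31129],[28378,-5215],[-28900,-17188],[-13725,-15914]);
	return map { ((($_->[0]&0xffff)<<16) + $_->[1]) & 0xffffffff } @imm;
}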