12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490 |
- #include "arm_arch.h"
- .text
- #if defined(__thumb2__)
- .syntax unified
- .thumb
- #else
- .code 32
- #endif
- .global sha1_block_data_order
- .type sha1_block_data_order,%function
- .align 5
- sha1_block_data_order:
- #if __ARM_MAX_ARCH__>=7
- .Lsha1_block:
- adr r3,.Lsha1_block
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- #ifdef __APPLE__
- ldr r12,[r12]
- #endif
- tst r12,#ARMV8_SHA1
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
- #endif
- stmdb sp!,{r4-r12,lr}
- add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
- ldmia r0,{r3,r4,r5,r6,r7}
- .Lloop:
- ldr r8,.LK_00_19
- mov r14,sp
- sub sp,sp,#15*4
- mov r5,r5,ror#30
- mov r6,r6,ror#30
- mov r7,r7,ror#30 @ [6]
- .L_00_15:
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r7,r8,r7,ror#2 @ E+=K_00_19
- eor r10,r5,r6 @ F_xx_xx
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r4,r10,ror#2
- add r7,r7,r9 @ E+=X[i]
- eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r7,r7,r10 @ E+=F_00_19(B,C,D)
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r6,r8,r6,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r4,r5 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r6,r8,r6,ror#2 @ E+=K_00_19
- eor r10,r4,r5 @ F_xx_xx
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r3,r10,ror#2
- add r6,r6,r9 @ E+=X[i]
- eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r6,r6,r10 @ E+=F_00_19(B,C,D)
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r5,r8,r5,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r3,r4 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r5,r8,r5,ror#2 @ E+=K_00_19
- eor r10,r3,r4 @ F_xx_xx
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r7,r10,ror#2
- add r5,r5,r9 @ E+=X[i]
- eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r5,r5,r10 @ E+=F_00_19(B,C,D)
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r4,r8,r4,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r7,r3 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r4,r8,r4,ror#2 @ E+=K_00_19
- eor r10,r7,r3 @ F_xx_xx
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r6,r10,ror#2
- add r4,r4,r9 @ E+=X[i]
- eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r4,r4,r10 @ E+=F_00_19(B,C,D)
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r3,r8,r3,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r6,r7 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r3,r8,r3,ror#2 @ E+=K_00_19
- eor r10,r6,r7 @ F_xx_xx
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r5,r10,ror#2
- add r3,r3,r9 @ E+=X[i]
- eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r3,r3,r10 @ E+=F_00_19(B,C,D)
- #if defined(__thumb2__)
- mov r12,sp
- teq r14,r12
- #else
- teq r14,sp
- #endif
- bne .L_00_15 @ [((11+4)*5+2)*3]
- sub sp,sp,#25*4
- #if __ARM_ARCH__<7
- ldrb r10,[r1,#2]
- ldrb r9,[r1,#3]
- ldrb r11,[r1,#1]
- add r7,r8,r7,ror#2 @ E+=K_00_19
- ldrb r12,[r1],#4
- orr r9,r9,r10,lsl#8
- eor r10,r5,r6 @ F_xx_xx
- orr r9,r9,r11,lsl#16
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- orr r9,r9,r12,lsl#24
- #else
- ldr r9,[r1],#4 @ handles unaligned
- add r7,r8,r7,ror#2 @ E+=K_00_19
- eor r10,r5,r6 @ F_xx_xx
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- #ifdef __ARMEL__
- rev r9,r9 @ byte swap
- #endif
- #endif
- and r10,r4,r10,ror#2
- add r7,r7,r9 @ E+=X[i]
- eor r10,r10,r6,ror#2 @ F_00_19(B,C,D)
- str r9,[r14,#-4]!
- add r7,r7,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r3,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- eor r10,r10,r5,ror#2 @ F_00_19(B,C,D)
- add r6,r6,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r7,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- eor r10,r10,r4,ror#2 @ F_00_19(B,C,D)
- add r5,r5,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r6,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- eor r10,r10,r3,ror#2 @ F_00_19(B,C,D)
- add r4,r4,r10 @ E+=F_00_19(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r5,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- eor r10,r10,r7,ror#2 @ F_00_19(B,C,D)
- add r3,r3,r10 @ E+=F_00_19(B,C,D)
- ldr r8,.LK_20_39 @ [+15+16*4]
- cmn sp,#0 @ [+3], clear carry to denote 20_39
- .L_20_39_or_60_79:
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r7,r8,r7,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r5,r6 @ F_xx_xx
- mov r9,r9,ror#31
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r4,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r7,r7,r9 @ E+=X[i]
- add r7,r7,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r3,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- add r6,r6,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r7,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- add r5,r5,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r6,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- add r4,r4,r10 @ E+=F_20_39(B,C,D)
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- eor r10,r5,r10,ror#2 @ F_xx_xx
- @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- add r3,r3,r10 @ E+=F_20_39(B,C,D)
- #if defined(__thumb2__)
- mov r12,sp
- teq r14,r12
- #else
- teq r14,sp @ preserve carry
- #endif
- bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4]
- bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes
- ldr r8,.LK_40_59
- sub sp,sp,#20*4 @ [+2]
- .L_40_59:
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r7,r8,r7,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r5,r6 @ F_xx_xx
- mov r9,r9,ror#31
- add r7,r7,r3,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r4,r10,ror#2 @ F_xx_xx
- and r11,r5,r6 @ F_xx_xx
- add r7,r7,r9 @ E+=X[i]
- add r7,r7,r10 @ E+=F_40_59(B,C,D)
- add r7,r7,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r6,r8,r6,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r4,r5 @ F_xx_xx
- mov r9,r9,ror#31
- add r6,r6,r7,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r3,r10,ror#2 @ F_xx_xx
- and r11,r4,r5 @ F_xx_xx
- add r6,r6,r9 @ E+=X[i]
- add r6,r6,r10 @ E+=F_40_59(B,C,D)
- add r6,r6,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r5,r8,r5,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r3,r4 @ F_xx_xx
- mov r9,r9,ror#31
- add r5,r5,r6,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r7,r10,ror#2 @ F_xx_xx
- and r11,r3,r4 @ F_xx_xx
- add r5,r5,r9 @ E+=X[i]
- add r5,r5,r10 @ E+=F_40_59(B,C,D)
- add r5,r5,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r4,r8,r4,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r7,r3 @ F_xx_xx
- mov r9,r9,ror#31
- add r4,r4,r5,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r6,r10,ror#2 @ F_xx_xx
- and r11,r7,r3 @ F_xx_xx
- add r4,r4,r9 @ E+=X[i]
- add r4,r4,r10 @ E+=F_40_59(B,C,D)
- add r4,r4,r11,ror#2
- ldr r9,[r14,#15*4]
- ldr r10,[r14,#13*4]
- ldr r11,[r14,#7*4]
- add r3,r8,r3,ror#2 @ E+=K_xx_xx
- ldr r12,[r14,#2*4]
- eor r9,r9,r10
- eor r11,r11,r12 @ 1 cycle stall
- eor r10,r6,r7 @ F_xx_xx
- mov r9,r9,ror#31
- add r3,r3,r4,ror#27 @ E+=ROR(A,27)
- eor r9,r9,r11,ror#31
- str r9,[r14,#-4]!
- and r10,r5,r10,ror#2 @ F_xx_xx
- and r11,r6,r7 @ F_xx_xx
- add r3,r3,r9 @ E+=X[i]
- add r3,r3,r10 @ E+=F_40_59(B,C,D)
- add r3,r3,r11,ror#2
- #if defined(__thumb2__)
- mov r12,sp
- teq r14,r12
- #else
- teq r14,sp
- #endif
- bne .L_40_59 @ [+((12+5)*5+2)*4]
- ldr r8,.LK_60_79
- sub sp,sp,#20*4
- cmp sp,#0 @ set carry to denote 60_79
- b .L_20_39_or_60_79 @ [+4], spare 300 bytes
- .L_done:
- add sp,sp,#80*4 @ "deallocate" stack frame
- ldmia r0,{r8,r9,r10,r11,r12}
- add r3,r8,r3
- add r4,r9,r4
- add r5,r10,r5,ror#2
- add r6,r11,r6,ror#2
- add r7,r12,r7,ror#2
- stmia r0,{r3,r4,r5,r6,r7}
- teq r1,r2
- bne .Lloop @ [+18], total 1307
- #if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
- #else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
- #endif
- .size sha1_block_data_order,.-sha1_block_data_order
- .align 5
- .LK_00_19: .word 0x5a827999
- .LK_20_39: .word 0x6ed9eba1
- .LK_40_59: .word 0x8f1bbcdc
- .LK_60_79: .word 0xca62c1d6
- #if __ARM_MAX_ARCH__>=7
- .LOPENSSL_armcap:
- .word OPENSSL_armcap_P-.Lsha1_block
- #endif
- .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
- .align 5
- #if __ARM_MAX_ARCH__>=7
- .arch armv7-a
- .fpu neon
- .type sha1_block_data_order_neon,%function
- .align 4
- sha1_block_data_order_neon:
- .LNEON:
- stmdb sp!,{r4-r12,lr}
- add r2,r1,r2,lsl#6 @ r2 to point at the end of r1
- @ dmb @ errata #451034 on early Cortex A8
- @ vstmdb sp!,{d8-d15} @ ABI specification says so
- mov r14,sp
- sub r12,sp,#64
- adr r8,.LK_00_19
- bic r12,r12,#15 @ align for 128-bit stores
- ldmia r0,{r3,r4,r5,r6,r7} @ load context
- mov sp,r12 @ alloca
- vld1.8 {q0-q1},[r1]! @ handles unaligned
- veor q15,q15,q15
- vld1.8 {q2-q3},[r1]!
- vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19
- vrev32.8 q0,q0 @ yes, even on
- vrev32.8 q1,q1 @ big-endian...
- vrev32.8 q2,q2
- vadd.i32 q8,q0,q14
- vrev32.8 q3,q3
- vadd.i32 q9,q1,q14
- vst1.32 {q8},[r12,:128]!
- vadd.i32 q10,q2,q14
- vst1.32 {q9},[r12,:128]!
- vst1.32 {q10},[r12,:128]!
- ldr r9,[sp] @ big RAW stall
- .Loop_neon:
- vext.8 q8,q0,q1,#8
- bic r10,r6,r4
- add r7,r7,r9
- and r11,r5,r4
- vadd.i32 q13,q3,q14
- ldr r9,[sp,#4]
- add r7,r7,r3,ror#27
- vext.8 q12,q3,q15,#4
- eor r11,r11,r10
- mov r4,r4,ror#2
- add r7,r7,r11
- veor q8,q8,q0
- bic r10,r5,r3
- add r6,r6,r9
- veor q12,q12,q2
- and r11,r4,r3
- ldr r9,[sp,#8]
- veor q12,q12,q8
- add r6,r6,r7,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q13,q15,q12,#4
- bic r10,r4,r7
- add r5,r5,r9
- vadd.i32 q8,q12,q12
- and r11,r3,r7
- ldr r9,[sp,#12]
- vsri.32 q8,q12,#31
- add r5,r5,r6,ror#27
- eor r11,r11,r10
- mov r7,r7,ror#2
- vshr.u32 q12,q13,#30
- add r5,r5,r11
- bic r10,r3,r6
- vshl.u32 q13,q13,#2
- add r4,r4,r9
- and r11,r7,r6
- veor q8,q8,q12
- ldr r9,[sp,#16]
- add r4,r4,r5,ror#27
- veor q8,q8,q13
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q9,q1,q2,#8
- bic r10,r7,r5
- add r3,r3,r9
- and r11,r6,r5
- vadd.i32 q13,q8,q14
- ldr r9,[sp,#20]
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r4,ror#27
- vext.8 q12,q8,q15,#4
- eor r11,r11,r10
- mov r5,r5,ror#2
- add r3,r3,r11
- veor q9,q9,q1
- bic r10,r6,r4
- add r7,r7,r9
- veor q12,q12,q3
- and r11,r5,r4
- ldr r9,[sp,#24]
- veor q12,q12,q9
- add r7,r7,r3,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q13,q15,q12,#4
- bic r10,r5,r3
- add r6,r6,r9
- vadd.i32 q9,q12,q12
- and r11,r4,r3
- ldr r9,[sp,#28]
- vsri.32 q9,q12,#31
- add r6,r6,r7,ror#27
- eor r11,r11,r10
- mov r3,r3,ror#2
- vshr.u32 q12,q13,#30
- add r6,r6,r11
- bic r10,r4,r7
- vshl.u32 q13,q13,#2
- add r5,r5,r9
- and r11,r3,r7
- veor q9,q9,q12
- ldr r9,[sp,#32]
- add r5,r5,r6,ror#27
- veor q9,q9,q13
- eor r11,r11,r10
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q10,q2,q3,#8
- bic r10,r3,r6
- add r4,r4,r9
- and r11,r7,r6
- vadd.i32 q13,q9,q14
- ldr r9,[sp,#36]
- add r4,r4,r5,ror#27
- vext.8 q12,q9,q15,#4
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- veor q10,q10,q2
- bic r10,r7,r5
- add r3,r3,r9
- veor q12,q12,q8
- and r11,r6,r5
- ldr r9,[sp,#40]
- veor q12,q12,q10
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q13,q15,q12,#4
- bic r10,r6,r4
- add r7,r7,r9
- vadd.i32 q10,q12,q12
- and r11,r5,r4
- ldr r9,[sp,#44]
- vsri.32 q10,q12,#31
- add r7,r7,r3,ror#27
- eor r11,r11,r10
- mov r4,r4,ror#2
- vshr.u32 q12,q13,#30
- add r7,r7,r11
- bic r10,r5,r3
- vshl.u32 q13,q13,#2
- add r6,r6,r9
- and r11,r4,r3
- veor q10,q10,q12
- ldr r9,[sp,#48]
- add r6,r6,r7,ror#27
- veor q10,q10,q13
- eor r11,r11,r10
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q11,q3,q8,#8
- bic r10,r4,r7
- add r5,r5,r9
- and r11,r3,r7
- vadd.i32 q13,q10,q14
- ldr r9,[sp,#52]
- add r5,r5,r6,ror#27
- vext.8 q12,q10,q15,#4
- eor r11,r11,r10
- mov r7,r7,ror#2
- add r5,r5,r11
- veor q11,q11,q3
- bic r10,r3,r6
- add r4,r4,r9
- veor q12,q12,q9
- and r11,r7,r6
- ldr r9,[sp,#56]
- veor q12,q12,q11
- add r4,r4,r5,ror#27
- eor r11,r11,r10
- vst1.32 {q13},[r12,:128]!
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q13,q15,q12,#4
- bic r10,r7,r5
- add r3,r3,r9
- vadd.i32 q11,q12,q12
- and r11,r6,r5
- ldr r9,[sp,#60]
- vsri.32 q11,q12,#31
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- mov r5,r5,ror#2
- vshr.u32 q12,q13,#30
- add r3,r3,r11
- bic r10,r6,r4
- vshl.u32 q13,q13,#2
- add r7,r7,r9
- and r11,r5,r4
- veor q11,q11,q12
- ldr r9,[sp,#0]
- add r7,r7,r3,ror#27
- veor q11,q11,q13
- eor r11,r11,r10
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q10,q11,#8
- bic r10,r5,r3
- add r6,r6,r9
- and r11,r4,r3
- veor q0,q0,q8
- ldr r9,[sp,#4]
- add r6,r6,r7,ror#27
- veor q0,q0,q1
- eor r11,r11,r10
- mov r3,r3,ror#2
- vadd.i32 q13,q11,q14
- add r6,r6,r11
- bic r10,r4,r7
- veor q12,q12,q0
- add r5,r5,r9
- and r11,r3,r7
- vshr.u32 q0,q12,#30
- ldr r9,[sp,#8]
- add r5,r5,r6,ror#27
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- eor r11,r11,r10
- mov r7,r7,ror#2
- vsli.32 q0,q12,#2
- add r5,r5,r11
- bic r10,r3,r6
- add r4,r4,r9
- and r11,r7,r6
- ldr r9,[sp,#12]
- add r4,r4,r5,ror#27
- eor r11,r11,r10
- mov r6,r6,ror#2
- add r4,r4,r11
- bic r10,r7,r5
- add r3,r3,r9
- and r11,r6,r5
- ldr r9,[sp,#16]
- add r3,r3,r4,ror#27
- eor r11,r11,r10
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q11,q0,#8
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#20]
- veor q1,q1,q9
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- veor q1,q1,q2
- mov r4,r4,ror#2
- add r7,r7,r11
- vadd.i32 q13,q0,q14
- eor r10,r3,r5
- add r6,r6,r9
- veor q12,q12,q1
- ldr r9,[sp,#24]
- eor r11,r10,r4
- vshr.u32 q1,q12,#30
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- vst1.32 {q13},[r12,:128]!
- add r6,r6,r11
- eor r10,r7,r4
- vsli.32 q1,q12,#2
- add r5,r5,r9
- ldr r9,[sp,#28]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#32]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q12,q0,q1,#8
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#36]
- veor q2,q2,q10
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- veor q2,q2,q3
- mov r5,r5,ror#2
- add r3,r3,r11
- vadd.i32 q13,q1,q14
- eor r10,r4,r6
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r7,r7,r9
- veor q12,q12,q2
- ldr r9,[sp,#40]
- eor r11,r10,r5
- vshr.u32 q2,q12,#30
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- vst1.32 {q13},[r12,:128]!
- add r7,r7,r11
- eor r10,r3,r5
- vsli.32 q2,q12,#2
- add r6,r6,r9
- ldr r9,[sp,#44]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#48]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q12,q1,q2,#8
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#52]
- veor q3,q3,q11
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- veor q3,q3,q8
- mov r6,r6,ror#2
- add r4,r4,r11
- vadd.i32 q13,q2,q14
- eor r10,r5,r7
- add r3,r3,r9
- veor q12,q12,q3
- ldr r9,[sp,#56]
- eor r11,r10,r6
- vshr.u32 q3,q12,#30
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- vst1.32 {q13},[r12,:128]!
- add r3,r3,r11
- eor r10,r4,r6
- vsli.32 q3,q12,#2
- add r7,r7,r9
- ldr r9,[sp,#60]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#0]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q12,q2,q3,#8
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#4]
- veor q8,q8,q0
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- veor q8,q8,q9
- mov r7,r7,ror#2
- add r5,r5,r11
- vadd.i32 q13,q3,q14
- eor r10,r6,r3
- add r4,r4,r9
- veor q12,q12,q8
- ldr r9,[sp,#8]
- eor r11,r10,r7
- vshr.u32 q8,q12,#30
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- add r4,r4,r11
- eor r10,r5,r7
- vsli.32 q8,q12,#2
- add r3,r3,r9
- ldr r9,[sp,#12]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#16]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q3,q8,#8
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#20]
- veor q9,q9,q1
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- veor q9,q9,q10
- mov r3,r3,ror#2
- add r6,r6,r11
- vadd.i32 q13,q8,q14
- eor r10,r7,r4
- add r5,r5,r9
- veor q12,q12,q9
- ldr r9,[sp,#24]
- eor r11,r10,r3
- vshr.u32 q9,q12,#30
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- vst1.32 {q13},[r12,:128]!
- add r5,r5,r11
- eor r10,r6,r3
- vsli.32 q9,q12,#2
- add r4,r4,r9
- ldr r9,[sp,#28]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#32]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q8,q9,#8
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#36]
- veor q10,q10,q2
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- veor q10,q10,q11
- add r7,r7,r10
- and r11,r11,r4
- vadd.i32 q13,q9,q14
- mov r4,r4,ror#2
- add r7,r7,r11
- veor q12,q12,q10
- add r6,r6,r9
- and r10,r4,r5
- vshr.u32 q10,q12,#30
- ldr r9,[sp,#40]
- add r6,r6,r7,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r4,r5
- add r6,r6,r10
- vsli.32 q10,q12,#2
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#44]
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- add r5,r5,r10
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#48]
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- add r4,r4,r10
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- vext.8 q12,q9,q10,#8
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#52]
- veor q11,q11,q3
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- veor q11,q11,q0
- add r3,r3,r10
- and r11,r11,r5
- vadd.i32 q13,q10,q14
- mov r5,r5,ror#2
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r11
- veor q12,q12,q11
- add r7,r7,r9
- and r10,r5,r6
- vshr.u32 q11,q12,#30
- ldr r9,[sp,#56]
- add r7,r7,r3,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r5,r6
- add r7,r7,r10
- vsli.32 q11,q12,#2
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#60]
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- add r6,r6,r10
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#0]
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- add r5,r5,r10
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- vext.8 q12,q10,q11,#8
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#4]
- veor q0,q0,q8
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- veor q0,q0,q1
- add r4,r4,r10
- and r11,r11,r6
- vadd.i32 q13,q11,q14
- mov r6,r6,ror#2
- add r4,r4,r11
- veor q12,q12,q0
- add r3,r3,r9
- and r10,r6,r7
- vshr.u32 q0,q12,#30
- ldr r9,[sp,#8]
- add r3,r3,r4,ror#27
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- eor r11,r6,r7
- add r3,r3,r10
- vsli.32 q0,q12,#2
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#12]
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- add r7,r7,r10
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#16]
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- add r6,r6,r10
- and r11,r11,r3
- mov r3,r3,ror#2
- add r6,r6,r11
- vext.8 q12,q11,q0,#8
- add r5,r5,r9
- and r10,r3,r4
- ldr r9,[sp,#20]
- veor q1,q1,q9
- add r5,r5,r6,ror#27
- eor r11,r3,r4
- veor q1,q1,q2
- add r5,r5,r10
- and r11,r11,r7
- vadd.i32 q13,q0,q14
- mov r7,r7,ror#2
- add r5,r5,r11
- veor q12,q12,q1
- add r4,r4,r9
- and r10,r7,r3
- vshr.u32 q1,q12,#30
- ldr r9,[sp,#24]
- add r4,r4,r5,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r7,r3
- add r4,r4,r10
- vsli.32 q1,q12,#2
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#28]
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- add r3,r3,r10
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- add r7,r7,r9
- and r10,r5,r6
- ldr r9,[sp,#32]
- add r7,r7,r3,ror#27
- eor r11,r5,r6
- add r7,r7,r10
- and r11,r11,r4
- mov r4,r4,ror#2
- add r7,r7,r11
- vext.8 q12,q0,q1,#8
- add r6,r6,r9
- and r10,r4,r5
- ldr r9,[sp,#36]
- veor q2,q2,q10
- add r6,r6,r7,ror#27
- eor r11,r4,r5
- veor q2,q2,q3
- add r6,r6,r10
- and r11,r11,r3
- vadd.i32 q13,q1,q14
- mov r3,r3,ror#2
- add r6,r6,r11
- veor q12,q12,q2
- add r5,r5,r9
- and r10,r3,r4
- vshr.u32 q2,q12,#30
- ldr r9,[sp,#40]
- add r5,r5,r6,ror#27
- vst1.32 {q13},[r12,:128]!
- eor r11,r3,r4
- add r5,r5,r10
- vsli.32 q2,q12,#2
- and r11,r11,r7
- mov r7,r7,ror#2
- add r5,r5,r11
- add r4,r4,r9
- and r10,r7,r3
- ldr r9,[sp,#44]
- add r4,r4,r5,ror#27
- eor r11,r7,r3
- add r4,r4,r10
- and r11,r11,r6
- mov r6,r6,ror#2
- add r4,r4,r11
- add r3,r3,r9
- and r10,r6,r7
- ldr r9,[sp,#48]
- add r3,r3,r4,ror#27
- eor r11,r6,r7
- add r3,r3,r10
- and r11,r11,r5
- mov r5,r5,ror#2
- add r3,r3,r11
- vext.8 q12,q1,q2,#8
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#52]
- veor q3,q3,q11
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- veor q3,q3,q8
- mov r4,r4,ror#2
- add r7,r7,r11
- vadd.i32 q13,q2,q14
- eor r10,r3,r5
- add r6,r6,r9
- veor q12,q12,q3
- ldr r9,[sp,#56]
- eor r11,r10,r4
- vshr.u32 q3,q12,#30
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- vst1.32 {q13},[r12,:128]!
- add r6,r6,r11
- eor r10,r7,r4
- vsli.32 q3,q12,#2
- add r5,r5,r9
- ldr r9,[sp,#60]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#0]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- vadd.i32 q13,q3,q14
- eor r10,r5,r7
- add r3,r3,r9
- vst1.32 {q13},[r12,:128]!
- sub r12,r12,#64
- teq r1,r2
- sub r8,r8,#16
- it eq
- subeq r1,r1,#64
- vld1.8 {q0-q1},[r1]!
- ldr r9,[sp,#4]
- eor r11,r10,r6
- vld1.8 {q2-q3},[r1]!
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- vld1.32 {d28[],d29[]},[r8,:32]!
- add r3,r3,r11
- eor r10,r4,r6
- vrev32.8 q0,q0
- add r7,r7,r9
- ldr r9,[sp,#8]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#12]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#16]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- vrev32.8 q1,q1
- eor r10,r6,r3
- add r4,r4,r9
- vadd.i32 q8,q0,q14
- ldr r9,[sp,#20]
- eor r11,r10,r7
- vst1.32 {q8},[r12,:128]!
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#24]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#28]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- eor r10,r3,r5
- add r6,r6,r9
- ldr r9,[sp,#32]
- eor r11,r10,r4
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- vrev32.8 q2,q2
- eor r10,r7,r4
- add r5,r5,r9
- vadd.i32 q9,q1,q14
- ldr r9,[sp,#36]
- eor r11,r10,r3
- vst1.32 {q9},[r12,:128]!
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#40]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- ldr r9,[sp,#44]
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- eor r10,r4,r6
- add r7,r7,r9
- ldr r9,[sp,#48]
- eor r11,r10,r5
- add r7,r7,r3,ror#27
- mov r4,r4,ror#2
- add r7,r7,r11
- vrev32.8 q3,q3
- eor r10,r3,r5
- add r6,r6,r9
- vadd.i32 q10,q2,q14
- ldr r9,[sp,#52]
- eor r11,r10,r4
- vst1.32 {q10},[r12,:128]!
- add r6,r6,r7,ror#27
- mov r3,r3,ror#2
- add r6,r6,r11
- eor r10,r7,r4
- add r5,r5,r9
- ldr r9,[sp,#56]
- eor r11,r10,r3
- add r5,r5,r6,ror#27
- mov r7,r7,ror#2
- add r5,r5,r11
- eor r10,r6,r3
- add r4,r4,r9
- ldr r9,[sp,#60]
- eor r11,r10,r7
- add r4,r4,r5,ror#27
- mov r6,r6,ror#2
- add r4,r4,r11
- eor r10,r5,r7
- add r3,r3,r9
- eor r11,r10,r6
- add r3,r3,r4,ror#27
- mov r5,r5,ror#2
- add r3,r3,r11
- ldmia r0,{r9,r10,r11,r12} @ accumulate context
- add r3,r3,r9
- ldr r9,[r0,#16]
- add r4,r4,r10
- add r5,r5,r11
- add r6,r6,r12
- it eq
- moveq sp,r14
- add r7,r7,r9
- it ne
- ldrne r9,[sp]
- stmia r0,{r3,r4,r5,r6,r7}
- itt ne
- addne r12,sp,#3*16
- bne .Loop_neon
- @ vldmia sp!,{d8-d15}
- ldmia sp!,{r4-r12,pc}
- .size sha1_block_data_order_neon,.-sha1_block_data_order_neon
- #endif
- #if __ARM_MAX_ARCH__>=7
- # if defined(__thumb2__)
- # define INST(a,b,c,d) .byte c,d|0xf,a,b
- # else
- # define INST(a,b,c,d) .byte a,b,c,d|0x10
- # endif
- .type sha1_block_data_order_armv8,%function
- .align 5
- sha1_block_data_order_armv8:
- .LARMv8:
- vstmdb sp!,{d8-d15} @ ABI specification says so
- veor q1,q1,q1
- adr r3,.LK_00_19
- vld1.32 {q0},[r0]!
- vld1.32 {d2[0]},[r0]
- sub r0,r0,#16
- vld1.32 {d16[],d17[]},[r3,:32]!
- vld1.32 {d18[],d19[]},[r3,:32]!
- vld1.32 {d20[],d21[]},[r3,:32]!
- vld1.32 {d22[],d23[]},[r3,:32]
- .Loop_v8:
- vld1.8 {q4-q5},[r1]!
- vld1.8 {q6-q7},[r1]!
- vrev32.8 q4,q4
- vrev32.8 q5,q5
- vadd.i32 q12,q8,q4
- vrev32.8 q6,q6
- vmov q14,q0 @ offload
- subs r2,r2,#1
- vadd.i32 q13,q8,q5
- vrev32.8 q7,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 0
- INST(0x68,0x0c,0x02,0xe2) @ sha1c q0,q1,q12
- vadd.i32 q12,q8,q6
- INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 1
- INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13
- vadd.i32 q13,q8,q7
- INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
- INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 2
- INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12
- vadd.i32 q12,q8,q4
- INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
- INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 3
- INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13
- vadd.i32 q13,q9,q5
- INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
- INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 4
- INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12
- vadd.i32 q12,q9,q6
- INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
- INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 5
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q13,q9,q7
- INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
- INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 6
- INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
- vadd.i32 q12,q9,q4
- INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
- INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 7
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q13,q9,q5
- INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
- INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 8
- INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
- vadd.i32 q12,q10,q6
- INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
- INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 9
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q13,q10,q7
- INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
- INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 10
- INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
- vadd.i32 q12,q10,q4
- INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
- INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 11
- INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13
- vadd.i32 q13,q10,q5
- INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
- INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 12
- INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
- vadd.i32 q12,q10,q6
- INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
- INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 13
- INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13
- vadd.i32 q13,q11,q7
- INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7
- INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 14
- INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12
- vadd.i32 q12,q11,q4
- INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4
- INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 15
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q13,q11,q5
- INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5
- INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 16
- INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
- vadd.i32 q12,q11,q6
- INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 17
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q13,q11,q7
- INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 18
- INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12
- INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 19
- INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13
- vadd.i32 q1,q1,q2
- vadd.i32 q0,q0,q14
- bne .Loop_v8
- vst1.32 {q0},[r0]!
- vst1.32 {d2[0]},[r0]
- vldmia sp!,{d8-d15}
- bx lr @ bx lr
- .size sha1_block_data_order_armv8,.-sha1_block_data_order_armv8
- #endif
- #if __ARM_MAX_ARCH__>=7
- .comm OPENSSL_armcap_P,4,4
- #endif
|