e_padlock-x86_64.s 17 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. .text
  # _padlock_capability: CPUID-based probe; the result is returned in %eax.
  # Returns 0 unless the CPUID leaf-0 vendor string is "CentaurHauls" or
  # "  Shanghai  " AND leaf 0xC0000000 reports (in %eax) a value of at least
  # 0xC0000001. Otherwise returns the %edx output of leaf 0xC0000001 with
  # bit 4 forced to 1.
  # cpuid overwrites %rbx (callee-saved), so it is parked in %r8 meanwhile.
  2. .globl _padlock_capability
  3. .p2align 4
  4. _padlock_capability:
  5. movq %rbx,%r8
  6. xorl %eax,%eax
  7. cpuid
  # Default return value is 0 from here until a vendor match succeeds.
  8. xorl %eax,%eax
  # Vendor string arrives as %ebx:%edx:%ecx. 0x746e6543 is ASCII "Cent".
  9. cmpl $0x746e6543,%ebx
  10. jne L$zhaoxin
  # 0x48727561 is ASCII "aurH".
  11. cmpl $0x48727561,%edx
  12. jne L$noluck
  # 0x736c7561 is ASCII "auls".
  13. cmpl $0x736c7561,%ecx
  14. jne L$noluck
  15. jmp L$zhaoxinEnd
  16. L$zhaoxin:
  # Second accepted vendor: "  Sh" / "angh" / "ai  " = "  Shanghai  ".
  17. cmpl $0x68532020,%ebx
  18. jne L$noluck
  19. cmpl $0x68676e61,%edx
  20. jne L$noluck
  21. cmpl $0x20206961,%ecx
  22. jne L$noluck
  23. L$zhaoxinEnd:
  # Query leaf 0xC0000000 and require its %eax output to be >= 0xC0000001
  # (unsigned compare) before touching leaf 0xC0000001.
  24. movl $0xC0000000,%eax
  25. cpuid
  26. movl %eax,%edx
  27. xorl %eax,%eax
  28. cmpl $0xC0000001,%edx
  29. jb L$noluck
  30. movl $0xC0000001,%eax
  31. cpuid
  # Return leaf 0xC0000001's %edx; the and/or pair clears and then sets bit 4,
  # so bit 4 is always 1 in the returned value.
  # NOTE(review): the meaning of bit 4 to callers is not visible here - confirm.
  32. movl %edx,%eax
  33. andl $0xffffffef,%eax
  34. orl $0x10,%eax
  35. L$noluck:
  36. movq %r8,%rbx
  # 0xf3,0xc3 encodes "rep ret".
  37. .byte 0xf3,0xc3
  # _padlock_key_bswap: byte-swaps consecutive 32-bit words in place.
  # In:  %rdi = base pointer; the 32-bit value n at offset 240 from it sets
  #      the amount of work: 4*(n+1) words starting at (%rdi) are swapped.
  #      (presumably %rdi is an AES key schedule and n its round count -
  #      TODO confirm against the caller)
  # Clobbers: %eax, %edx, %rdi, flags. No return value is set up.
  38. .globl _padlock_key_bswap
  39. .p2align 4
  40. _padlock_key_bswap:
  # %edx = (n + 1) * 4 = number of 32-bit words to process.
  41. movl 240(%rdi),%edx
  42. incl %edx
  43. shll $2,%edx
  44. L$bswap_loop:
  45. movl (%rdi),%eax
  46. bswapl %eax
  47. movl %eax,(%rdi)
  # Advance to the next word; lea leaves the flags untouched.
  48. leaq 4(%rdi),%rdi
  49. subl $1,%edx
  50. jnz L$bswap_loop
  # 0xf3,0xc3 encodes "rep ret".
  51. .byte 0xf3,0xc3
  # _padlock_verify_context: exported wrapper around _padlock_verify_ctx.
  # In:  %rdi = context pointer.
  # Sets up the helper's register/stack contract: %rdx = context pointer,
  # %rax = address of L$padlock_saved_context, and a copy of the flags
  # register pushed on the stack just above the helper's return address.
  52. .globl _padlock_verify_context
  53. .p2align 4
  54. _padlock_verify_context:
  55. movq %rdi,%rdx
  56. pushf
  57. leaq L$padlock_saved_context(%rip),%rax
  58. call _padlock_verify_ctx
  # Discard the 8-byte flags image pushed above.
  59. leaq 8(%rsp),%rsp
  # 0xf3,0xc3 encodes "rep ret".
  60. .byte 0xf3,0xc3
  # _padlock_verify_ctx: file-local helper (no .globl) with a private calling
  # convention.
  # In:  %rax = address of the saved-context slot,
  #      %rdx = context pointer about to be used,
  #      8(%rsp) = flags image pushed by the caller before the call.
  # If bit 30 of that flags image is set and the slot does not already hold
  # %rdx, a pushf/popf pair is executed. In every case the slot is then
  # updated to %rdx.
  # NOTE(review): per the VIA PadLock documentation, EFLAGS bit 30 and the
  # pushf/popf sequence control reloading of the cached key - confirm.
  # Clobbers: %r8, flags.
  61. .p2align 4
  62. _padlock_verify_ctx:
  # 0(%rsp) is the return address, so 8(%rsp) is the caller's pushed flags.
  63. movq 8(%rsp),%r8
  64. btq $30,%r8
  65. jnc L$verified
  66. cmpq (%rax),%rdx
  67. je L$verified
  68. pushf
  69. popf
  70. L$verified:
  # Remember which context was used last.
  71. movq %rdx,(%rax)
  # 0xf3,0xc3 encodes "rep ret".
  72. .byte 0xf3,0xc3
  # _padlock_reload_key: takes no arguments; pushes the flags register and
  # immediately pops it back, then returns.
  # NOTE(review): the PadLock documentation describes a pushf/popf pair as the
  # way to make the next xcrypt instruction reload its key - confirm.
  73. .globl _padlock_reload_key
  74. .p2align 4
  75. _padlock_reload_key:
  76. pushf
  77. popf
  # 0xf3,0xc3 encodes "rep ret".
  78. .byte 0xf3,0xc3
  # _padlock_aes_block: issues one "rep xcryptecb" with a block count of 1.
  # In:  %rdi, %rsi = passed through untouched to the instruction
  #      (presumably destination and source of the 16-byte block - confirm),
  #      %rdx = context pointer.
  # The instruction is given %rdx = context+16 and %rbx = context+32
  # (presumably control word and key schedule - confirm).
  # %rbx is callee-saved, so it is parked in %r8 around the instruction.
  79. .globl _padlock_aes_block
  80. .p2align 4
  81. _padlock_aes_block:
  82. movq %rbx,%r8
  # %rcx = 1: process a single block.
  83. movq $1,%rcx
  84. leaq 32(%rdx),%rbx
  85. leaq 16(%rdx),%rdx
  # 0xf3,0x0f,0xa7,0xc8 encodes "rep xcryptecb".
  86. .byte 0xf3,0x0f,0xa7,0xc8
  87. movq %r8,%rbx
  # 0xf3,0xc3 encodes "rep ret".
  88. .byte 0xf3,0xc3
  # _padlock_xstore: thin wrapper around the PadLock "xstore" instruction.
  # In:  %rdi = passed through untouched (presumably the store destination),
  #      %esi = 32-bit value copied into %edx for the instruction.
  # Out: %eax is whatever the instruction leaves there; this wrapper does not
  #      set it. NOTE(review): meaning of %edx/%eax is defined by the PadLock
  #      RNG documentation - confirm.
  89. .globl _padlock_xstore
  90. .p2align 4
  91. _padlock_xstore:
  92. movl %esi,%edx
  # 0x0f,0xa7,0xc0 encodes "xstore".
  93. .byte 0x0f,0xa7,0xc0
  # 0xf3,0xc3 encodes "rep ret".
  94. .byte 0xf3,0xc3
  # _padlock_sha1_oneshot: runs "rep xsha1" with %rax = 0 on a stack copy of a
  # 20-byte state and writes the updated state back.
  # In:  %rdi = pointer to the 20-byte state (read and written, may be
  #             unaligned: movups is used),
  #      %rsi = passed through untouched (presumably the input data pointer),
  #      %rdx = moved into %rcx for the instruction (presumably the length).
  # The state is bounced through the stack so the instruction sees a 16-byte
  # aligned buffer: 128+8 bytes are reserved and addressed with movaps.
  # Clobbers: %rax, %rcx, %rdx, %rdi, %xmm0, flags (plus whatever the
  # instruction itself modifies).
  95. .globl _padlock_sha1_oneshot
  96. .p2align 4
  97. _padlock_sha1_oneshot:
  98. movq %rdx,%rcx
  # Keep the caller's state pointer in %rdx for the write-back at the end.
  99. movq %rdi,%rdx
  100. movups (%rdi),%xmm0
  101. subq $128+8,%rsp
  102. movl 16(%rdi),%eax
  103. movaps %xmm0,(%rsp)
  # %rdi now points at the aligned stack copy of the state.
  104. movq %rsp,%rdi
  105. movl %eax,16(%rsp)
  # %rax = 0 distinguishes this entry point from _padlock_sha1_blocks (-1).
  106. xorq %rax,%rax
  # 0xf3,0x0f,0xa6,0xc8 encodes "rep xsha1".
  107. .byte 0xf3,0x0f,0xa6,0xc8
  # Copy the 20-byte result from the stack back to the caller's state.
  108. movaps (%rsp),%xmm0
  109. movl 16(%rsp),%eax
  110. addq $128+8,%rsp
  111. movups %xmm0,(%rdx)
  112. movl %eax,16(%rdx)
  # 0xf3,0xc3 encodes "rep ret".
  113. .byte 0xf3,0xc3
  # _padlock_sha1_blocks: runs "rep xsha1" with %rax = -1 on a stack copy of a
  # 20-byte state and writes the updated state back.
  # In:  %rdi = pointer to the 20-byte state (read and written, may be
  #             unaligned: movups is used),
  #      %rsi = passed through untouched (presumably the input data pointer),
  #      %rdx = moved into %rcx for the instruction (presumably a block count
  #             or length - confirm).
  # Identical to _padlock_sha1_oneshot except for the %rax value.
  # Clobbers: %rax, %rcx, %rdx, %rdi, %xmm0, flags.
  114. .globl _padlock_sha1_blocks
  115. .p2align 4
  116. _padlock_sha1_blocks:
  117. movq %rdx,%rcx
  # Keep the caller's state pointer in %rdx for the write-back at the end.
  118. movq %rdi,%rdx
  119. movups (%rdi),%xmm0
  # Reserve an aligned scratch area; movaps below requires 16-byte alignment.
  120. subq $128+8,%rsp
  121. movl 16(%rdi),%eax
  122. movaps %xmm0,(%rsp)
  123. movq %rsp,%rdi
  124. movl %eax,16(%rsp)
  # %rax = -1 distinguishes this entry point from _padlock_sha1_oneshot (0).
  125. movq $-1,%rax
  # 0xf3,0x0f,0xa6,0xc8 encodes "rep xsha1".
  126. .byte 0xf3,0x0f,0xa6,0xc8
  # Copy the 20-byte result from the stack back to the caller's state.
  127. movaps (%rsp),%xmm0
  128. movl 16(%rsp),%eax
  129. addq $128+8,%rsp
  130. movups %xmm0,(%rdx)
  131. movl %eax,16(%rdx)
  # 0xf3,0xc3 encodes "rep ret".
  132. .byte 0xf3,0xc3
  # _padlock_sha256_oneshot: runs "rep xsha256" with %rax = 0 on a stack copy
  # of a 32-byte state and writes the updated state back.
  # In:  %rdi = pointer to the 32-byte state (read and written, may be
  #             unaligned: movups is used),
  #      %rsi = passed through untouched (presumably the input data pointer),
  #      %rdx = moved into %rcx for the instruction (presumably the length).
  # Clobbers: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags.
  133. .globl _padlock_sha256_oneshot
  134. .p2align 4
  135. _padlock_sha256_oneshot:
  136. movq %rdx,%rcx
  # Keep the caller's state pointer in %rdx for the write-back at the end.
  137. movq %rdi,%rdx
  138. movups (%rdi),%xmm0
  # Reserve an aligned scratch area; movaps below requires 16-byte alignment.
  139. subq $128+8,%rsp
  140. movups 16(%rdi),%xmm1
  141. movaps %xmm0,(%rsp)
  142. movq %rsp,%rdi
  143. movaps %xmm1,16(%rsp)
  # %rax = 0 distinguishes this entry point from _padlock_sha256_blocks (-1).
  144. xorq %rax,%rax
  # 0xf3,0x0f,0xa6,0xd0 encodes "rep xsha256".
  145. .byte 0xf3,0x0f,0xa6,0xd0
  # Copy the 32-byte result from the stack back to the caller's state.
  146. movaps (%rsp),%xmm0
  147. movaps 16(%rsp),%xmm1
  148. addq $128+8,%rsp
  149. movups %xmm0,(%rdx)
  150. movups %xmm1,16(%rdx)
  # 0xf3,0xc3 encodes "rep ret".
  151. .byte 0xf3,0xc3
  # _padlock_sha256_blocks: runs "rep xsha256" with %rax = -1 on a stack copy
  # of a 32-byte state and writes the updated state back.
  # In:  %rdi = pointer to the 32-byte state (read and written, may be
  #             unaligned: movups is used),
  #      %rsi = passed through untouched (presumably the input data pointer),
  #      %rdx = moved into %rcx for the instruction (presumably a block count
  #             or length - confirm).
  # Identical to _padlock_sha256_oneshot except for the %rax value.
  # Clobbers: %rax, %rcx, %rdx, %rdi, %xmm0, %xmm1, flags.
  152. .globl _padlock_sha256_blocks
  153. .p2align 4
  154. _padlock_sha256_blocks:
  155. movq %rdx,%rcx
  # Keep the caller's state pointer in %rdx for the write-back at the end.
  156. movq %rdi,%rdx
  157. movups (%rdi),%xmm0
  # Reserve an aligned scratch area; movaps below requires 16-byte alignment.
  158. subq $128+8,%rsp
  159. movups 16(%rdi),%xmm1
  160. movaps %xmm0,(%rsp)
  161. movq %rsp,%rdi
  162. movaps %xmm1,16(%rsp)
  # %rax = -1 distinguishes this entry point from _padlock_sha256_oneshot (0).
  163. movq $-1,%rax
  # 0xf3,0x0f,0xa6,0xd0 encodes "rep xsha256".
  164. .byte 0xf3,0x0f,0xa6,0xd0
  # Copy the 32-byte result from the stack back to the caller's state.
  165. movaps (%rsp),%xmm0
  166. movaps 16(%rsp),%xmm1
  167. addq $128+8,%rsp
  168. movups %xmm0,(%rdx)
  169. movups %xmm1,16(%rdx)
  # 0xf3,0xc3 encodes "rep ret".
  170. .byte 0xf3,0xc3
  # _padlock_sha512_blocks: runs the instruction encoded as
  # 0xf3,0x0f,0xa6,0xe0 on a stack copy of a 64-byte state and writes the
  # updated state back.
  # In:  %rdi = pointer to the 64-byte state (read and written, may be
  #             unaligned: movups is used),
  #      %rsi = passed through untouched (presumably the input data pointer),
  #      %rdx = moved into %rcx for the instruction (presumably a block count
  #             or length - confirm).
  # Unlike the SHA-1/SHA-256 entry points, %rax is not initialised here.
  # Clobbers: %rcx, %rdx, %rdi, %xmm0-%xmm3, flags.
  171. .globl _padlock_sha512_blocks
  172. .p2align 4
  173. _padlock_sha512_blocks:
  174. movq %rdx,%rcx
  # Keep the caller's state pointer in %rdx for the write-back at the end.
  175. movq %rdi,%rdx
  176. movups (%rdi),%xmm0
  # Reserve an aligned scratch area; movaps below requires 16-byte alignment.
  177. subq $128+8,%rsp
  178. movups 16(%rdi),%xmm1
  179. movups 32(%rdi),%xmm2
  180. movups 48(%rdi),%xmm3
  181. movaps %xmm0,(%rsp)
  182. movq %rsp,%rdi
  183. movaps %xmm1,16(%rsp)
  184. movaps %xmm2,32(%rsp)
  185. movaps %xmm3,48(%rsp)
  # REP-prefixed 0x0f,0xa6 /e0: the SHA-512 member of the xsha family
  # (NOTE(review): vendor mnemonic not shown in this file - confirm).
  186. .byte 0xf3,0x0f,0xa6,0xe0
  # Copy the 64-byte result from the stack back to the caller's state.
  187. movaps (%rsp),%xmm0
  188. movaps 16(%rsp),%xmm1
  189. movaps 32(%rsp),%xmm2
  190. movaps 48(%rsp),%xmm3
  191. addq $128+8,%rsp
  192. movups %xmm0,(%rdx)
  193. movups %xmm1,16(%rdx)
  194. movups %xmm2,32(%rdx)
  195. movups %xmm3,48(%rdx)
  # 0xf3,0xc3 encodes "rep ret".
  196. .byte 0xf3,0xc3
  # _padlock_ecb_encrypt: drives "rep xcryptecb" over a buffer, bouncing data
  # through a stack buffer when the pointers are not 16-byte aligned.
  # In:  %rdi = output pointer, %rsi = input pointer,
  #      %rdx = context pointer (must be 16-byte aligned),
  #      %rcx = length in bytes (must be a multiple of 16).
  # Out: %eax = 1 on success, 0 if the %rdx/%rcx checks fail.
  # Context layout as used here: +0 is passed to the instruction in %rax,
  # +16 in %rdx (a dword whose bit 5 is tested below), +32 in %rbx.
  # NOTE(review): presumably IV / control word / key schedule - confirm.
  # Unaligned data is processed in chunks of at most 512 bytes; the stack
  # bounce buffer is zeroed before returning.
  197. .globl _padlock_ecb_encrypt
  198. .p2align 4
  199. _padlock_ecb_encrypt:
  200. pushq %rbp
  201. pushq %rbx
  # %eax = 0 is the return value on the abort path.
  202. xorl %eax,%eax
  203. testq $15,%rdx
  204. jnz L$ecb_abort
  205. testq $15,%rcx
  206. jnz L$ecb_abort
  # Helper contract: %rax = saved-context slot, %rdx = context, flags image on
  # the stack. The pushed flags stay on the stack until L$ecb_exit.
  207. leaq L$padlock_saved_context(%rip),%rax
  208. pushf
  # Clear the direction flag so the "rep movsq" copies below run forwards.
  209. cld
  210. call _padlock_verify_ctx
  # From here on %rdx = context+16.
  211. leaq 16(%rdx),%rdx
  212. xorl %eax,%eax
  213. xorl %ebx,%ebx
  # Bit 5 of the dword at context+16 set: skip the software alignment handling
  # (presumably a "hardware handles alignment" control bit - confirm).
  214. testl $32,(%rdx)
  215. jnz L$ecb_aligned
  # %al = 1 if output is 16-byte aligned, %bl = 1 if input is.
  216. testq $0x0f,%rdi
  217. setz %al
  218. testq $0x0f,%rsi
  219. setz %bl
  220. testl %ebx,%eax
  221. jnz L$ecb_aligned
  # At least one pointer is unaligned. neg+not turns %rax into an all-ones mask
  # when the OUTPUT is unaligned and 0 when it is aligned.
  222. negq %rax
  223. movq $512,%rbx
  224. notq %rax
  # %rbp remembers the stack pointer before any bounce buffer is carved out.
  225. leaq (%rsp),%rbp
  226. cmpq %rbx,%rcx
  227. cmovcq %rcx,%rbx
  # %rax = min(len, 512) if output is unaligned, else 0: bounce buffer size.
  228. andq %rbx,%rax
  229. movq %rcx,%rbx
  230. negq %rax
  # %rbx = len mod 512 becomes the first chunk size (512 if the remainder is 0;
  # the cmovz below consumes ZF from this and, lea/mov do not touch flags).
  231. andq $512-1,%rbx
  232. leaq (%rax,%rbp,1),%rsp
  233. movq $512,%rax
  234. cmovzq %rax,%rbx
  # More data than the first chunk: start the chunk loop directly.
  235. cmpq %rbx,%rcx
  236. ja L$ecb_loop
  # Everything fits in one chunk. Pick the pointer to check: input by default,
  # output when no bounce buffer was allocated (%rsp == %rbp).
  237. movq %rsi,%rax
  238. cmpq %rsp,%rbp
  239. cmoveq %rdi,%rax
  # %rax = (-(ptr+len)) & 0xfff = bytes from the end of the data to the next
  # 4 KiB boundary.
  240. addq %rcx,%rax
  241. negq %rax
  242. andq $0xfff,%rax
  # If that distance is below 128, round the chunk down to a multiple of 128;
  # otherwise leave it unchanged (mask = %rbx itself).
  # NOTE(review): presumably avoids the hardware reading ahead across a page
  # boundary - confirm against PadLock errata.
  243. cmpq $128,%rax
  244. movq $-128,%rax
  245. cmovaeq %rbx,%rax
  246. andq %rax,%rbx
  # Nothing left after rounding: handle all of it via the stack-copied tail.
  247. jz L$ecb_unaligned_tail
  248. jmp L$ecb_loop
  249. .p2align 4
  # Chunk loop. On entry: %rdi/%rsi = current out/in, %rcx = bytes remaining,
  # %rbx = proposed chunk size.
  250. L$ecb_loop:
  # chunk = min(chunk, remaining)
  251. cmpq %rcx,%rbx
  252. cmovaq %rcx,%rbx
  # Save out, in, remaining and chunk in %r8, %r9, %r10, %r11.
  253. movq %rdi,%r8
  254. movq %rsi,%r9
  255. movq %rcx,%r10
  256. movq %rbx,%rcx
  257. movq %rbx,%r11
  # Unaligned output: let the instruction write into the stack buffer.
  258. testq $0x0f,%rdi
  259. cmovnzq %rsp,%rdi
  260. testq $0x0f,%rsi
  261. jz L$ecb_inp_aligned
  # Unaligned input: copy the chunk to the destination first, then process it
  # in place. 0xf3,0x48,0xa5 encodes "rep movsq" (%rcx = chunk/8 qwords).
  262. shrq $3,%rcx
  263. .byte 0xf3,0x48,0xa5
  # Rewind %rdi to the start of the copy and use it as the source too.
  264. subq %rbx,%rdi
  265. movq %rbx,%rcx
  266. movq %rdi,%rsi
  267. L$ecb_inp_aligned:
  # %rax = context+0, %rbx = context+32, %rcx = number of 16-byte blocks.
  268. leaq -16(%rdx),%rax
  269. leaq 16(%rdx),%rbx
  270. shrq $4,%rcx
  # 0xf3,0x0f,0xa7,200 (0xc8) encodes "rep xcryptecb".
  271. .byte 0xf3,0x0f,0xa7,200
  # Restore the real output pointer and chunk size.
  272. movq %r8,%rdi
  273. movq %r11,%rbx
  274. testq $0x0f,%rdi
  275. jz L$ecb_out_aligned
  # Unaligned output: copy the result from the stack buffer to the caller.
  276. movq %rbx,%rcx
  277. leaq (%rsp),%rsi
  278. shrq $3,%rcx
  279. .byte 0xf3,0x48,0xa5
  # Undo the advance done by the copy; the common code below re-adds it.
  280. subq %rbx,%rdi
  281. L$ecb_out_aligned:
  # Advance out/in by the chunk and reduce the remaining length.
  282. movq %r9,%rsi
  283. movq %r10,%rcx
  284. addq %rbx,%rdi
  285. addq %rbx,%rsi
  286. subq %rbx,%rcx
  # mov does not alter flags, so the jz below still tests the subq result.
  287. movq $512,%rbx
  288. jz L$ecb_break
  289. cmpq %rbx,%rcx
  290. jae L$ecb_loop
  # Fewer than 512 bytes remain (or the single chunk rounded down to 0):
  # copy the remaining input to the stack and process it from there.
  291. L$ecb_unaligned_tail:
  # Allocate %rcx bytes of stack only if no bounce buffer exists yet.
  292. xorl %eax,%eax
  293. cmpq %rsp,%rbp
  294. cmoveq %rcx,%rax
  295. movq %rdi,%r8
  296. movq %rcx,%rbx
  297. subq %rax,%rsp
  298. shrq $3,%rcx
  299. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  300. .byte 0xf3,0x48,0xa5
  # Source is now the stack copy; destination is the caller's output again.
  301. movq %rsp,%rsi
  302. movq %r8,%rdi
  303. movq %rbx,%rcx
  304. jmp L$ecb_loop
  305. .p2align 4
  306. L$ecb_break:
  # If a bounce buffer was used, overwrite it with zeros before releasing it.
  307. cmpq %rbp,%rsp
  308. je L$ecb_done
  309. pxor %xmm0,%xmm0
  310. leaq (%rsp),%rax
  311. L$ecb_bzero:
  312. movaps %xmm0,(%rax)
  313. leaq 16(%rax),%rax
  314. cmpq %rax,%rbp
  315. ja L$ecb_bzero
  316. L$ecb_done:
  # Release the bounce buffer.
  317. leaq (%rbp),%rsp
  318. jmp L$ecb_exit
  319. .p2align 4
  # Path taken when both pointers are aligned or bit 5 at context+16 is set.
  320. L$ecb_aligned:
  # %rbp = bytes from the end of the input to the next 4 KiB boundary.
  321. leaq (%rsi,%rcx,1),%rbp
  322. negq %rbp
  323. andq $0xfff,%rbp
  324. xorl %eax,%eax
  # Tail size: len & 127 if that distance is below 128, else 0.
  325. cmpq $128,%rbp
  326. movq $128-1,%rbp
  327. cmovaeq %rax,%rbp
  328. andq %rcx,%rbp
  # %rcx = bulk length processed directly; %rbp = tail handled via the stack.
  329. subq %rbp,%rcx
  330. jz L$ecb_aligned_tail
  331. leaq -16(%rdx),%rax
  332. leaq 16(%rdx),%rbx
  333. shrq $4,%rcx
  # "rep xcryptecb" over the bulk part.
  334. .byte 0xf3,0x0f,0xa7,200
  335. testq %rbp,%rbp
  336. jz L$ecb_exit
  337. L$ecb_aligned_tail:
  # Copy the tail input to a fresh stack buffer and run it through the chunk
  # loop, which also zeroes and releases the buffer afterwards.
  338. movq %rdi,%r8
  339. movq %rbp,%rbx
  340. movq %rbp,%rcx
  341. leaq (%rsp),%rbp
  342. subq %rcx,%rsp
  343. shrq $3,%rcx
  344. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  345. .byte 0xf3,0x48,0xa5
  346. leaq (%r8),%rdi
  347. leaq (%rsp),%rsi
  348. movq %rbx,%rcx
  349. jmp L$ecb_loop
  350. L$ecb_exit:
  351. movl $1,%eax
  # Drop the flags image pushed before the _padlock_verify_ctx call.
  352. leaq 8(%rsp),%rsp
  353. L$ecb_abort:
  354. popq %rbx
  355. popq %rbp
  # 0xf3,0xc3 encodes "rep ret".
  356. .byte 0xf3,0xc3
  # _padlock_cbc_encrypt: drives "rep xcryptcbc" over a buffer, bouncing data
  # through a stack buffer when the pointers are not 16-byte aligned.
  # In:  %rdi = output pointer, %rsi = input pointer,
  #      %rdx = context pointer (must be 16-byte aligned),
  #      %rcx = length in bytes (must be a multiple of 16).
  # Out: %eax = 1 on success, 0 if the %rdx/%rcx checks fail.
  # Context layout as used here: +0 is passed in %rax and refreshed after each
  # instruction, +16 is passed in %rdx, +32 in %rbx.
  # NOTE(review): presumably IV / control word / key schedule - confirm.
  # Structure matches _padlock_ecb_encrypt, with a 64-byte page-end margin
  # instead of 128 and the extra 16-byte copy back to context+0.
  357. .globl _padlock_cbc_encrypt
  358. .p2align 4
  359. _padlock_cbc_encrypt:
  360. pushq %rbp
  361. pushq %rbx
  # %eax = 0 is the return value on the abort path.
  362. xorl %eax,%eax
  363. testq $15,%rdx
  364. jnz L$cbc_abort
  365. testq $15,%rcx
  366. jnz L$cbc_abort
  # Helper contract: %rax = saved-context slot, %rdx = context, flags image on
  # the stack. The pushed flags stay on the stack until L$cbc_exit.
  367. leaq L$padlock_saved_context(%rip),%rax
  368. pushf
  # Clear the direction flag so the "rep movsq" copies below run forwards.
  369. cld
  370. call _padlock_verify_ctx
  # From here on %rdx = context+16.
  371. leaq 16(%rdx),%rdx
  372. xorl %eax,%eax
  373. xorl %ebx,%ebx
  # Bit 5 of the dword at context+16 set: skip the software alignment handling
  # (presumably a "hardware handles alignment" control bit - confirm).
  374. testl $32,(%rdx)
  375. jnz L$cbc_aligned
  # %al = 1 if output is 16-byte aligned, %bl = 1 if input is.
  376. testq $0x0f,%rdi
  377. setz %al
  378. testq $0x0f,%rsi
  379. setz %bl
  380. testl %ebx,%eax
  381. jnz L$cbc_aligned
  # neg+not: %rax = all ones if the OUTPUT is unaligned, 0 if it is aligned.
  382. negq %rax
  383. movq $512,%rbx
  384. notq %rax
  # %rbp remembers the stack pointer before any bounce buffer is carved out.
  385. leaq (%rsp),%rbp
  386. cmpq %rbx,%rcx
  387. cmovcq %rcx,%rbx
  # %rax = min(len, 512) if output is unaligned, else 0: bounce buffer size.
  388. andq %rbx,%rax
  389. movq %rcx,%rbx
  390. negq %rax
  # First chunk = len mod 512, or 512 if that is 0 (ZF from this and survives
  # the lea/mov that follow).
  391. andq $512-1,%rbx
  392. leaq (%rax,%rbp,1),%rsp
  393. movq $512,%rax
  394. cmovzq %rax,%rbx
  395. cmpq %rbx,%rcx
  396. ja L$cbc_loop
  # Single-chunk case: check input (or output when no bounce buffer exists)
  # for proximity to the end of its 4 KiB page.
  397. movq %rsi,%rax
  398. cmpq %rsp,%rbp
  399. cmoveq %rdi,%rax
  400. addq %rcx,%rax
  401. negq %rax
  402. andq $0xfff,%rax
  # Distance below 64: round the chunk down to a multiple of 64.
  # NOTE(review): presumably avoids the hardware reading ahead across a page
  # boundary - confirm against PadLock errata.
  403. cmpq $64,%rax
  404. movq $-64,%rax
  405. cmovaeq %rbx,%rax
  406. andq %rax,%rbx
  407. jz L$cbc_unaligned_tail
  408. jmp L$cbc_loop
  409. .p2align 4
  # Chunk loop. On entry: %rdi/%rsi = current out/in, %rcx = bytes remaining,
  # %rbx = proposed chunk size.
  410. L$cbc_loop:
  # chunk = min(chunk, remaining)
  411. cmpq %rcx,%rbx
  412. cmovaq %rcx,%rbx
  # Save out, in, remaining and chunk in %r8, %r9, %r10, %r11.
  413. movq %rdi,%r8
  414. movq %rsi,%r9
  415. movq %rcx,%r10
  416. movq %rbx,%rcx
  417. movq %rbx,%r11
  # Unaligned output: let the instruction write into the stack buffer.
  418. testq $0x0f,%rdi
  419. cmovnzq %rsp,%rdi
  420. testq $0x0f,%rsi
  421. jz L$cbc_inp_aligned
  # Unaligned input: copy the chunk to the destination, then process in place.
  # 0xf3,0x48,0xa5 encodes "rep movsq".
  422. shrq $3,%rcx
  423. .byte 0xf3,0x48,0xa5
  424. subq %rbx,%rdi
  425. movq %rbx,%rcx
  426. movq %rdi,%rsi
  427. L$cbc_inp_aligned:
  # %rax = context+0, %rbx = context+32, %rcx = number of 16-byte blocks.
  428. leaq -16(%rdx),%rax
  429. leaq 16(%rdx),%rbx
  430. shrq $4,%rcx
  # 0xf3,0x0f,0xa7,208 (0xd0) encodes "rep xcryptcbc".
  431. .byte 0xf3,0x0f,0xa7,208
  # Copy 16 bytes from wherever %rax now points back to context+0.
  # NOTE(review): presumably the instruction leaves %rax at the IV for the
  # next call - confirm.
  432. movdqa (%rax),%xmm0
  433. movdqa %xmm0,-16(%rdx)
  # Restore the real output pointer and chunk size.
  434. movq %r8,%rdi
  435. movq %r11,%rbx
  436. testq $0x0f,%rdi
  437. jz L$cbc_out_aligned
  # Unaligned output: copy the result from the stack buffer to the caller.
  438. movq %rbx,%rcx
  439. leaq (%rsp),%rsi
  440. shrq $3,%rcx
  441. .byte 0xf3,0x48,0xa5
  442. subq %rbx,%rdi
  443. L$cbc_out_aligned:
  # Advance out/in by the chunk and reduce the remaining length.
  444. movq %r9,%rsi
  445. movq %r10,%rcx
  446. addq %rbx,%rdi
  447. addq %rbx,%rsi
  448. subq %rbx,%rcx
  # mov does not alter flags, so the jz below still tests the subq result.
  449. movq $512,%rbx
  450. jz L$cbc_break
  451. cmpq %rbx,%rcx
  452. jae L$cbc_loop
  # Fewer than 512 bytes remain: copy them to the stack and process from there.
  453. L$cbc_unaligned_tail:
  # Allocate %rcx bytes of stack only if no bounce buffer exists yet.
  454. xorl %eax,%eax
  455. cmpq %rsp,%rbp
  456. cmoveq %rcx,%rax
  457. movq %rdi,%r8
  458. movq %rcx,%rbx
  459. subq %rax,%rsp
  460. shrq $3,%rcx
  461. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  462. .byte 0xf3,0x48,0xa5
  463. movq %rsp,%rsi
  464. movq %r8,%rdi
  465. movq %rbx,%rcx
  466. jmp L$cbc_loop
  467. .p2align 4
  468. L$cbc_break:
  # If a bounce buffer was used, overwrite it with zeros before releasing it.
  469. cmpq %rbp,%rsp
  470. je L$cbc_done
  471. pxor %xmm0,%xmm0
  472. leaq (%rsp),%rax
  473. L$cbc_bzero:
  474. movaps %xmm0,(%rax)
  475. leaq 16(%rax),%rax
  476. cmpq %rax,%rbp
  477. ja L$cbc_bzero
  478. L$cbc_done:
  # Release the bounce buffer.
  479. leaq (%rbp),%rsp
  480. jmp L$cbc_exit
  481. .p2align 4
  # Path taken when both pointers are aligned or bit 5 at context+16 is set.
  482. L$cbc_aligned:
  # %rbp = bytes from the end of the input to the next 4 KiB boundary.
  483. leaq (%rsi,%rcx,1),%rbp
  484. negq %rbp
  485. andq $0xfff,%rbp
  486. xorl %eax,%eax
  # Tail size: len & 63 if that distance is below 64, else 0.
  487. cmpq $64,%rbp
  488. movq $64-1,%rbp
  489. cmovaeq %rax,%rbp
  490. andq %rcx,%rbp
  # %rcx = bulk length processed directly; %rbp = tail handled via the stack.
  491. subq %rbp,%rcx
  492. jz L$cbc_aligned_tail
  493. leaq -16(%rdx),%rax
  494. leaq 16(%rdx),%rbx
  495. shrq $4,%rcx
  # "rep xcryptcbc" over the bulk part, then refresh context+0 from (%rax).
  496. .byte 0xf3,0x0f,0xa7,208
  497. movdqa (%rax),%xmm0
  498. movdqa %xmm0,-16(%rdx)
  499. testq %rbp,%rbp
  500. jz L$cbc_exit
  501. L$cbc_aligned_tail:
  # Copy the tail input to a fresh stack buffer and run it through the chunk
  # loop, which also zeroes and releases the buffer afterwards.
  502. movq %rdi,%r8
  503. movq %rbp,%rbx
  504. movq %rbp,%rcx
  505. leaq (%rsp),%rbp
  506. subq %rcx,%rsp
  507. shrq $3,%rcx
  508. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  509. .byte 0xf3,0x48,0xa5
  510. leaq (%r8),%rdi
  511. leaq (%rsp),%rsi
  512. movq %rbx,%rcx
  513. jmp L$cbc_loop
  514. L$cbc_exit:
  515. movl $1,%eax
  # Drop the flags image pushed before the _padlock_verify_ctx call.
  516. leaq 8(%rsp),%rsp
  517. L$cbc_abort:
  518. popq %rbx
  519. popq %rbp
  # 0xf3,0xc3 encodes "rep ret".
  520. .byte 0xf3,0xc3
  # _padlock_cfb_encrypt: drives "rep xcryptcfb" over a buffer, bouncing data
  # through a stack buffer when the pointers are not 16-byte aligned.
  # In:  %rdi = output pointer, %rsi = input pointer,
  #      %rdx = context pointer (must be 16-byte aligned),
  #      %rcx = length in bytes (must be a multiple of 16).
  # Out: %eax = 1 on success, 0 if the %rdx/%rcx checks fail.
  # Context layout as used here: +0 is passed in %rax and refreshed after each
  # instruction, +16 is passed in %rdx, +32 in %rbx.
  # NOTE(review): presumably IV / control word / key schedule - confirm.
  # Unlike the ECB/CBC routines in this file there is no page-boundary check
  # and no separate tail path.
  521. .globl _padlock_cfb_encrypt
  522. .p2align 4
  523. _padlock_cfb_encrypt:
  524. pushq %rbp
  525. pushq %rbx
  # %eax = 0 is the return value on the abort path.
  526. xorl %eax,%eax
  527. testq $15,%rdx
  528. jnz L$cfb_abort
  529. testq $15,%rcx
  530. jnz L$cfb_abort
  # Helper contract: %rax = saved-context slot, %rdx = context, flags image on
  # the stack. The pushed flags stay on the stack until L$cfb_exit.
  531. leaq L$padlock_saved_context(%rip),%rax
  532. pushf
  # Clear the direction flag so the "rep movsq" copies below run forwards.
  533. cld
  534. call _padlock_verify_ctx
  # From here on %rdx = context+16.
  535. leaq 16(%rdx),%rdx
  536. xorl %eax,%eax
  537. xorl %ebx,%ebx
  # Bit 5 of the dword at context+16 set: skip the software alignment handling
  # (presumably a "hardware handles alignment" control bit - confirm).
  538. testl $32,(%rdx)
  539. jnz L$cfb_aligned
  # %al = 1 if output is 16-byte aligned, %bl = 1 if input is.
  540. testq $0x0f,%rdi
  541. setz %al
  542. testq $0x0f,%rsi
  543. setz %bl
  544. testl %ebx,%eax
  545. jnz L$cfb_aligned
  # neg+not: %rax = all ones if the OUTPUT is unaligned, 0 if it is aligned.
  546. negq %rax
  547. movq $512,%rbx
  548. notq %rax
  # %rbp remembers the stack pointer before any bounce buffer is carved out.
  549. leaq (%rsp),%rbp
  550. cmpq %rbx,%rcx
  551. cmovcq %rcx,%rbx
  # %rax = min(len, 512) if output is unaligned, else 0: bounce buffer size.
  552. andq %rbx,%rax
  553. movq %rcx,%rbx
  554. negq %rax
  # First chunk = len mod 512, or 512 if that is 0 (ZF from this and survives
  # the lea/mov that follow).
  555. andq $512-1,%rbx
  556. leaq (%rax,%rbp,1),%rsp
  557. movq $512,%rax
  558. cmovzq %rax,%rbx
  559. jmp L$cfb_loop
  560. .p2align 4
  # Chunk loop. On entry: %rdi/%rsi = current out/in, %rcx = bytes remaining,
  # %rbx = proposed chunk size.
  561. L$cfb_loop:
  # chunk = min(chunk, remaining)
  562. cmpq %rcx,%rbx
  563. cmovaq %rcx,%rbx
  # Save out, in, remaining and chunk in %r8, %r9, %r10, %r11.
  564. movq %rdi,%r8
  565. movq %rsi,%r9
  566. movq %rcx,%r10
  567. movq %rbx,%rcx
  568. movq %rbx,%r11
  # Unaligned output: let the instruction write into the stack buffer.
  569. testq $0x0f,%rdi
  570. cmovnzq %rsp,%rdi
  571. testq $0x0f,%rsi
  572. jz L$cfb_inp_aligned
  # Unaligned input: copy the chunk to the destination, then process in place.
  # 0xf3,0x48,0xa5 encodes "rep movsq".
  573. shrq $3,%rcx
  574. .byte 0xf3,0x48,0xa5
  575. subq %rbx,%rdi
  576. movq %rbx,%rcx
  577. movq %rdi,%rsi
  578. L$cfb_inp_aligned:
  # %rax = context+0, %rbx = context+32, %rcx = number of 16-byte blocks.
  579. leaq -16(%rdx),%rax
  580. leaq 16(%rdx),%rbx
  581. shrq $4,%rcx
  # 0xf3,0x0f,0xa7,224 (0xe0) encodes "rep xcryptcfb".
  582. .byte 0xf3,0x0f,0xa7,224
  # Copy 16 bytes from wherever %rax now points back to context+0.
  # NOTE(review): presumably the instruction leaves %rax at the IV for the
  # next call - confirm.
  583. movdqa (%rax),%xmm0
  584. movdqa %xmm0,-16(%rdx)
  # Restore the real output pointer and chunk size.
  585. movq %r8,%rdi
  586. movq %r11,%rbx
  587. testq $0x0f,%rdi
  588. jz L$cfb_out_aligned
  # Unaligned output: copy the result from the stack buffer to the caller.
  589. movq %rbx,%rcx
  590. leaq (%rsp),%rsi
  591. shrq $3,%rcx
  592. .byte 0xf3,0x48,0xa5
  593. subq %rbx,%rdi
  594. L$cfb_out_aligned:
  # Advance out/in by the chunk and reduce the remaining length.
  595. movq %r9,%rsi
  596. movq %r10,%rcx
  597. addq %rbx,%rdi
  598. addq %rbx,%rsi
  599. subq %rbx,%rcx
  # mov does not alter flags, so the jnz below still tests the subq result.
  600. movq $512,%rbx
  601. jnz L$cfb_loop
  # All data processed: zero the bounce buffer if one was allocated.
  602. cmpq %rbp,%rsp
  603. je L$cfb_done
  604. pxor %xmm0,%xmm0
  605. leaq (%rsp),%rax
  606. L$cfb_bzero:
  607. movaps %xmm0,(%rax)
  608. leaq 16(%rax),%rax
  609. cmpq %rax,%rbp
  610. ja L$cfb_bzero
  611. L$cfb_done:
  # Release the bounce buffer.
  612. leaq (%rbp),%rsp
  613. jmp L$cfb_exit
  614. .p2align 4
  # Path taken when both pointers are aligned or bit 5 at context+16 is set:
  # the whole length is handed to one "rep xcryptcfb".
  615. L$cfb_aligned:
  616. leaq -16(%rdx),%rax
  617. leaq 16(%rdx),%rbx
  618. shrq $4,%rcx
  619. .byte 0xf3,0x0f,0xa7,224
  # Refresh context+0 from (%rax), as in the loop above.
  620. movdqa (%rax),%xmm0
  621. movdqa %xmm0,-16(%rdx)
  622. L$cfb_exit:
  623. movl $1,%eax
  # Drop the flags image pushed before the _padlock_verify_ctx call.
  624. leaq 8(%rsp),%rsp
  625. L$cfb_abort:
  626. popq %rbx
  627. popq %rbp
  # 0xf3,0xc3 encodes "rep ret".
  628. .byte 0xf3,0xc3
  # _padlock_ofb_encrypt: drives "rep xcryptofb" over a buffer, bouncing data
  # through a stack buffer when the pointers are not 16-byte aligned.
  # In:  %rdi = output pointer, %rsi = input pointer,
  #      %rdx = context pointer (must be 16-byte aligned),
  #      %rcx = length in bytes (must be a multiple of 16).
  # Out: %eax = 1 on success, 0 if the %rdx/%rcx checks fail.
  # Context layout as used here: +0 is passed in %rax and refreshed after each
  # instruction, +16 is passed in %rdx, +32 in %rbx.
  # NOTE(review): presumably IV / control word / key schedule - confirm.
  # Same structure as _padlock_cfb_encrypt; only the opcode byte differs.
  629. .globl _padlock_ofb_encrypt
  630. .p2align 4
  631. _padlock_ofb_encrypt:
  632. pushq %rbp
  633. pushq %rbx
  # %eax = 0 is the return value on the abort path.
  634. xorl %eax,%eax
  635. testq $15,%rdx
  636. jnz L$ofb_abort
  637. testq $15,%rcx
  638. jnz L$ofb_abort
  # Helper contract: %rax = saved-context slot, %rdx = context, flags image on
  # the stack. The pushed flags stay on the stack until L$ofb_exit.
  639. leaq L$padlock_saved_context(%rip),%rax
  640. pushf
  # Clear the direction flag so the "rep movsq" copies below run forwards.
  641. cld
  642. call _padlock_verify_ctx
  # From here on %rdx = context+16.
  643. leaq 16(%rdx),%rdx
  644. xorl %eax,%eax
  645. xorl %ebx,%ebx
  # Bit 5 of the dword at context+16 set: skip the software alignment handling
  # (presumably a "hardware handles alignment" control bit - confirm).
  646. testl $32,(%rdx)
  647. jnz L$ofb_aligned
  # %al = 1 if output is 16-byte aligned, %bl = 1 if input is.
  648. testq $0x0f,%rdi
  649. setz %al
  650. testq $0x0f,%rsi
  651. setz %bl
  652. testl %ebx,%eax
  653. jnz L$ofb_aligned
  # neg+not: %rax = all ones if the OUTPUT is unaligned, 0 if it is aligned.
  654. negq %rax
  655. movq $512,%rbx
  656. notq %rax
  # %rbp remembers the stack pointer before any bounce buffer is carved out.
  657. leaq (%rsp),%rbp
  658. cmpq %rbx,%rcx
  659. cmovcq %rcx,%rbx
  # %rax = min(len, 512) if output is unaligned, else 0: bounce buffer size.
  660. andq %rbx,%rax
  661. movq %rcx,%rbx
  662. negq %rax
  # First chunk = len mod 512, or 512 if that is 0 (ZF from this and survives
  # the lea/mov that follow).
  663. andq $512-1,%rbx
  664. leaq (%rax,%rbp,1),%rsp
  665. movq $512,%rax
  666. cmovzq %rax,%rbx
  667. jmp L$ofb_loop
  668. .p2align 4
  # Chunk loop. On entry: %rdi/%rsi = current out/in, %rcx = bytes remaining,
  # %rbx = proposed chunk size.
  669. L$ofb_loop:
  # chunk = min(chunk, remaining)
  670. cmpq %rcx,%rbx
  671. cmovaq %rcx,%rbx
  # Save out, in, remaining and chunk in %r8, %r9, %r10, %r11.
  672. movq %rdi,%r8
  673. movq %rsi,%r9
  674. movq %rcx,%r10
  675. movq %rbx,%rcx
  676. movq %rbx,%r11
  # Unaligned output: let the instruction write into the stack buffer.
  677. testq $0x0f,%rdi
  678. cmovnzq %rsp,%rdi
  679. testq $0x0f,%rsi
  680. jz L$ofb_inp_aligned
  # Unaligned input: copy the chunk to the destination, then process in place.
  # 0xf3,0x48,0xa5 encodes "rep movsq".
  681. shrq $3,%rcx
  682. .byte 0xf3,0x48,0xa5
  683. subq %rbx,%rdi
  684. movq %rbx,%rcx
  685. movq %rdi,%rsi
  686. L$ofb_inp_aligned:
  # %rax = context+0, %rbx = context+32, %rcx = number of 16-byte blocks.
  687. leaq -16(%rdx),%rax
  688. leaq 16(%rdx),%rbx
  689. shrq $4,%rcx
  # 0xf3,0x0f,0xa7,232 (0xe8) encodes "rep xcryptofb".
  690. .byte 0xf3,0x0f,0xa7,232
  # Copy 16 bytes from wherever %rax now points back to context+0.
  # NOTE(review): presumably the instruction leaves %rax at the IV for the
  # next call - confirm.
  691. movdqa (%rax),%xmm0
  692. movdqa %xmm0,-16(%rdx)
  # Restore the real output pointer and chunk size.
  693. movq %r8,%rdi
  694. movq %r11,%rbx
  695. testq $0x0f,%rdi
  696. jz L$ofb_out_aligned
  # Unaligned output: copy the result from the stack buffer to the caller.
  697. movq %rbx,%rcx
  698. leaq (%rsp),%rsi
  699. shrq $3,%rcx
  700. .byte 0xf3,0x48,0xa5
  701. subq %rbx,%rdi
  702. L$ofb_out_aligned:
  # Advance out/in by the chunk and reduce the remaining length.
  703. movq %r9,%rsi
  704. movq %r10,%rcx
  705. addq %rbx,%rdi
  706. addq %rbx,%rsi
  707. subq %rbx,%rcx
  # mov does not alter flags, so the jnz below still tests the subq result.
  708. movq $512,%rbx
  709. jnz L$ofb_loop
  # All data processed: zero the bounce buffer if one was allocated.
  710. cmpq %rbp,%rsp
  711. je L$ofb_done
  712. pxor %xmm0,%xmm0
  713. leaq (%rsp),%rax
  714. L$ofb_bzero:
  715. movaps %xmm0,(%rax)
  716. leaq 16(%rax),%rax
  717. cmpq %rax,%rbp
  718. ja L$ofb_bzero
  719. L$ofb_done:
  # Release the bounce buffer.
  720. leaq (%rbp),%rsp
  721. jmp L$ofb_exit
  722. .p2align 4
  # Path taken when both pointers are aligned or bit 5 at context+16 is set:
  # the whole length is handed to one "rep xcryptofb".
  723. L$ofb_aligned:
  724. leaq -16(%rdx),%rax
  725. leaq 16(%rdx),%rbx
  726. shrq $4,%rcx
  727. .byte 0xf3,0x0f,0xa7,232
  # Refresh context+0 from (%rax), as in the loop above.
  728. movdqa (%rax),%xmm0
  729. movdqa %xmm0,-16(%rdx)
  730. L$ofb_exit:
  731. movl $1,%eax
  # Drop the flags image pushed before the _padlock_verify_ctx call.
  732. leaq 8(%rsp),%rsp
  733. L$ofb_abort:
  734. popq %rbx
  735. popq %rbp
  # 0xf3,0xc3 encodes "rep ret".
  736. .byte 0xf3,0xc3
  # _padlock_ctr32_encrypt: drives "rep xcryptctr" over a buffer, bouncing data
  # through a stack buffer when the pointers are not 16-byte aligned.
  # In:  %rdi = output pointer, %rsi = input pointer,
  #      %rdx = context pointer (must be 16-byte aligned),
  #      %rcx = length in bytes (must be a multiple of 16).
  # Out: %eax = 1 on success, 0 if the %rdx/%rcx checks fail.
  # Context layout as used here: +0 is passed in %rax, +16 in %rdx, +32 in
  # %rbx; the dword at context+12 (-4(%rdx) below) is read with bswap and so
  # treated as a big-endian 32-bit counter.
  # The code splits the work so that no single instruction crosses a point
  # where the counter's low bits wrap, and it adds the carry into the upper 16
  # bits itself.
  # NOTE(review): presumably the hardware only increments the low 16 bits of
  # the counter - confirm against the PadLock documentation.
  737. .globl _padlock_ctr32_encrypt
  738. .p2align 4
  739. _padlock_ctr32_encrypt:
  740. pushq %rbp
  741. pushq %rbx
  # %eax = 0 is the return value on the abort path.
  742. xorl %eax,%eax
  743. testq $15,%rdx
  744. jnz L$ctr32_abort
  745. testq $15,%rcx
  746. jnz L$ctr32_abort
  # Helper contract: %rax = saved-context slot, %rdx = context, flags image on
  # the stack. The pushed flags stay on the stack until L$ctr32_exit.
  747. leaq L$padlock_saved_context(%rip),%rax
  748. pushf
  # Clear the direction flag so the "rep movsq" copies below run forwards.
  749. cld
  750. call _padlock_verify_ctx
  # From here on %rdx = context+16.
  751. leaq 16(%rdx),%rdx
  752. xorl %eax,%eax
  753. xorl %ebx,%ebx
  # Bit 5 of the dword at context+16 set: skip the software alignment handling
  # (presumably a "hardware handles alignment" control bit - confirm).
  754. testl $32,(%rdx)
  755. jnz L$ctr32_aligned
  # %al = 1 if output is 16-byte aligned, %bl = 1 if input is.
  756. testq $0x0f,%rdi
  757. setz %al
  758. testq $0x0f,%rsi
  759. setz %bl
  760. testl %ebx,%eax
  761. jnz L$ctr32_aligned
  # neg+not: %rax = all ones if the OUTPUT is unaligned, 0 if it is aligned.
  762. negq %rax
  763. movq $512,%rbx
  764. notq %rax
  # %rbp remembers the stack pointer before any bounce buffer is carved out.
  765. leaq (%rsp),%rbp
  766. cmpq %rbx,%rcx
  767. cmovcq %rcx,%rbx
  # %rax = min(len, 512) if output is unaligned, else 0: bounce buffer size.
  768. andq %rbx,%rax
  769. movq %rcx,%rbx
  770. negq %rax
  771. andq $512-1,%rbx
  772. leaq (%rax,%rbp,1),%rsp
  773. movq $512,%rax
  # The %rbx value selected here is overwritten a few lines below.
  774. cmovzq %rax,%rbx
  # Not referenced by any jump within this file.
  775. L$ctr32_reenter:
  # %eax = (-counter) & 31 = blocks until the counter's low 5 bits wrap to 0;
  # times 16 gives bytes, with 0 meaning a full 512 bytes.
  776. movl -4(%rdx),%eax
  777. bswapl %eax
  778. negl %eax
  779. andl $31,%eax
  780. movq $512,%rbx
  781. shll $4,%eax
  782. cmovzq %rbx,%rax
  # First chunk = min(len, bytes until that wrap).
  783. cmpq %rax,%rcx
  784. cmovaq %rax,%rbx
  785. cmovbeq %rcx,%rbx
  786. cmpq %rbx,%rcx
  787. ja L$ctr32_loop
  # Single-chunk case: check input (or output when no bounce buffer exists)
  # for proximity to the end of its 4 KiB page.
  788. movq %rsi,%rax
  789. cmpq %rsp,%rbp
  790. cmoveq %rdi,%rax
  791. addq %rcx,%rax
  792. negq %rax
  793. andq $0xfff,%rax
  # Distance below 32: round the chunk down to a multiple of 32.
  # NOTE(review): presumably avoids the hardware reading ahead across a page
  # boundary - confirm against PadLock errata.
  794. cmpq $32,%rax
  795. movq $-32,%rax
  796. cmovaeq %rbx,%rax
  797. andq %rax,%rbx
  798. jz L$ctr32_unaligned_tail
  799. jmp L$ctr32_loop
  800. .p2align 4
  # Chunk loop. On entry: %rdi/%rsi = current out/in, %rcx = bytes remaining,
  # %rbx = proposed chunk size.
  801. L$ctr32_loop:
  # chunk = min(chunk, remaining)
  802. cmpq %rcx,%rbx
  803. cmovaq %rcx,%rbx
  # Save out, in, remaining and chunk in %r8, %r9, %r10, %r11.
  804. movq %rdi,%r8
  805. movq %rsi,%r9
  806. movq %rcx,%r10
  807. movq %rbx,%rcx
  808. movq %rbx,%r11
  # Unaligned output: let the instruction write into the stack buffer.
  809. testq $0x0f,%rdi
  810. cmovnzq %rsp,%rdi
  811. testq $0x0f,%rsi
  812. jz L$ctr32_inp_aligned
  # Unaligned input: copy the chunk to the destination, then process in place.
  # 0xf3,0x48,0xa5 encodes "rep movsq".
  813. shrq $3,%rcx
  814. .byte 0xf3,0x48,0xa5
  815. subq %rbx,%rdi
  816. movq %rbx,%rcx
  817. movq %rdi,%rsi
  818. L$ctr32_inp_aligned:
  # %rax = context+0, %rbx = context+32, %rcx = number of 16-byte blocks.
  819. leaq -16(%rdx),%rax
  820. leaq 16(%rdx),%rbx
  821. shrq $4,%rcx
  # 0xf3,0x0f,0xa7,216 (0xd8) encodes "rep xcryptctr".
  822. .byte 0xf3,0x0f,0xa7,216
  # The counter dword is loaded without bswap, so its upper 16 register bits
  # are the counter's low 16 bits. If those are now zero they have wrapped:
  # add 1 to the counter's upper 16 bits and store it back.
  823. movl -4(%rdx),%eax
  824. testl $0xffff0000,%eax
  825. jnz L$ctr32_no_carry
  826. bswapl %eax
  827. addl $0x10000,%eax
  828. bswapl %eax
  829. movl %eax,-4(%rdx)
  830. L$ctr32_no_carry:
  # Restore the real output pointer and chunk size.
  831. movq %r8,%rdi
  832. movq %r11,%rbx
  833. testq $0x0f,%rdi
  834. jz L$ctr32_out_aligned
  # Unaligned output: copy the result from the stack buffer to the caller.
  835. movq %rbx,%rcx
  836. leaq (%rsp),%rsi
  837. shrq $3,%rcx
  838. .byte 0xf3,0x48,0xa5
  839. subq %rbx,%rdi
  840. L$ctr32_out_aligned:
  # Advance out/in by the chunk and reduce the remaining length.
  841. movq %r9,%rsi
  842. movq %r10,%rcx
  843. addq %rbx,%rdi
  844. addq %rbx,%rsi
  845. subq %rbx,%rcx
  # mov does not alter flags, so the jz below still tests the subq result.
  846. movq $512,%rbx
  847. jz L$ctr32_break
  848. cmpq %rbx,%rcx
  849. jae L$ctr32_loop
  # Fewer than 512 bytes remain: repeat the page-end check for the final
  # chunk before deciding between another loop pass and the stack tail.
  850. movq %rcx,%rbx
  851. movq %rsi,%rax
  852. cmpq %rsp,%rbp
  853. cmoveq %rdi,%rax
  854. addq %rcx,%rax
  855. negq %rax
  856. andq $0xfff,%rax
  857. cmpq $32,%rax
  858. movq $-32,%rax
  859. cmovaeq %rbx,%rax
  860. andq %rax,%rbx
  861. jnz L$ctr32_loop
  # Copy the remaining input to the stack and process it from there.
  862. L$ctr32_unaligned_tail:
  # Allocate %rcx bytes of stack only if no bounce buffer exists yet.
  863. xorl %eax,%eax
  864. cmpq %rsp,%rbp
  865. cmoveq %rcx,%rax
  866. movq %rdi,%r8
  867. movq %rcx,%rbx
  868. subq %rax,%rsp
  869. shrq $3,%rcx
  870. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  871. .byte 0xf3,0x48,0xa5
  872. movq %rsp,%rsi
  873. movq %r8,%rdi
  874. movq %rbx,%rcx
  875. jmp L$ctr32_loop
  876. .p2align 4
  877. L$ctr32_break:
  # If a bounce buffer was used, overwrite it with zeros before releasing it.
  878. cmpq %rbp,%rsp
  879. je L$ctr32_done
  880. pxor %xmm0,%xmm0
  881. leaq (%rsp),%rax
  882. L$ctr32_bzero:
  883. movaps %xmm0,(%rax)
  884. leaq 16(%rax),%rax
  885. cmpq %rax,%rbp
  886. ja L$ctr32_bzero
  887. L$ctr32_done:
  # Release the bounce buffer.
  888. leaq (%rbp),%rsp
  889. jmp L$ctr32_exit
  890. .p2align 4
  # Path taken when both pointers are aligned or bit 5 at context+16 is set.
  891. L$ctr32_aligned:
  # %eax = (-counter) & 0xffff = blocks until the counter's low 16 bits wrap;
  # times 16 gives bytes, with 0 meaning the full 0x10000 blocks (1048576 B).
  892. movl -4(%rdx),%eax
  893. bswapl %eax
  894. negl %eax
  895. andl $0xffff,%eax
  896. movq $1048576,%rbx
  897. shll $4,%eax
  898. cmovzq %rbx,%rax
  # %rbx = min(len, bytes until the wrap); if len does not reach the wrap
  # (len <= that amount) skip the carry loop entirely.
  899. cmpq %rax,%rcx
  900. cmovaq %rax,%rbx
  901. cmovbeq %rcx,%rbx
  902. jbe L$ctr32_aligned_skip
  # Each pass processes exactly up to a 16-bit wrap, then adds the carry into
  # the counter's upper 16 bits.
  903. L$ctr32_aligned_loop:
  904. movq %rcx,%r10
  905. movq %rbx,%rcx
  906. movq %rbx,%r11
  907. leaq -16(%rdx),%rax
  908. leaq 16(%rdx),%rbx
  909. shrq $4,%rcx
  # "rep xcryptctr"
  910. .byte 0xf3,0x0f,0xa7,216
  911. movl -4(%rdx),%eax
  912. bswapl %eax
  913. addl $0x10000,%eax
  914. bswapl %eax
  915. movl %eax,-4(%rdx)
  # remaining -= chunk; the jz below tests this subq (mov keeps the flags).
  916. movq %r10,%rcx
  917. subq %r11,%rcx
  918. movq $1048576,%rbx
  919. jz L$ctr32_exit
  920. cmpq %rbx,%rcx
  921. jae L$ctr32_aligned_loop
  922. L$ctr32_aligned_skip:
  # %rbp = bytes from the end of the input to the next 4 KiB boundary.
  923. leaq (%rsi,%rcx,1),%rbp
  924. negq %rbp
  925. andq $0xfff,%rbp
  926. xorl %eax,%eax
  # Tail size: len & 31 if that distance is below 32, else 0.
  927. cmpq $32,%rbp
  928. movq $32-1,%rbp
  929. cmovaeq %rax,%rbp
  930. andq %rcx,%rbp
  # %rcx = bulk length processed directly; %rbp = tail handled via the stack.
  931. subq %rbp,%rcx
  932. jz L$ctr32_aligned_tail
  933. leaq -16(%rdx),%rax
  934. leaq 16(%rdx),%rbx
  935. shrq $4,%rcx
  # "rep xcryptctr" over the bulk part.
  936. .byte 0xf3,0x0f,0xa7,216
  937. testq %rbp,%rbp
  938. jz L$ctr32_exit
  939. L$ctr32_aligned_tail:
  # Copy the tail input to a fresh stack buffer and run it through the chunk
  # loop, which also zeroes and releases the buffer afterwards.
  940. movq %rdi,%r8
  941. movq %rbp,%rbx
  942. movq %rbp,%rcx
  943. leaq (%rsp),%rbp
  944. subq %rcx,%rsp
  945. shrq $3,%rcx
  946. leaq (%rsp),%rdi
  # "rep movsq": input tail -> stack.
  947. .byte 0xf3,0x48,0xa5
  948. leaq (%r8),%rdi
  949. leaq (%rsp),%rsi
  950. movq %rbx,%rcx
  951. jmp L$ctr32_loop
  952. L$ctr32_exit:
  953. movl $1,%eax
  # Drop the flags image pushed before the _padlock_verify_ctx call.
  954. leaq 8(%rsp),%rsp
  955. L$ctr32_abort:
  956. popq %rbx
  957. popq %rbp
  # 0xf3,0xc3 encodes "rep ret".
  958. .byte 0xf3,0xc3
  # NUL-terminated ASCII identification string embedded in the text section:
  # "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>".
  959. .byte 86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  960. .p2align 4
  961. .data
  962. .p2align 3
  # One writable 8-byte slot, initially 0. _padlock_verify_ctx compares it
  # against the context pointer in use and then overwrites it with that
  # pointer. It is a single global, not per-thread storage.
  963. L$padlock_saved_context:
  964. .quad 0