bn-586.s 23 KB


  .text

  # ----------------------------------------------------------------------
  # bn_mul_add_words -- r[i] = low32(r[i] + a[i]*w + carry) for num words.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    word array, read and rewritten in place
  #         8(%esp)   a    source word array
  #         12(%esp)  num  number of 32-bit words to process
  #         16(%esp)  w    32-bit multiplier
  # Out:    %eax = carry word left over after the last element
  # Clobb:  %ecx, %edx, flags.  The SSE2 path also uses %mm0-%mm7 and
  #         issues emms before returning.  %ebx/%esi/%edi/%ebp are saved
  #         and restored by the scalar path.
  #
  # Bit 26 of the first word of OPENSSL_ia32cap_P selects the code path:
  # set -> pmuludq/paddq path, clear -> scalar mull path.
  #
  # num == 0: both paths return 0 without touching r or a.  The SSE2 path
  # used to fall into its one-word loop with %ecx == 0, where subl $1
  # wrapped the counter and the loop kept storing far past the end of r.
  # ----------------------------------------------------------------------

  # One scalar step.  ebx = a, edi = r, ebp = w, esi = carry in/out;
  # eax/edx are scratch (edx:eax = 64-bit product).
  .macro BN_MAW_STEP off
        movl    \off(%ebx),%eax
        mull    %ebp
        addl    %esi,%eax
        adcl    $0,%edx
        addl    \off(%edi),%eax
        adcl    $0,%edx
        movl    %eax,\off(%edi)
        movl    %edx,%esi
  .endm

  .globl bn_mul_add_words
  .type bn_mul_add_words,@function
  .align 16
  bn_mul_add_words:
  .L_bn_mul_add_words_begin:
        # Position-independent address of OPENSSL_ia32cap_P.
        call    .Lmaw_pic
  .Lmaw_pic:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.Lmaw_pic(%eax),%eax
        btl     $26,(%eax)
        jnc     .Lmaw_scalar

        # ---- SSE2 path: eax = r, edx = a, ecx = words left,
        # ---- mm0 = w, mm1 = running 64-bit accumulator / carry.
        movl    4(%esp),%eax
        movl    8(%esp),%edx
        movl    12(%esp),%ecx
        movd    16(%esp),%mm0
        pxor    %mm1,%mm1
        testl   %ecx,%ecx
        jz      .Lmaw_sse2_exit         # nothing to do, carry is 0
        jmp     .Lmaw_sse2_entry
  .align 16
  .Lmaw_sse2_unrolled:
        # Eight words per pass; instruction order is hand-interleaved,
        # so it is kept exactly as it was.
        movd    (%eax),%mm3
        paddq   %mm3,%mm1
        movd    (%edx),%mm2
        pmuludq %mm0,%mm2
        movd    4(%edx),%mm4
        pmuludq %mm0,%mm4
        movd    8(%edx),%mm6
        pmuludq %mm0,%mm6
        movd    12(%edx),%mm7
        pmuludq %mm0,%mm7
        paddq   %mm2,%mm1
        movd    4(%eax),%mm3
        paddq   %mm4,%mm3
        movd    8(%eax),%mm5
        paddq   %mm6,%mm5
        movd    12(%eax),%mm4
        paddq   %mm4,%mm7
        movd    %mm1,(%eax)
        movd    16(%edx),%mm2
        pmuludq %mm0,%mm2
        psrlq   $32,%mm1
        movd    20(%edx),%mm4
        pmuludq %mm0,%mm4
        paddq   %mm3,%mm1
        movd    24(%edx),%mm6
        pmuludq %mm0,%mm6
        movd    %mm1,4(%eax)
        psrlq   $32,%mm1
        movd    28(%edx),%mm3
        addl    $32,%edx
        pmuludq %mm0,%mm3
        paddq   %mm5,%mm1
        movd    16(%eax),%mm5
        paddq   %mm5,%mm2
        movd    %mm1,8(%eax)
        psrlq   $32,%mm1
        paddq   %mm7,%mm1
        movd    20(%eax),%mm5
        paddq   %mm5,%mm4
        movd    %mm1,12(%eax)
        psrlq   $32,%mm1
        paddq   %mm2,%mm1
        movd    24(%eax),%mm5
        paddq   %mm5,%mm6
        movd    %mm1,16(%eax)
        psrlq   $32,%mm1
        paddq   %mm4,%mm1
        movd    28(%eax),%mm5
        paddq   %mm5,%mm3
        movd    %mm1,20(%eax)
        psrlq   $32,%mm1
        paddq   %mm6,%mm1
        movd    %mm1,24(%eax)
        psrlq   $32,%mm1
        paddq   %mm3,%mm1
        movd    %mm1,28(%eax)
        leal    32(%eax),%eax
        psrlq   $32,%mm1
        subl    $8,%ecx
        jz      .Lmaw_sse2_exit
  .Lmaw_sse2_entry:
        testl   $0xfffffff8,%ecx        # at least 8 words left?
        jnz     .Lmaw_sse2_unrolled
  .align 4
  .Lmaw_sse2_loop:
        # One word per pass; ecx is 1..7 on entry here.
        movd    (%edx),%mm2
        movd    (%eax),%mm3
        pmuludq %mm0,%mm2
        leal    4(%edx),%edx
        paddq   %mm3,%mm1
        paddq   %mm2,%mm1
        movd    %mm1,(%eax)
        subl    $1,%ecx
        psrlq   $32,%mm1                # keep the high half as next carry
        leal    4(%eax),%eax            # lea: flags from subl stay intact
        jnz     .Lmaw_sse2_loop
  .Lmaw_sse2_exit:
        movd    %mm1,%eax
        emms
        ret

        # ---- Scalar path: edi = r, ebx = a, ebp = w, esi = carry,
        # ---- ecx = words left in the current phase.
  .align 16
  .Lmaw_scalar:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %esi,%esi
        movl    20(%esp),%edi
        movl    28(%esp),%ecx
        movl    24(%esp),%ebx
        andl    $0xfffffff8,%ecx        # whole groups of eight
        movl    32(%esp),%ebp
        pushl   %ecx                    # fifth slot: num is at 32(%esp) from here on
        jz      .Lmaw_tail              # flags still come from the andl
  .align 16
  .Lmaw_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_MAW_STEP \off
        .endr
        subl    $8,%ecx
        leal    32(%ebx),%ebx
        leal    32(%edi),%edi
        jnz     .Lmaw_loop
  .Lmaw_tail:
        movl    32(%esp),%ecx
        andl    $7,%ecx                 # 0..7 leftover words
        jz      .Lmaw_done
        .irp off,0,4,8,12,16,20
        BN_MAW_STEP \off
        decl    %ecx
        jz      .Lmaw_done
        .endr
        BN_MAW_STEP 24
  .Lmaw_done:
        movl    %esi,%eax
        popl    %ecx
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_mul_add_words,.-.L_bn_mul_add_words_begin
  # ----------------------------------------------------------------------
  # bn_mul_words -- r[i] = low32(a[i]*w + carry) for num words.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    destination word array
  #         8(%esp)   a    source word array
  #         12(%esp)  num  number of 32-bit words to process
  #         16(%esp)  w    32-bit multiplier
  # Out:    %eax = carry word left over after the last element
  # Clobb:  %ecx, %edx, flags.  The SSE2 path also uses %mm0-%mm2 and
  #         issues emms before returning.  %ebx/%esi/%edi/%ebp are saved
  #         and restored by the scalar path.
  #
  # Bit 26 of the first word of OPENSSL_ia32cap_P selects the code path:
  # set -> pmuludq path, clear -> scalar mull path.
  #
  # num == 0: both paths return 0 without touching r or a.  The SSE2 loop
  # used to run once unconditionally, after which subl $1 wrapped the
  # counter and the loop kept storing far past the end of r.
  # ----------------------------------------------------------------------

  # One scalar step.  ebx = a, edi = r, ecx = w, esi = carry in/out;
  # eax/edx are scratch (edx:eax = 64-bit product).
  .macro BN_MW_STEP off
        movl    \off(%ebx),%eax
        mull    %ecx
        addl    %esi,%eax
        adcl    $0,%edx
        movl    %eax,\off(%edi)
        movl    %edx,%esi
  .endm

  .globl bn_mul_words
  .type bn_mul_words,@function
  .align 16
  bn_mul_words:
  .L_bn_mul_words_begin:
        # Position-independent address of OPENSSL_ia32cap_P.
        call    .Lmw_pic
  .Lmw_pic:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.Lmw_pic(%eax),%eax
        btl     $26,(%eax)
        jnc     .Lmw_scalar

        # ---- SSE2 path: eax = r, edx = a, ecx = words left,
        # ---- mm0 = w, mm1 = running 64-bit accumulator / carry.
        movl    4(%esp),%eax
        movl    8(%esp),%edx
        movl    12(%esp),%ecx
        movd    16(%esp),%mm0
        pxor    %mm1,%mm1
        testl   %ecx,%ecx
        jz      .Lmw_sse2_exit          # nothing to do, carry is 0
  .align 16
  .Lmw_sse2_loop:
        movd    (%edx),%mm2
        pmuludq %mm0,%mm2
        leal    4(%edx),%edx
        paddq   %mm2,%mm1
        movd    %mm1,(%eax)
        subl    $1,%ecx
        psrlq   $32,%mm1                # keep the high half as next carry
        leal    4(%eax),%eax            # lea: flags from subl stay intact
        jnz     .Lmw_sse2_loop
  .Lmw_sse2_exit:
        movd    %mm1,%eax
        emms
        ret

        # ---- Scalar path: edi = r, ebx = a, ecx = w, esi = carry,
        # ---- ebp = words left in the current phase.
  .align 16
  .Lmw_scalar:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %esi,%esi
        movl    20(%esp),%edi
        movl    24(%esp),%ebx
        movl    28(%esp),%ebp
        movl    32(%esp),%ecx
        andl    $0xfffffff8,%ebp        # whole groups of eight
        jz      .Lmw_tail
  .Lmw_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_MW_STEP \off
        .endr
        addl    $32,%ebx
        addl    $32,%edi
        subl    $8,%ebp
        jnz     .Lmw_loop
  .Lmw_tail:
        movl    28(%esp),%ebp
        andl    $7,%ebp                 # 0..7 leftover words
        jz      .Lmw_done
        .irp off,0,4,8,12,16,20
        BN_MW_STEP \off
        decl    %ebp
        jz      .Lmw_done
        .endr
        BN_MW_STEP 24
  .Lmw_done:
        movl    %esi,%eax
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_mul_words,.-.L_bn_mul_words_begin
  # ----------------------------------------------------------------------
  # bn_sqr_words -- r[2i], r[2i+1] = low, high half of a[i]*a[i].
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    destination, two words written per input word
  #         8(%esp)   a    source word array
  #         12(%esp)  num  number of 32-bit input words
  # Out:    nothing meaningful is left in %eax
  # Clobb:  %eax, %ecx, %edx, flags.  The SSE2 path also uses %mm0 and
  #         issues emms once it has used it.  %ebx/%esi/%edi/%ebp are
  #         saved and restored by the scalar path.
  #
  # Bit 26 of the first word of OPENSSL_ia32cap_P selects the code path:
  # set -> pmuludq path, clear -> scalar mull path.
  #
  # num == 0: both paths return without touching r or a.  The SSE2 loop
  # used to run once unconditionally, after which subl $1 wrapped the
  # counter and the loop kept storing far past the end of r.
  # ----------------------------------------------------------------------

  # One scalar step.  edi = a, esi = r; src/dst are byte offsets.
  .macro BN_SQR_STEP src, dst
        movl    \src(%edi),%eax
        mull    %eax
        movl    %eax,\dst(%esi)
        movl    %edx,\dst+4(%esi)
  .endm

  .globl bn_sqr_words
  .type bn_sqr_words,@function
  .align 16
  bn_sqr_words:
  .L_bn_sqr_words_begin:
        # Position-independent address of OPENSSL_ia32cap_P.
        call    .Lsqr_pic
  .Lsqr_pic:
        popl    %eax
        leal    OPENSSL_ia32cap_P-.Lsqr_pic(%eax),%eax
        btl     $26,(%eax)
        jnc     .Lsqr_scalar

        # ---- SSE2 path: eax = r, edx = a, ecx = words left.
        movl    4(%esp),%eax
        movl    8(%esp),%edx
        movl    12(%esp),%ecx
        testl   %ecx,%ecx
        jz      .Lsqr_sse2_ret          # no MMX state touched yet
  .align 16
  .Lsqr_sse2_loop:
        movd    (%edx),%mm0
        pmuludq %mm0,%mm0
        leal    4(%edx),%edx
        movq    %mm0,(%eax)             # both result words in one store
        subl    $1,%ecx
        leal    8(%eax),%eax            # lea: flags from subl stay intact
        jnz     .Lsqr_sse2_loop
        emms
  .Lsqr_sse2_ret:
        ret

        # ---- Scalar path: esi = r, edi = a, ebx = words left in the
        # ---- current phase.
  .align 16
  .Lsqr_scalar:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        movl    20(%esp),%esi
        movl    24(%esp),%edi
        movl    28(%esp),%ebx
        andl    $0xfffffff8,%ebx        # whole groups of eight
        jz      .Lsqr_tail
  .Lsqr_loop:
        BN_SQR_STEP 0, 0
        BN_SQR_STEP 4, 8
        BN_SQR_STEP 8, 16
        BN_SQR_STEP 12, 24
        BN_SQR_STEP 16, 32
        BN_SQR_STEP 20, 40
        BN_SQR_STEP 24, 48
        BN_SQR_STEP 28, 56
        addl    $32,%edi
        addl    $64,%esi
        subl    $8,%ebx
        jnz     .Lsqr_loop
  .Lsqr_tail:
        movl    28(%esp),%ebx
        andl    $7,%ebx                 # 0..7 leftover words
        jz      .Lsqr_done
        BN_SQR_STEP 0, 0
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 4, 8
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 8, 16
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 12, 24
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 16, 32
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 20, 40
        decl    %ebx
        jz      .Lsqr_done
        BN_SQR_STEP 24, 48
  .Lsqr_done:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_sqr_words,.-.L_bn_sqr_words_begin
  # ----------------------------------------------------------------------
  # bn_div_words -- 64-by-32-bit unsigned division, quotient only.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   h  high 32 bits of the dividend
  #         8(%esp)   l  low 32 bits of the dividend
  #         12(%esp)  d  divisor
  # Out:    %eax = (h:l) / d
  # Clobb:  %ecx, %edx, flags
  #
  # d == 0 now returns 0xffffffff instead of raising a divide error.
  # NOTE(review): all-ones is presumably what the portable C fallback
  # returns for a zero divisor -- confirm against the C implementation.
  #
  # The caller must still ensure h < d: divl raises a divide error when
  # the quotient does not fit in 32 bits.
  # ----------------------------------------------------------------------
  .globl bn_div_words
  .type bn_div_words,@function
  .align 16
  bn_div_words:
  .L_bn_div_words_begin:
        movl    12(%esp),%ecx
        testl   %ecx,%ecx
        jz      .Ldiv_zero_divisor
        movl    4(%esp),%edx            # edx:eax = h:l
        movl    8(%esp),%eax
        divl    %ecx
        ret
  .Ldiv_zero_divisor:
        movl    $-1,%eax
        ret
  .size bn_div_words,.-.L_bn_div_words_begin
  # ----------------------------------------------------------------------
  # bn_add_words -- r[i] = a[i] + b[i] + carry for num words.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    destination word array
  #         8(%esp)   a    first source word array
  #         12(%esp)  b    second source word array
  #         16(%esp)  num  number of 32-bit words
  # Out:    %eax = carry out of the last word
  # Clobb:  %ecx, %edx, flags; %ebx/%esi/%edi/%ebp are saved and restored
  #
  # Register roles: ebx = r, esi = a, edi = b, ebp = words left in the
  # current phase, eax = running carry, ecx/edx = scratch.
  # ----------------------------------------------------------------------

  # One word: r = a + b + carry-in; eax becomes the carry-out.
  .macro BN_ADD_STEP off
        movl    \off(%esi),%ecx
        movl    \off(%edi),%edx
        addl    %eax,%ecx               # add the incoming carry
        movl    $0,%eax                 # mov, not xor: CF must survive
        adcl    %eax,%eax               # eax = carry of that first add
        addl    %edx,%ecx
        adcl    $0,%eax                 # plus carry of the second add
        movl    %ecx,\off(%ebx)
  .endm

  .globl bn_add_words
  .type bn_add_words,@function
  .align 16
  bn_add_words:
  .L_bn_add_words_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        movl    20(%esp),%ebx
        movl    24(%esp),%esi
        movl    28(%esp),%edi
        movl    32(%esp),%ebp
        xorl    %eax,%eax               # carry starts at zero
        andl    $0xfffffff8,%ebp        # whole groups of eight
        jz      .Ladd_tail
  .Ladd_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_ADD_STEP \off
        .endr
        addl    $32,%esi
        addl    $32,%edi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Ladd_loop
  .Ladd_tail:
        movl    32(%esp),%ebp
        andl    $7,%ebp                 # 0..7 leftover words
        jz      .Ladd_done
        .irp off,0,4,8,12,16,20
        BN_ADD_STEP \off
        decl    %ebp
        jz      .Ladd_done
        .endr
        BN_ADD_STEP 24
  .Ladd_done:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_add_words,.-.L_bn_add_words_begin
  # ----------------------------------------------------------------------
  # bn_sub_words -- r[i] = a[i] - b[i] - borrow for num words.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    destination word array
  #         8(%esp)   a    minuend word array
  #         12(%esp)  b    subtrahend word array
  #         16(%esp)  num  number of 32-bit words
  # Out:    %eax = borrow out of the last word
  # Clobb:  %ecx, %edx, flags; %ebx/%esi/%edi/%ebp are saved and restored
  #
  # Register roles: ebx = r, esi = a, edi = b, ebp = words left in the
  # current phase, eax = running borrow, ecx/edx = scratch.
  # ----------------------------------------------------------------------

  # One word: r = a - b - borrow-in; eax becomes the borrow-out.
  .macro BN_SUB_STEP off
        movl    \off(%esi),%ecx
        movl    \off(%edi),%edx
        subl    %eax,%ecx               # take off the incoming borrow
        movl    $0,%eax                 # mov, not xor: CF must survive
        adcl    %eax,%eax               # eax = borrow of that first sub
        subl    %edx,%ecx
        adcl    $0,%eax                 # plus borrow of the second sub
        movl    %ecx,\off(%ebx)
  .endm

  .globl bn_sub_words
  .type bn_sub_words,@function
  .align 16
  bn_sub_words:
  .L_bn_sub_words_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        movl    20(%esp),%ebx
        movl    24(%esp),%esi
        movl    28(%esp),%edi
        movl    32(%esp),%ebp
        xorl    %eax,%eax               # borrow starts at zero
        andl    $0xfffffff8,%ebp        # whole groups of eight
        jz      .Lsub_tail
  .Lsub_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_SUB_STEP \off
        .endr
        addl    $32,%esi
        addl    $32,%edi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Lsub_loop
  .Lsub_tail:
        movl    32(%esp),%ebp
        andl    $7,%ebp                 # 0..7 leftover words
        jz      .Lsub_done
        .irp off,0,4,8,12,16,20
        BN_SUB_STEP \off
        decl    %ebp
        jz      .Lsub_done
        .endr
        BN_SUB_STEP 24
  .Lsub_done:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_sub_words,.-.L_bn_sub_words_begin
  # ----------------------------------------------------------------------
  # bn_sub_part_words -- subtract two word arrays of different lengths.
  #
  # Syntax: GNU as, AT&T operand order.  ABI: i386 cdecl (stack arguments).
  # In:     4(%esp)   r    destination word array
  #         8(%esp)   a    minuend word array
  #         12(%esp)  b    subtrahend word array
  #         16(%esp)  cl   number of words present in both a and b
  #         20(%esp)  dl   signed count of extra words:
  #                        dl > 0 -> a has dl more words, dl < 0 -> b has
  #                        -dl more words, dl == 0 -> none
  # Out:    %eax = final borrow
  # Clobb:  %ecx, %edx, flags; %ebx/%esi/%edi/%ebp are saved and restored
  #
  # Phases:
  #   common  r[i] = a[i] - b[i] - borrow          for the first cl words
  #   dl < 0  r[i] = 0 - b[i] - borrow             for the next -dl words
  #   dl > 0  r[i] = a[i] - borrow while the borrow keeps propagating,
  #           then a plain copy of the remaining words of a
  #
  # Register roles: ebx = r, esi = a, edi = b, ebp = words left in the
  # current phase, eax = running borrow, ecx/edx = scratch.
  # ----------------------------------------------------------------------

  # r = a - b - borrow-in; eax becomes the borrow-out.
  .macro BN_SUBP_STEP off
        movl    \off(%esi),%ecx
        movl    \off(%edi),%edx
        subl    %eax,%ecx
        movl    $0,%eax                 # mov, not xor: CF must survive
        adcl    %eax,%eax
        subl    %edx,%ecx
        adcl    $0,%eax
        movl    %ecx,\off(%ebx)
  .endm

  # r = 0 - b - borrow-in; eax becomes the borrow-out.
  .macro BN_SUBP_NEG_STEP off
        movl    $0,%ecx
        movl    \off(%edi),%edx
        subl    %eax,%ecx
        movl    $0,%eax                 # mov, not xor: CF must survive
        adcl    %eax,%eax
        subl    %edx,%ecx
        adcl    $0,%eax
        movl    %ecx,\off(%ebx)
  .endm

  # r = a - borrow; jumps to nc as soon as the borrow stops propagating.
  .macro BN_SUBP_POS_STEP off, nc
        movl    \off(%esi),%ecx
        subl    %eax,%ecx
        movl    %ecx,\off(%ebx)         # mov keeps CF for the jnc
        jnc     \nc
  .endm

  # r = a, no borrow involved.
  .macro BN_SUBP_COPY off
        movl    \off(%esi),%ecx
        movl    %ecx,\off(%ebx)
  .endm

  .globl bn_sub_part_words
  .type bn_sub_part_words,@function
  .align 16
  bn_sub_part_words:
  .L_bn_sub_part_words_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        movl    20(%esp),%ebx
        movl    24(%esp),%esi
        movl    28(%esp),%edi
        movl    32(%esp),%ebp
        xorl    %eax,%eax               # borrow starts at zero

        # ---- Common part: cl words, eight per pass, then 0..7 singly.
        andl    $0xfffffff8,%ebp
        jz      .Lsubp_tail
  .Lsubp_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_SUBP_STEP \off
        .endr
        addl    $32,%esi
        addl    $32,%edi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Lsubp_loop
  .Lsubp_tail:
        movl    32(%esp),%ebp
        andl    $7,%ebp
        jz      .Lsubp_extra
        # The pointers advance word by word here so that they end up
        # just past the common part for the dl phase below.
        .rept 6
        BN_SUBP_STEP 0
        addl    $4,%esi
        addl    $4,%edi
        addl    $4,%ebx
        decl    %ebp
        jz      .Lsubp_extra
        .endr
        BN_SUBP_STEP 0
        addl    $4,%esi
        addl    $4,%edi
        addl    $4,%ebx

        # ---- Extra part: dispatch on the sign of dl.
  .Lsubp_extra:
        movl    36(%esp),%ebp
        testl   %ebp,%ebp
        je      .Lsubp_end              # dl == 0: borrow is the result
        jge     .Lsubp_pos

        # ---- dl < 0: -dl more words of b.
        movl    $0,%edx
        subl    %ebp,%edx
        movl    %edx,%ebp               # ebp = -dl
        andl    $0xfffffff8,%ebp
        jz      .Lsubp_neg_tail
  .Lsubp_neg_loop:
        .irp off,0,4,8,12,16,20,24,28
        BN_SUBP_NEG_STEP \off
        .endr
        addl    $32,%edi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Lsubp_neg_loop
  .Lsubp_neg_tail:
        movl    36(%esp),%edx
        movl    $0,%ebp
        subl    %edx,%ebp               # ebp = -dl again
        andl    $7,%ebp
        jz      .Lsubp_end
        .irp off,0,4,8,12,16,20
        BN_SUBP_NEG_STEP \off
        decl    %ebp
        jz      .Lsubp_end
        .endr
        BN_SUBP_NEG_STEP 24
        jmp     .Lsubp_end

        # ---- dl > 0: dl more words of a, borrow still propagating.
  .Lsubp_pos:
        andl    $0xfffffff8,%ebp
        jz      .Lsubp_pos_tail
  .Lsubp_pos_loop:
        BN_SUBP_POS_STEP 0,  .Lsubp_nc0
        BN_SUBP_POS_STEP 4,  .Lsubp_nc1
        BN_SUBP_POS_STEP 8,  .Lsubp_nc2
        BN_SUBP_POS_STEP 12, .Lsubp_nc3
        BN_SUBP_POS_STEP 16, .Lsubp_nc4
        BN_SUBP_POS_STEP 20, .Lsubp_nc5
        BN_SUBP_POS_STEP 24, .Lsubp_nc6
        BN_SUBP_POS_STEP 28, .Lsubp_nc7
        addl    $32,%esi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Lsubp_pos_loop
  .Lsubp_pos_tail:
        movl    36(%esp),%ebp
        andl    $7,%ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 0,  .Lsubp_tail_nc0
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 4,  .Lsubp_tail_nc1
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 8,  .Lsubp_tail_nc2
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 12, .Lsubp_tail_nc3
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 16, .Lsubp_tail_nc4
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 20, .Lsubp_tail_nc5
        decl    %ebp
        jz      .Lsubp_end
        BN_SUBP_POS_STEP 24, .Lsubp_tail_nc6
        movl    $1,%eax                 # borrow ran through every word
        jmp     .Lsubp_end

        # ---- dl > 0, borrow gone: copy the rest of a.  Each .Lsubp_ncN
        # ---- is entered right after word N of the current group of
        # ---- eight has been written by the borrow loop above.
  .Lsubp_nc_loop:
        BN_SUBP_COPY 0
  .Lsubp_nc0:
        BN_SUBP_COPY 4
  .Lsubp_nc1:
        BN_SUBP_COPY 8
  .Lsubp_nc2:
        BN_SUBP_COPY 12
  .Lsubp_nc3:
        BN_SUBP_COPY 16
  .Lsubp_nc4:
        BN_SUBP_COPY 20
  .Lsubp_nc5:
        BN_SUBP_COPY 24
  .Lsubp_nc6:
        BN_SUBP_COPY 28
  .Lsubp_nc7:
        addl    $32,%esi
        addl    $32,%ebx
        subl    $8,%ebp
        jnz     .Lsubp_nc_loop
        movl    36(%esp),%ebp
        andl    $7,%ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 0
  .Lsubp_tail_nc0:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 4
  .Lsubp_tail_nc1:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 8
  .Lsubp_tail_nc2:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 12
  .Lsubp_tail_nc3:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 16
  .Lsubp_tail_nc4:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 20
  .Lsubp_tail_nc5:
        decl    %ebp
        jz      .Lsubp_nc_end
        BN_SUBP_COPY 24
  .Lsubp_tail_nc6:
  .Lsubp_nc_end:
        movl    $0,%eax                 # no borrow left
  .Lsubp_end:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
  .size bn_sub_part_words,.-.L_bn_sub_part_words_begin
  # Capability words consulted by the routines in this file: each SSE2-capable
  # entry point tests bit 26 of the first 32-bit word (btl $26) and falls back
  # to its scalar code when the bit is clear.  Declared as a 16-byte common
  # symbol; the trailing 4 is the alignment operand of .comm on ELF targets.
  # NOTE(review): nothing in this file writes the symbol -- presumably it is
  # filled in by CPU-detection code elsewhere; confirm.
  1393. .comm OPENSSL_ia32cap_P,16,4