# x86_64cpuid.s (7.5 KB)
#
# x86-64 assembly for GNU as, AT&T syntax, ELF directives.
  #-----------------------------------------------------------------------
  # Module preamble.
  #
  # Keeps OPENSSL_cpuid_setup and OPENSSL_ia32cap_P hidden (not exported
  # from the linked object), reserves the common symbol OPENSSL_ia32cap_P
  # and contributes a call to OPENSSL_cpuid_setup to the .init section.
  # OPENSSL_cpuid_setup is not defined in this file.
  #-----------------------------------------------------------------------
  .hidden OPENSSL_cpuid_setup
  .hidden OPENSSL_ia32cap_P
  .comm   OPENSSL_ia32cap_P,16,4          # size 16 bytes, alignment 4

  .section .init
          call    OPENSSL_cpuid_setup     # fragment of the image initialization code

  .text
  #-----------------------------------------------------------------------
  # OPENSSL_atomic_add
  #
  # Adds a 32-bit amount to a 32-bit word in memory with a
  # lock cmpxchg retry loop and returns the updated value.
  #
  # In:    rdi = address of the 32-bit word
  #        esi = amount to add (only the low 32 bits of the sum are used)
  # Out:   rax = updated value, sign-extended from 32 to 64 bits
  # Clobb: r8, flags
  #-----------------------------------------------------------------------
  .globl  OPENSSL_atomic_add
  .type   OPENSSL_atomic_add,@function
  .align  16
  OPENSSL_atomic_add:
  .cfi_startproc
          movl    (%rdi),%eax             # eax = value expected to be in memory
  .Lspin:
          leaq    (%rsi,%rax,1),%r8       # r8d = expected value + amount
          lock cmpxchgl %r8d,(%rdi)       # store r8d only while memory still equals eax
          jne     .Lspin                  # mismatch: eax was reloaded from memory, retry
          movl    %r8d,%eax               # result is the value just stored
          cltq                            # sign-extend eax into rax
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
  #-----------------------------------------------------------------------
  # OPENSSL_rdtsc
  #
  # Returns the time-stamp counter as one 64-bit value.
  #
  # In:    nothing
  # Out:   rax = (edx << 32) | eax as delivered by rdtsc
  # Clobb: rdx, flags
  #-----------------------------------------------------------------------
  .globl  OPENSSL_rdtsc
  .type   OPENSSL_rdtsc,@function
  .align  16
  OPENSSL_rdtsc:
  .cfi_startproc
          rdtsc                           # edx:eax = counter, upper halves of rax and rdx zeroed
          shlq    $32,%rdx                # move the high word into bits 63:32
          orq     %rdx,%rax               # merge with the low word
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
  #-----------------------------------------------------------------------
  # OPENSSL_ia32_cpuid
  #
  # Probes the processor with cpuid (plus xgetbv when OSXSAVE is
  # reported) and returns a filtered copy of the leaf 1 feature words.
  #
  # In:    rdi = buffer; only bytes 8..15 are written:
  #              +8   filtered leaf 7 EBX (0 when leaf 7 is unavailable)
  #              +12  leaf 7 ECX          (0 when leaf 7 is unavailable)
  # Out:   rax = (filtered leaf 1 ECX << 32) | filtered leaf 1 EDX
  # Clobb: rcx, rdx, r8, r9, r10, r11, xmm0, flags
  #        (rbx is parked in r8 and restored before returning)
  #
  # Register roles:
  #   r8    saved rbx
  #   r9d   vendor-mismatch flag first, ECX feature word later
  #   r10d  core / cache-sharing count while bit 28 of EDX is being
  #         classified, EDX feature word later
  #   r11d  highest standard cpuid leaf
  #   xmm0  leaf 1 EAX (processor signature) kept for the model checks
  #
  # Bit names in the comments follow the vendor CPUID documentation.
  #-----------------------------------------------------------------------
  .globl  OPENSSL_ia32_cpuid
  .type   OPENSSL_ia32_cpuid,@function
  .align  16
  OPENSSL_ia32_cpuid:
  .cfi_startproc
          movq    %rbx,%r8                # cpuid overwrites callee-saved rbx
  .cfi_register %rbx,%r8
          xorl    %eax,%eax
          movq    %rax,8(%rdi)            # zero bytes 8..15 of the output buffer
          cpuid                           # leaf 0
          movl    %eax,%r11d              # r11d = highest standard leaf

          # Vendor test: r9d stays 0 only for "GenuineIntel".
          xorl    %eax,%eax
          cmpl    $0x756e6547,%ebx        # "Genu"
          setne   %al
          movl    %eax,%r9d
          cmpl    $0x49656e69,%edx        # "ineI"
          setne   %al
          orl     %eax,%r9d
          cmpl    $0x6c65746e,%ecx        # "ntel"
          setne   %al
          orl     %eax,%r9d
          jz      .Lintel

          # Vendor test: r10d stays 0 only for "AuthenticAMD".
          cmpl    $0x68747541,%ebx        # "Auth"
          setne   %al
          movl    %eax,%r10d
          cmpl    $0x69746E65,%edx        # "enti"
          setne   %al
          orl     %eax,%r10d
          cmpl    $0x444D4163,%ecx        # "cAMD"
          setne   %al
          orl     %eax,%r10d
          jnz     .Lintel                 # every other vendor shares the common path

          # AuthenticAMD only.
          movl    $0x80000000,%eax
          cpuid                           # eax = highest extended leaf
          cmpl    $0x80000001,%eax
          jb      .Lintel
          movl    %eax,%r10d
          movl    $0x80000001,%eax
          cpuid
          orl     %ecx,%r9d
          andl    $0x00000801,%r9d        # keep bit 0 (vendor flag) and extended ECX bit 11
          cmpl    $0x80000008,%r10d
          jb      .Lintel
          movl    $0x80000008,%eax
          cpuid
          movzbq  %cl,%r10                # ECX bits 7:0, presumably core count minus one
          incq    %r10
          movl    $1,%eax
          cpuid
          # Fix: this path used to reach .Lgeneric without ever writing
          # xmm0, so the model check at .Lnotknights compared whatever
          # the caller had left in xmm0. Record the signature here too.
          movd    %eax,%xmm0
          btl     $28,%edx                # EDX bit 28 (HTT)
          jnc     .Lgeneric
          shrl    $16,%ebx                # bl = EBX bits 23:16
          cmpb    %r10b,%bl
          ja      .Lgeneric               # count in bl exceeds r10b: keep bit 28
          andl    $0xefffffff,%edx        # otherwise clear bit 28
          jmp     .Lgeneric

  .Lintel:
          cmpl    $4,%r11d
          movl    $-1,%r10d               # default when leaf 4 is missing, mov keeps the flags
          jb      .Lnocacheinfo
          movl    $4,%eax
          movl    $0,%ecx                 # subleaf 0
          cpuid
          movl    %eax,%r10d
          shrl    $14,%r10d
          andl    $0xfff,%r10d            # r10d = leaf 4 EAX bits 25:14
  .Lnocacheinfo:
          movl    $1,%eax
          cpuid
          movd    %eax,%xmm0              # keep the processor signature
          andl    $0xbfefffff,%edx        # clear EDX bits 30 and 20
          cmpl    $0,%r9d
          jne     .Lnotintel
          orl     $0x40000000,%edx        # GenuineIntel: set EDX bit 30
          andb    $15,%ah                 # ah = family field (EAX bits 11:8)
          cmpb    $15,%ah
          jne     .LnotP4
          orl     $0x00100000,%edx        # family 15: set EDX bit 20
  .LnotP4:
          cmpb    $6,%ah
          jne     .Lnotintel
          andl    $0x0fff0ff0,%eax        # keep family, model and the extended fields
          cmpl    $0x00050670,%eax        # family 6, model 0x57
          je      .Lknights
          cmpl    $0x00080650,%eax        # family 6, model 0x85
          jne     .Lnotintel
  .Lknights:
          andl    $0xfbffffff,%ecx        # clear ECX bit 26 (XSAVE)
  .Lnotintel:
          btl     $28,%edx                # EDX bit 28 (HTT)
          jnc     .Lgeneric
          andl    $0xefffffff,%edx        # clear it, then decide whether to set it again
          cmpl    $0,%r10d
          je      .Lgeneric               # leaf 4 count field is zero: leave it cleared
          orl     $0x10000000,%edx
          shrl    $16,%ebx                # bl = EBX bits 23:16
          cmpb    $1,%bl
          ja      .Lgeneric               # count above one: keep bit 28
          andl    $0xefffffff,%edx

  .Lgeneric:
          andl    $0x00000800,%r9d        # keep only bit 11 taken from the AMD extended leaf
          andl    $0xfffff7ff,%ecx        # drop leaf 1 ECX bit 11
          orl     %ecx,%r9d               # r9d = ECX feature word
          movl    %edx,%r10d              # r10d = EDX feature word
          cmpl    $7,%r11d
          jb      .Lno_extended_info
          movl    $7,%eax
          xorl    %ecx,%ecx
          cpuid                           # leaf 7, subleaf 0
          btl     $26,%r9d                # XSAVE bit, cleared above at .Lknights
          jc      .Lnotknights
          andl    $0xfff7ffff,%ebx        # clear leaf 7 EBX bit 19
  .Lnotknights:
          movd    %xmm0,%eax              # processor signature saved earlier
          andl    $0x0fff0ff0,%eax
          cmpl    $0x00050650,%eax        # family 6, model 0x55
          jne     .Lnotskylakex
          andl    $0xfffeffff,%ebx        # clear leaf 7 EBX bit 16
  .Lnotskylakex:
          movl    %ebx,8(%rdi)
          movl    %ecx,12(%rdi)
  .Lno_extended_info:
          btl     $27,%r9d                # ECX bit 27 (OSXSAVE)
          jnc     .Lclear_avx
          xorl    %ecx,%ecx               # select XCR0
          .byte   0x0f,0x01,0xd0          # xgetbv
          andl    $0xe6,%eax              # XCR0 bits 1, 2, 5, 6, 7
          cmpl    $0xe6,%eax
          je      .Ldone                  # all of them enabled: nothing to mask
          andl    $0x3fdeffff,8(%rdi)     # clear stored EBX bits 31, 30, 21, 16
          andl    $6,%eax                 # XCR0 bits 1 and 2
          cmpl    $6,%eax
          je      .Ldone
  .Lclear_avx:
          movl    $0xefffe7ff,%eax
          andl    %eax,%r9d               # clear ECX bits 28, 12, 11
          movl    $0x3fdeffdf,%eax
          andl    %eax,8(%rdi)            # clear stored EBX bits 31, 30, 21, 16, 5
  .Ldone:
          shlq    $32,%r9                 # ECX word goes to the upper half
          movl    %r10d,%eax              # EDX word goes to the lower half
          movq    %r8,%rbx                # restore callee-saved rbx
  .cfi_restore %rbx
          orq     %r9,%rax
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
  #-----------------------------------------------------------------------
  # OPENSSL_cleanse
  #
  # Overwrites a buffer with zero bytes.
  #
  # In:    rdi = buffer address
  #        rsi = length in bytes (0 is accepted)
  # Out:   rax = 0 (cleared on entry and never modified)
  # Clobb: rdi, rsi, flags
  #
  # Lengths below 15 use the byte loop. Longer buffers are first brought
  # to 8-byte alignment, then filled with quadword stores, and any tail
  # shorter than 8 bytes goes back through the byte loop.
  #-----------------------------------------------------------------------
  .globl  OPENSSL_cleanse
  .type   OPENSSL_cleanse,@function
  .align  16
  OPENSSL_cleanse:
  .cfi_startproc
          xorq    %rax,%rax               # rax supplies the zero for every store
          cmpq    $15,%rsi
          jae     .Lot
          cmpq    $0,%rsi
          je      .Lret
  .Little:                                # byte loop, rsi is nonzero here
          movb    %al,(%rdi)
          subq    $1,%rsi
          leaq    1(%rdi),%rdi            # lea leaves ZF from subq intact
          jnz     .Little
  .Lret:
          .byte   0xf3,0xc3               # rep ret
  .align  16
  .Lot:                                   # byte stores until rdi is 8-byte aligned
          testq   $7,%rdi
          jz      .Laligned
          movb    %al,(%rdi)
          leaq    -1(%rsi),%rsi
          leaq    1(%rdi),%rdi
          jmp     .Lot
  .Laligned:                              # at most 7 of 15 or more bytes are gone, so rsi >= 8
          movq    %rax,(%rdi)
          leaq    -8(%rsi),%rsi
          testq   $-8,%rsi                # nonzero while 8 or more bytes remain
          leaq    8(%rdi),%rdi            # lea leaves ZF from testq intact
          jnz     .Laligned
          cmpq    $0,%rsi
          jne     .Little                 # 1..7 bytes left over
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_cleanse,.-OPENSSL_cleanse
  #-----------------------------------------------------------------------
  # CRYPTO_memcmp
  #
  # Compares two buffers without an early exit: every byte is visited
  # and the differences are accumulated, so the instruction sequence
  # depends on the length only, not on where the buffers differ.
  #
  # In:    rdi = first buffer
  #        rsi = second buffer
  #        rdx = length in bytes
  # Out:   rax = 0 when the buffers match (or the length is 0), 1 otherwise
  # Clobb: rdx, rdi, rsi, r10, r11, flags
  #-----------------------------------------------------------------------
  .globl  CRYPTO_memcmp
  .type   CRYPTO_memcmp,@function
  .align  16
  CRYPTO_memcmp:
  .cfi_startproc
          xorq    %rax,%rax               # result / difference accumulator
          xorq    %r10,%r10
          cmpq    $0,%rdx
          je      .Lno_data
          cmpq    $16,%rdx
          jne     .Loop_cmp

          # Length is exactly 16: compare as two quadwords.
          movq    (%rdi),%r10
          movq    8(%rdi),%r11
          movq    $1,%rdx                 # value returned on mismatch
          xorq    (%rsi),%r10
          xorq    8(%rsi),%r11
          orq     %r11,%r10               # nonzero when any bit differs
          cmovnzq %rdx,%rax
          .byte   0xf3,0xc3               # rep ret
  .align  16
  .Loop_cmp:                              # byte loop, rdx is nonzero here
          movb    (%rdi),%r10b
          leaq    1(%rdi),%rdi
          xorb    (%rsi),%r10b
          leaq    1(%rsi),%rsi
          orb     %r10b,%al               # al collects every differing bit
          decq    %rdx
          jnz     .Loop_cmp
          negq    %rax                    # any nonzero accumulator becomes negative
          shrq    $63,%rax                # sign bit yields the 0 or 1 result
  .Lno_data:
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   CRYPTO_memcmp,.-CRYPTO_memcmp
  #-----------------------------------------------------------------------
  # OPENSSL_wipe_cpu
  #
  # Zeroes xmm0..xmm15 and the integer registers rcx, rdx, rsi, rdi and
  # r8..r11, leaving every other integer register except rax untouched.
  #
  # In:    nothing
  # Out:   rax = rsp + 8 as seen on entry, the address just above the
  #              return address slot
  # Clobb: xmm0-xmm15, rcx, rdx, rsi, rdi, r8-r11, flags
  #-----------------------------------------------------------------------
  .globl  OPENSSL_wipe_cpu
  .type   OPENSSL_wipe_cpu,@function
  .align  16
  OPENSSL_wipe_cpu:
  .cfi_startproc
          # Vector registers.
          pxor    %xmm0,%xmm0
          pxor    %xmm1,%xmm1
          pxor    %xmm2,%xmm2
          pxor    %xmm3,%xmm3
          pxor    %xmm4,%xmm4
          pxor    %xmm5,%xmm5
          pxor    %xmm6,%xmm6
          pxor    %xmm7,%xmm7
          pxor    %xmm8,%xmm8
          pxor    %xmm9,%xmm9
          pxor    %xmm10,%xmm10
          pxor    %xmm11,%xmm11
          pxor    %xmm12,%xmm12
          pxor    %xmm13,%xmm13
          pxor    %xmm14,%xmm14
          pxor    %xmm15,%xmm15

          # Integer scratch registers.
          xorq    %rcx,%rcx
          xorq    %rdx,%rdx
          xorq    %rsi,%rsi
          xorq    %rdi,%rdi
          xorq    %r8,%r8
          xorq    %r9,%r9
          xorq    %r10,%r10
          xorq    %r11,%r11

          leaq    8(%rsp),%rax            # return value: caller stack pointer before the call
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  #-----------------------------------------------------------------------
  # OPENSSL_instrument_bus
  #
  # For each of rsi consecutive 32-bit slots: reads the time-stamp
  # counter, flushes the cache line holding the slot and atomically adds
  # the tick delta since the previous reading to the slot.
  #
  # In:    rdi = array of 32-bit slots
  #        rsi = slot count; 0 is not special-cased, the loop counter is
  #              decremented before it is tested
  # Out:   rax = rsi as passed in
  # Clobb: rcx, rdx, r8, r9, r10, r11, flags
  #-----------------------------------------------------------------------
  .globl  OPENSSL_instrument_bus
  .type   OPENSSL_instrument_bus,@function
  .align  16
  OPENSSL_instrument_bus:
  .cfi_startproc
          movq    %rdi,%r10               # r10 = current slot
          movq    %rsi,%rcx               # rcx = slots remaining
          movq    %rsi,%r11               # r11 = count to return
          rdtsc
          movl    %eax,%r8d               # r8d = previous tick, low 32 bits
          movl    $0,%r9d                 # r9d = previous delta
          clflush (%r10)
          lock addl %r9d,(%r10)           # initial locked add of zero to the first slot
          jmp     .Loop
  .align  16
  .Loop:
          rdtsc
          movl    %eax,%edx               # edx = current tick
          subl    %r8d,%eax               # eax = delta since the previous tick
          movl    %edx,%r8d
          movl    %eax,%r9d
          clflush (%r10)
          lock addl %eax,(%r10)
          leaq    4(%r10),%r10            # next slot
          subq    $1,%rcx
          jnz     .Loop
          movq    %r11,%rax
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
  #-----------------------------------------------------------------------
  # OPENSSL_instrument_bus2
  #
  # Like a bounded variant of the bus probe: repeatedly reads the
  # time-stamp counter, flushes the cache line of the current 32-bit
  # slot and atomically adds the tick delta to it. The slot cursor only
  # advances (and the remaining-slot counter only drops) when the delta
  # differs from the previous one. Stops when either all slots are
  # consumed or rdx probes have been made.
  #
  # In:    rdi = array of 32-bit slots
  #        rsi = slot count
  #        rdx = maximum number of probes
  # Out:   rax = slot count minus slots still remaining
  # Clobb: rcx, rdx, r8, r9, r10, r11, flags (rdi and rsi are preserved)
  #
  # The original slot count is read back from rsi, which this routine
  # never modifies. It used to be spilled to 8(%rsp); on entry that
  # slot sits above the return address and, with register arguments
  # only, belongs to the caller frame, so the store could overwrite
  # caller data.
  #-----------------------------------------------------------------------
  .globl  OPENSSL_instrument_bus2
  .type   OPENSSL_instrument_bus2,@function
  .align  16
  OPENSSL_instrument_bus2:
  .cfi_startproc
          movq    %rdi,%r10               # r10 = current slot
          movq    %rsi,%rcx               # rcx = slots remaining
          movq    %rdx,%r11               # r11 = probes remaining
          rdtsc
          movl    %eax,%r8d               # r8d = previous tick, low 32 bits
          movl    $0,%r9d                 # r9d = previous delta
          clflush (%r10)
          lock addl %r9d,(%r10)           # initial locked add of zero to the first slot
          rdtsc
          movl    %eax,%edx               # edx = current tick
          subl    %r8d,%eax               # eax = delta since the previous tick
          movl    %edx,%r8d
          movl    %eax,%r9d
  .Loop2:
          clflush (%r10)
          lock addl %eax,(%r10)
          subq    $1,%r11
          jz      .Ldone2                 # probe budget exhausted
          rdtsc
          movl    %eax,%edx
          subl    %r8d,%eax
          movl    %edx,%r8d
          cmpl    %r9d,%eax               # did the delta change?
          movl    %eax,%r9d
          movl    $0,%edx                 # mov keeps the flags from cmpl for setne
          setne   %dl                     # rdx = 1 on change, 0 otherwise
          subq    %rdx,%rcx               # consume a slot only on change
          leaq    (%r10,%rdx,4),%r10      # advance on change, lea keeps ZF from subq
          jnz     .Loop2
  .Ldone2:
          movq    %rsi,%rax               # original slot count, still intact in rsi
          subq    %rcx,%rax
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
  #-----------------------------------------------------------------------
  # OPENSSL_ia32_rdrand_bytes
  #
  # Fills a buffer with output of the rdrand instruction. Each 64-bit
  # draw is retried up to 8 times; when all 8 attempts fail the routine
  # stops and reports how many bytes it has stored so far.
  #
  # In:    rdi = buffer
  #        rsi = length in bytes (0 returns at once, rdrand is not executed)
  # Out:   rax = number of bytes stored
  # Clobb: rdi, rsi, r10, r11, flags (r10 is zeroed before returning)
  #
  # No check for rdrand support is made here.
  #-----------------------------------------------------------------------
  .globl  OPENSSL_ia32_rdrand_bytes
  .type   OPENSSL_ia32_rdrand_bytes,@function
  .align  16
  OPENSSL_ia32_rdrand_bytes:
  .cfi_startproc
          xorq    %rax,%rax               # rax = bytes stored
          cmpq    $0,%rsi
          je      .Ldone_rdrand_bytes
          movq    $8,%r11                 # r11 = attempts left for this draw
  .Loop_rdrand_bytes:
          .byte   0x49,0x0f,0xc7,0xf2     # rdrand %r10
          jc      .Lbreak_rdrand_bytes    # CF set: r10 holds a valid draw
          decq    %r11
          jnz     .Loop_rdrand_bytes
          jmp     .Ldone_rdrand_bytes     # all attempts failed
  .align  16
  .Lbreak_rdrand_bytes:
          cmpq    $8,%rsi
          jb      .Ltail_rdrand_bytes
          movq    %r10,(%rdi)             # store a full quadword
          leaq    8(%rdi),%rdi
          addq    $8,%rax
          subq    $8,%rsi
          jz      .Ldone_rdrand_bytes
          movq    $8,%r11                 # fresh attempt budget for the next draw
          jmp     .Loop_rdrand_bytes
  .align  16
  .Ltail_rdrand_bytes:                    # 1..7 bytes left: peel them off the draw
          movb    %r10b,(%rdi)
          leaq    1(%rdi),%rdi
          incq    %rax
          shrq    $8,%r10
          decq    %rsi
          jnz     .Ltail_rdrand_bytes
  .Ldone_rdrand_bytes:
          xorq    %r10,%r10               # do not leave the draw in a register
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_ia32_rdrand_bytes,.-OPENSSL_ia32_rdrand_bytes
  #-----------------------------------------------------------------------
  # OPENSSL_ia32_rdseed_bytes
  #
  # Fills a buffer with output of the rdseed instruction. Each 64-bit
  # draw is retried up to 8 times; when all 8 attempts fail the routine
  # stops and reports how many bytes it has stored so far.
  #
  # In:    rdi = buffer
  #        rsi = length in bytes (0 returns at once, rdseed is not executed)
  # Out:   rax = number of bytes stored
  # Clobb: rdi, rsi, r10, r11, flags (r10 is zeroed before returning)
  #
  # No check for rdseed support is made here.
  #-----------------------------------------------------------------------
  .globl  OPENSSL_ia32_rdseed_bytes
  .type   OPENSSL_ia32_rdseed_bytes,@function
  .align  16
  OPENSSL_ia32_rdseed_bytes:
  .cfi_startproc
          xorq    %rax,%rax               # rax = bytes stored
          cmpq    $0,%rsi
          je      .Ldone_rdseed_bytes
          movq    $8,%r11                 # r11 = attempts left for this draw
  .Loop_rdseed_bytes:
          .byte   0x49,0x0f,0xc7,0xfa     # rdseed %r10
          jc      .Lbreak_rdseed_bytes    # CF set: r10 holds a valid draw
          decq    %r11
          jnz     .Loop_rdseed_bytes
          jmp     .Ldone_rdseed_bytes     # all attempts failed
  .align  16
  .Lbreak_rdseed_bytes:
          cmpq    $8,%rsi
          jb      .Ltail_rdseed_bytes
          movq    %r10,(%rdi)             # store a full quadword
          leaq    8(%rdi),%rdi
          addq    $8,%rax
          subq    $8,%rsi
          jz      .Ldone_rdseed_bytes
          movq    $8,%r11                 # fresh attempt budget for the next draw
          jmp     .Loop_rdseed_bytes
  .align  16
  .Ltail_rdseed_bytes:                    # 1..7 bytes left: peel them off the draw
          movb    %r10b,(%rdi)
          leaq    1(%rdi),%rdi
          incq    %rax
          shrq    $8,%r10
          decq    %rsi
          jnz     .Ltail_rdseed_bytes
  .Ldone_rdseed_bytes:
          xorq    %r10,%r10               # do not leave the draw in a register
          .byte   0xf3,0xc3               # rep ret
  .cfi_endproc
  .size   OPENSSL_ia32_rdseed_bytes,.-OPENSSL_ia32_rdseed_bytes