x86_64cpuid.s 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482
  # File-level setup (AT&T syntax, Mach-O directives).
  # The address of _OPENSSL_cpuid_setup is emitted into the module-initializer
  # section, and a 16-byte common symbol _OPENSSL_ia32cap_P is reserved.
  # _OPENSSL_cpuid_setup itself is defined elsewhere (not in this file).
  1. .private_extern _OPENSSL_cpuid_setup
  2. .mod_init_func                        # Mach-O section holding initializer function pointers
  3. .p2align 3                            # 8-byte alignment for the pointer below
  4. .quad _OPENSSL_cpuid_setup            # pointer entry: _OPENSSL_cpuid_setup
  5. .private_extern _OPENSSL_ia32cap_P
  6. .comm _OPENSSL_ia32cap_P,16,2         # 16 bytes; third operand is the alignment (presumably log2 => 4 bytes on Mach-O)
  7. .text                                 # everything below is code
  # _OPENSSL_atomic_add
  # In:   rdi = pointer to a 32-bit value, rsi = amount to add
  # Out:  rax = new value, sign-extended from 32 to 64 bits
  # Uses: rax, r8, flags
  # Atomically adds rsi to *rdi with a lock cmpxchg retry loop.
  8. .globl _OPENSSL_atomic_add
  9. .p2align 4
  10. _OPENSSL_atomic_add:
  11. movl (%rdi),%eax                     # eax = current value (expected value for cmpxchg)
  12. L$spin: leaq (%rsi,%rax,1),%r8       # r8 = expected + amount (candidate new value)
  13. .byte 0xf0                           # LOCK prefix for the next instruction
  14. cmpxchgl %r8d,(%rdi)                 # if *rdi == eax store r8d; else eax = *rdi and ZF=0
  15. jne L$spin                           # lost the race: retry with the refreshed eax
  16. movl %r8d,%eax                       # return the value that was stored
  17. .byte 0x48,0x98                      # cltq (cdqe): sign-extend eax into rax
  18. .byte 0xf3,0xc3                      # rep ret
  # _OPENSSL_rdtsc
  # In:   none
  # Out:  rax = 64-bit time-stamp counter
  # Uses: rax, rdx, flags
  19. .globl _OPENSSL_rdtsc
  20. .p2align 4
  21. _OPENSSL_rdtsc:
  22. rdtsc                                # edx:eax = time-stamp counter
  23. shlq $32,%rdx                        # move the high half into bits 63..32
  24. orq %rdx,%rax                        # rax = (high << 32) | low
  25. .byte 0xf3,0xc3                      # rep ret
  # _OPENSSL_ia32_cpuid
  # In:   rdi = pointer to a buffer; this routine writes only bytes 8..15 of it
  # Out:  rax = (adjusted leaf-1 ECX << 32) | adjusted leaf-1 EDX
  #       8(%rdi)  = adjusted leaf-7 EBX (0 if leaf 7 is unavailable)
  #       12(%rdi) = leaf-7 ECX          (0 if leaf 7 is unavailable)
  # Uses: rax, rcx, rdx, r8-r11, xmm0, flags; rbx is saved in r8 and restored.
  #
  # Register roles:
  #   r8   saved rbx (cpuid overwrites ebx)
  #   r9d  first a "vendor is not GenuineIntel" flag, later the ECX result word
  #   r10d scratch (AMD core count / Intel cache-sharing count), later the EDX result word
  #   r11d highest basic CPUID leaf
  #   xmm0 copy of leaf-1 EAX (family/model signature) on the L$intel path
  #
  # NOTE(review): feature-bit names in the comments below (HTT, XSAVE, OSXSAVE,
  # AVX, XOP, ADX, AVX512*) follow the public Intel/AMD CPUID documentation;
  # the code itself only shows the bit positions. Confirm against those manuals.
  26. .globl _OPENSSL_ia32_cpuid
  27. .p2align 4
  28. _OPENSSL_ia32_cpuid:
  29. movq %rbx,%r8                        # preserve callee-saved rbx across cpuid
  30. xorl %eax,%eax                       # leaf 0
  31. movq %rax,8(%rdi)                    # zero the two extended-feature words up front
  32. cpuid
  33. movl %eax,%r11d                      # r11d = highest basic leaf
  34. xorl %eax,%eax
  35. cmpl $0x756e6547,%ebx                # "Genu"
  36. setne %al
  37. movl %eax,%r9d
  38. cmpl $0x49656e69,%edx                # "ineI"
  39. setne %al
  40. orl %eax,%r9d
  41. cmpl $0x6c65746e,%ecx                # "ntel"
  42. setne %al
  43. orl %eax,%r9d                        # r9d == 0 iff the vendor string is "GenuineIntel"
  44. jz L$intel
  45. cmpl $0x68747541,%ebx                # "Auth"
  46. setne %al
  47. movl %eax,%r10d
  48. cmpl $0x69746E65,%edx                # "enti"
  49. setne %al
  50. orl %eax,%r10d
  51. cmpl $0x444D4163,%ecx                # "cAMD"
  52. setne %al
  53. orl %eax,%r10d                       # r10d == 0 iff the vendor string is "AuthenticAMD"
  54. jnz L$intel                          # neither Intel nor AMD: take the common path too
  55. movl $0x80000000,%eax                # AMD: query the highest extended leaf
  56. cpuid
  57. cmpl $0x80000001,%eax
  58. jb L$intel                           # no extended feature leaf
  59. movl %eax,%r10d                      # r10d = highest extended leaf
  60. movl $0x80000001,%eax
  61. cpuid
  62. orl %ecx,%r9d                        # merge extended ECX into r9d
  63. andl $0x00000801,%r9d                # keep bit 0 (non-Intel flag) and bit 11 (XOP)
  64. cmpl $0x80000008,%r10d
  65. jb L$intel                           # leaf 0x80000008 not available
  66. movl $0x80000008,%eax
  67. cpuid
  68. movzbq %cl,%r10
  69. incq %r10                            # r10 = ECX[7:0] + 1 (core count per AMD documentation)
  70. movl $1,%eax
  71. cpuid
  72. btl $28,%edx                         # EDX bit 28 (HTT)
  73. jnc L$generic
  74. shrl $16,%ebx                        # bl = EBX[23:16] (logical processor count)
  75. cmpb %r10b,%bl
  76. ja L$generic                         # more logical processors than cores: keep bit 28
  77. andl $0xefffffff,%edx                # otherwise clear EDX bit 28
  78. jmp L$generic
  79. L$intel:
  80. cmpl $4,%r11d
  81. movl $-1,%r10d                       # default when leaf 4 is missing (mov leaves flags intact)
  82. jb L$nocacheinfo                     # branch uses the flags of the cmpl above
  83. movl $4,%eax
  84. movl $0,%ecx                         # leaf 4, sub-leaf 0
  85. cpuid
  86. movl %eax,%r10d
  87. shrl $14,%r10d
  88. andl $0xfff,%r10d                    # r10d = EAX[25:14]
  89. L$nocacheinfo:
  90. movl $1,%eax
  91. cpuid
  92. movd %eax,%xmm0                      # stash the signature for the check at L$notknights
  93. andl $0xbfefffff,%edx                # clear EDX bits 30 and 20 (set again below as software markers)
  94. cmpl $0,%r9d
  95. jne L$notintel
  96. orl $0x40000000,%edx                 # bit 30: marks a GenuineIntel CPU
  97. andb $15,%ah                         # ah = EAX[11:8] (family)
  98. cmpb $15,%ah
  99. jne L$notP4
  100. orl $0x00100000,%edx                # bit 20: marks family 15
  101. L$notP4:
  102. cmpb $6,%ah
  103. jne L$notintel
  104. andl $0x0fff0ff0,%eax               # keep family/model fields, drop stepping and type
  105. cmpl $0x00050670,%eax
  106. je L$knights
  107. cmpl $0x00080650,%eax
  108. jne L$notintel
  109. L$knights:
  110. andl $0xfbffffff,%ecx               # clear ECX bit 26 (XSAVE) on these two models
  111. L$notintel:
  112. btl $28,%edx                        # EDX bit 28 (HTT)
  113. jnc L$generic
  114. andl $0xefffffff,%edx               # tentatively clear bit 28
  115. cmpl $0,%r10d
  116. je L$generic                        # cache-sharing field is 0: leave it cleared
  117. orl $0x10000000,%edx                # otherwise set it again...
  118. shrl $16,%ebx
  119. cmpb $1,%bl                         # ...unless EBX[23:16] <= 1
  120. ja L$generic
  121. andl $0xefffffff,%edx
  122. L$generic:
  123. andl $0x00000800,%r9d               # keep only bit 11 collected on the AMD path
  124. andl $0xfffff7ff,%ecx               # drop leaf-1 ECX bit 11
  125. orl %ecx,%r9d                       # r9d = leaf-1 ECX with bit 11 taken from r9d
  126. movl %edx,%r10d                     # r10d = adjusted leaf-1 EDX
  127. cmpl $7,%r11d
  128. jb L$no_extended_info
  129. movl $7,%eax
  130. xorl %ecx,%ecx                      # leaf 7, sub-leaf 0
  131. cpuid
  132. btl $26,%r9d                        # was bit 26 (XSAVE) cleared above?
  133. jc L$notknights
  134. andl $0xfff7ffff,%ebx               # yes: also clear leaf-7 EBX bit 19 (ADX)
  135. L$notknights:
  136. movd %xmm0,%eax                     # reload the leaf-1 EAX saved at L$nocacheinfo
  137. andl $0x0fff0ff0,%eax
  138. cmpl $0x00050650,%eax
  139. jne L$notskylakex
  140. andl $0xfffeffff,%ebx               # this model: clear leaf-7 EBX bit 16 (AVX512F)
  141. L$notskylakex:
  142. movl %ebx,8(%rdi)                   # store leaf-7 EBX
  143. movl %ecx,12(%rdi)                  # store leaf-7 ECX
  144. L$no_extended_info:
  145. btl $27,%r9d                        # ECX bit 27 (OSXSAVE)
  146. jnc L$clear_avx
  147. xorl %ecx,%ecx                      # XCR0
  148. .byte 0x0f,0x01,0xd0                # xgetbv
  149. andl $0xe6,%eax                     # XCR0 bits 1,2,5,6,7
  150. cmpl $0xe6,%eax
  151. je L$done                           # all of those state components are enabled
  152. andl $0x3fdeffff,8(%rdi)            # clear leaf-7 EBX bits 31,30,21,16 (AVX512 group)
  153. andl $6,%eax                        # XCR0 bits 1 and 2
  154. cmpl $6,%eax
  155. je L$done                           # both enabled: keep the remaining feature bits
  156. L$clear_avx:
  157. movl $0xefffe7ff,%eax
  158. andl %eax,%r9d                      # clear ECX-word bits 28, 12, 11 (AVX, FMA, XOP)
  159. movl $0x3fdeffdf,%eax
  160. andl %eax,8(%rdi)                   # clear leaf-7 EBX bits 31,30,21,16 and 5 (AVX2)
  161. L$done:
  162. shlq $32,%r9                        # ECX word goes to the high half of the result
  163. movl %r10d,%eax                     # EDX word in the low half (upper bits zeroed)
  164. movq %r8,%rbx                       # restore rbx
  165. orq %r9,%rax
  166. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_cleanse
  # In:   rdi = buffer, rsi = length in bytes
  # Out:  rax = 0
  # Uses: rax, rsi, rdi, flags
  # Overwrites the buffer with zero bytes. Lengths below 15 are written one
  # byte at a time; longer buffers are first advanced to an 8-byte boundary,
  # then written 8 bytes at a time, with any remainder written bytewise.
  167. .globl _OPENSSL_cleanse
  168. .p2align 4
  169. _OPENSSL_cleanse:
  170. xorq %rax,%rax                      # rax = 0: the fill value
  171. cmpq $15,%rsi
  172. jae L$ot                            # 15 bytes or more: use the qword path
  173. cmpq $0,%rsi
  174. je L$ret                            # nothing to do
  175. L$ittle:
  176. movb %al,(%rdi)                     # zero one byte
  177. subq $1,%rsi                        # sets ZF when the count reaches 0
  178. leaq 1(%rdi),%rdi                   # advance the pointer (lea keeps the flags from subq)
  179. jnz L$ittle
  180. L$ret:
  181. .byte 0xf3,0xc3                     # rep ret
  182. .p2align 4
  183. L$ot:
  184. testq $7,%rdi                       # is the pointer 8-byte aligned?
  185. jz L$aligned
  186. movb %al,(%rdi)                     # no: zero one byte and re-test
  187. leaq -1(%rsi),%rsi
  188. leaq 1(%rdi),%rdi
  189. jmp L$ot
  190. L$aligned:
  191. movq %rax,(%rdi)                    # zero 8 bytes
  192. leaq -8(%rsi),%rsi
  193. testq $-8,%rsi                      # ZF=0 while bits above the low three are set (8+ bytes left)
  194. leaq 8(%rdi),%rdi                   # advance the pointer (lea keeps the flags from testq)
  195. jnz L$aligned
  196. cmpq $0,%rsi
  197. jne L$ittle                         # 1..7 trailing bytes: finish bytewise
  198. .byte 0xf3,0xc3                     # rep ret
  # _CRYPTO_memcmp
  # In:   rdi, rsi = the two buffers, rdx = length in bytes
  # Out:  rax = 0 if all bytes are equal (or the length is 0), 1 otherwise
  # Uses: rax, rdx, rsi, rdi, r10, r11, flags
  # The comparison never exits early on a mismatch: differences are
  # accumulated with XOR/OR and reduced to 0/1 at the end. A length of
  # exactly 16 takes a two-qword path.
  199. .globl _CRYPTO_memcmp
  200. .p2align 4
  201. _CRYPTO_memcmp:
  202. xorq %rax,%rax                      # result / difference accumulator
  203. xorq %r10,%r10
  204. cmpq $0,%rdx
  205. je L$no_data                        # zero length: return 0
  206. cmpq $16,%rdx
  207. jne L$oop_cmp                       # only the 16-byte case uses the wide path
  208. movq (%rdi),%r10
  209. movq 8(%rdi),%r11
  210. movq $1,%rdx                        # constant 1 for the cmov below
  211. xorq (%rsi),%r10                    # nonzero bits mark differing positions
  212. xorq 8(%rsi),%r11
  213. orq %r11,%r10                       # ZF=1 iff both halves matched
  214. cmovnzq %rdx,%rax                   # rax = 1 on any difference, else stays 0
  215. .byte 0xf3,0xc3                     # rep ret
  216. .p2align 4
  217. L$oop_cmp:
  218. movb (%rdi),%r10b
  219. leaq 1(%rdi),%rdi
  220. xorb (%rsi),%r10b                   # difference of the current byte pair
  221. leaq 1(%rsi),%rsi
  222. orb %r10b,%al                       # accumulate differences in al
  223. decq %rdx
  224. jnz L$oop_cmp
  225. negq %rax                           # a nonzero accumulator becomes negative (bit 63 set)
  226. shrq $63,%rax                       # rax = 1 if any difference, else 0
  227. L$no_data:
  228. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_wipe_cpu
  # In:   none
  # Out:  rax = rsp + 8 (the stack address just above the return address)
  # Zeroes xmm0-xmm15 and the integer registers rcx, rdx, rsi, rdi, r8-r11.
  229. .globl _OPENSSL_wipe_cpu
  230. .p2align 4
  231. _OPENSSL_wipe_cpu:
  232. pxor %xmm0,%xmm0
  233. pxor %xmm1,%xmm1
  234. pxor %xmm2,%xmm2
  235. pxor %xmm3,%xmm3
  236. pxor %xmm4,%xmm4
  237. pxor %xmm5,%xmm5
  238. pxor %xmm6,%xmm6
  239. pxor %xmm7,%xmm7
  240. pxor %xmm8,%xmm8
  241. pxor %xmm9,%xmm9
  242. pxor %xmm10,%xmm10
  243. pxor %xmm11,%xmm11
  244. pxor %xmm12,%xmm12
  245. pxor %xmm13,%xmm13
  246. pxor %xmm14,%xmm14
  247. pxor %xmm15,%xmm15
  248. xorq %rcx,%rcx
  249. xorq %rdx,%rdx
  250. xorq %rsi,%rsi
  251. xorq %rdi,%rdi
  252. xorq %r8,%r8
  253. xorq %r9,%r9
  254. xorq %r10,%r10
  255. xorq %r11,%r11
  256. leaq 8(%rsp),%rax                   # return value: rsp + 8
  257. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_instrument_bus
  # In:   rdi = array of 32-bit words, rsi = number of words (cnt)
  # Out:  rax = cnt
  # Uses: rax, rcx, rdx, r8-r11, flags
  # For each word: read the TSC, flush the word's cache line, and lock-add
  # the tick delta since the previous reading into the word.
  # NOTE(review): the loop decrements before testing, so cnt == 0 is not
  # handled here; presumably callers always pass cnt > 0. Confirm.
  258. .globl _OPENSSL_instrument_bus
  259. .p2align 4
  260. _OPENSSL_instrument_bus:
  261. movq %rdi,%r10                      # r10 = output cursor
  262. movq %rsi,%rcx                      # rcx = words remaining
  263. movq %rsi,%r11                      # r11 = cnt, kept for the return value
  264. rdtsc
  265. movl %eax,%r8d                      # r8d = previous tick (low 32 bits of TSC)
  266. movl $0,%r9d                        # r9d = previous delta
  267. clflush (%r10)                      # evict the first word's cache line
  268. .byte 0xf0                          # LOCK prefix for the next instruction
  269. addl %r9d,(%r10)                    # locked add of 0 to the first word
  270. jmp L$oop
  271. .p2align 4
  272. L$oop: rdtsc
  273. movl %eax,%edx                      # edx = current tick
  274. subl %r8d,%eax                      # eax = delta since the previous tick
  275. movl %edx,%r8d                      # remember the current tick
  276. movl %eax,%r9d
  277. clflush (%r10)
  278. .byte 0xf0                          # LOCK prefix for the next instruction
  279. addl %eax,(%r10)                    # add the delta into the current word
  280. leaq 4(%r10),%r10                   # advance to the next word
  281. subq $1,%rcx
  282. jnz L$oop
  283. movq %r11,%rax                      # return cnt
  284. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_instrument_bus2
  # In:   rdi = array of 32-bit words, rsi = number of words (cnt),
  #       rdx = maximum number of probes (max)
  # Out:  rax = cnt minus the words still remaining when the loop stopped
  # Uses: rax, rcx, rdx, r8-r11, flags; touches no stack memory
  #
  # Each probe reads the TSC, flushes the current word's cache line and
  # lock-adds the tick delta into the word. The cursor advances (and the
  # remaining count drops) only when the new delta differs from the previous
  # one. The loop ends when the remaining count reaches 0 or after max probes.
  #
  # rsi is never written, so it still holds cnt at the end. The previous
  # version spilled cnt to 8(%rsp), which is the caller's frame above the
  # return address (this function has no stack-passed arguments).
  285. .globl _OPENSSL_instrument_bus2
  286. .p2align 4
  287. _OPENSSL_instrument_bus2:
  288. movq %rdi,%r10                      # r10 = output cursor
  289. movq %rsi,%rcx                      # rcx = words remaining
  290. movq %rdx,%r11                      # r11 = probes remaining
  # (cnt stays live in rsi; no spill to the caller's stack)
  292. rdtsc
  293. movl %eax,%r8d                      # r8d = previous tick (low 32 bits of TSC)
  294. movl $0,%r9d
  295. clflush (%r10)                      # evict the first word's cache line
  296. .byte 0xf0                          # LOCK prefix for the next instruction
  297. addl %r9d,(%r10)                    # locked add of 0 to the first word
  298. rdtsc
  299. movl %eax,%edx                      # edx = current tick
  300. subl %r8d,%eax                      # eax = delta since the previous tick
  301. movl %edx,%r8d                      # remember the current tick
  302. movl %eax,%r9d                      # r9d = previous delta
  303. L$oop2:
  304. clflush (%r10)
  305. .byte 0xf0                          # LOCK prefix for the next instruction
  306. addl %eax,(%r10)                    # add the delta into the current word
  307. subq $1,%r11
  308. jz L$done2                          # probe budget exhausted
  309. rdtsc
  310. movl %eax,%edx
  311. subl %r8d,%eax                      # eax = new delta
  312. movl %edx,%r8d
  313. cmpl %r9d,%eax                      # did the delta change?
  314. movl %eax,%r9d                      # (mov keeps the flags from cmpl)
  315. movl $0,%edx                        # (mov, not xor, so the flags survive)
  316. setne %dl                           # rdx = 1 if the delta changed, else 0
  317. subq %rdx,%rcx                      # consume a word only on change; sets ZF at 0
  318. leaq (%r10,%rdx,4),%r10             # advance the cursor only on change (lea keeps flags)
  319. jnz L$oop2                          # continue while words remain
  320. L$done2:
  321. movq %rsi,%rax                      # rax = original cnt
  322. subq %rcx,%rax                      # minus the words still remaining
  323. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_ia32_rdrand_bytes
  # In:   rdi = output buffer, rsi = number of bytes requested
  # Out:  rax = number of bytes actually written
  # Uses: rax, rsi, rdi, r10, r11, flags
  # Fills the buffer from RDRAND, 8 bytes per successful draw (a final draw
  # supplies the 1..7 trailing bytes). Each draw is attempted up to 8 times;
  # if all attempts fail the routine returns the count written so far.
  # r10 is zeroed before returning.
  324. .globl _OPENSSL_ia32_rdrand_bytes
  325. .p2align 4
  326. _OPENSSL_ia32_rdrand_bytes:
  327. xorq %rax,%rax                      # bytes written so far
  328. cmpq $0,%rsi
  329. je L$done_rdrand_bytes
  330. movq $8,%r11                        # attempts left for this draw
  331. L$oop_rdrand_bytes:
  332. .byte 73,15,199,242                 # rdrand %r10 (CF=1 on success)
  333. jc L$break_rdrand_bytes
  334. decq %r11
  335. jnz L$oop_rdrand_bytes
  336. jmp L$done_rdrand_bytes             # all attempts failed: give up
  337. .p2align 4
  338. L$break_rdrand_bytes:
  339. cmpq $8,%rsi
  340. jb L$tail_rdrand_bytes              # fewer than 8 bytes left
  341. movq %r10,(%rdi)                    # store a full 8-byte draw
  342. leaq 8(%rdi),%rdi
  343. addq $8,%rax
  344. subq $8,%rsi                        # sets ZF when the request is satisfied
  345. jz L$done_rdrand_bytes
  346. movq $8,%r11                        # reset the attempt counter for the next draw
  347. jmp L$oop_rdrand_bytes
  348. .p2align 4
  349. L$tail_rdrand_bytes:
  350. movb %r10b,(%rdi)                   # store the low byte of the draw
  351. leaq 1(%rdi),%rdi
  352. incq %rax
  353. shrq $8,%r10                        # expose the next byte
  354. decq %rsi
  355. jnz L$tail_rdrand_bytes
  356. L$done_rdrand_bytes:
  357. xorq %r10,%r10                      # do not leave random data in r10
  358. .byte 0xf3,0xc3                     # rep ret
  # _OPENSSL_ia32_rdseed_bytes
  # In:   rdi = output buffer, rsi = number of bytes requested
  # Out:  rax = number of bytes actually written
  # Uses: rax, rsi, rdi, r10, r11, flags
  # Fills the buffer from RDSEED, 8 bytes per successful draw (a final draw
  # supplies the 1..7 trailing bytes). Each draw is attempted up to 8 times;
  # if all attempts fail the routine returns the count written so far.
  # r10 is zeroed before returning.
  359. .globl _OPENSSL_ia32_rdseed_bytes
  360. .p2align 4
  361. _OPENSSL_ia32_rdseed_bytes:
  362. xorq %rax,%rax                      # bytes written so far
  363. cmpq $0,%rsi
  364. je L$done_rdseed_bytes
  365. movq $8,%r11                        # attempts left for this draw
  366. L$oop_rdseed_bytes:
  367. .byte 73,15,199,250                 # rdseed %r10 (CF=1 on success)
  368. jc L$break_rdseed_bytes
  369. decq %r11
  370. jnz L$oop_rdseed_bytes
  371. jmp L$done_rdseed_bytes             # all attempts failed: give up
  372. .p2align 4
  373. L$break_rdseed_bytes:
  374. cmpq $8,%rsi
  375. jb L$tail_rdseed_bytes              # fewer than 8 bytes left
  376. movq %r10,(%rdi)                    # store a full 8-byte draw
  377. leaq 8(%rdi),%rdi
  378. addq $8,%rax
  379. subq $8,%rsi                        # sets ZF when the request is satisfied
  380. jz L$done_rdseed_bytes
  381. movq $8,%r11                        # reset the attempt counter for the next draw
  382. jmp L$oop_rdseed_bytes
  383. .p2align 4
  384. L$tail_rdseed_bytes:
  385. movb %r10b,(%rdi)                   # store the low byte of the draw
  386. leaq 1(%rdi),%rdi
  387. incq %rax
  388. shrq $8,%r10                        # expose the next byte
  389. decq %rsi
  390. jnz L$tail_rdseed_bytes
  391. L$done_rdseed_bytes:
  392. xorq %r10,%r10                      # do not leave random data in r10
  393. .byte 0xf3,0xc3                     # rep ret