x86_64cpuid.masm 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  ; File preamble: assembler options, a start-up table entry that points at
  ; OPENSSL_cpuid_setup, the shared capability vector, and the opening of the
  ; code segment that holds every routine below.
  1. OPTION DOTNAME   ; permit identifiers that start with a dot (.CRT$XCU, .text$)
  2. EXTERN OPENSSL_cpuid_setup:NEAR   ; defined in another module
  3. .CRT$XCU SEGMENT READONLY ALIGN(8)   ; presumably the MSVC CRT dynamic-initializer table - confirm against CRT docs
  4. DQ OPENSSL_cpuid_setup   ; one 8-byte pointer to the setup routine is placed in that segment
  5. .CRT$XCU ENDS
  6. _DATA SEGMENT
  7. COMM OPENSSL_ia32cap_P:DWORD:4   ; communal array of four DWORDs (16 bytes)
  8. _DATA ENDS
  9. .text$ SEGMENT ALIGN(256) 'CODE'   ; code segment; stays open until the matching ".text$ ENDS"
  ;-----------------------------------------------------------------------
  ; OPENSSL_atomic_add
  ; Adds a 32-bit amount to the DWORD at [rcx] with a LOCK CMPXCHG retry loop.
  ; In:    rcx = address of the 32-bit value, rdx = addend (only the low
  ;        32 bits of the sum are stored)
  ; Out:   rax = the value that was stored, sign-extended to 64 bits
  ; Clobb: r8, flags
  ;-----------------------------------------------------------------------
  10. PUBLIC OPENSSL_atomic_add
  11. ALIGN 16
  12. OPENSSL_atomic_add PROC PUBLIC
  13. mov eax,DWORD PTR[rcx]   ; eax = current value; serves as cmpxchg's expected value
  14. $L$spin:: lea r8,QWORD PTR[rax*1+rdx]   ; r8 = expected value + addend (candidate result)
  15. DB 0f0h   ; LOCK prefix byte, applies to the cmpxchg that follows
  16. cmpxchg DWORD PTR[rcx],r8d   ; if [rcx]==eax then [rcx]=r8d, else eax=[rcx]
  17. jne $L$spin   ; memory changed in between: retry with the refreshed eax
  18. mov eax,r8d   ; return the value just stored
  19. DB 048h,098h   ; cdqe - sign-extend eax into rax
  20. DB 0F3h,0C3h ;repret
  21. OPENSSL_atomic_add ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_rdtsc
  ; Returns the time-stamp counter as one 64-bit value.
  ; In:    none
  ; Out:   rax = (edx << 32) | eax as delivered by RDTSC
  ; Clobb: rdx, flags
  ;-----------------------------------------------------------------------
  22. PUBLIC OPENSSL_rdtsc
  23. ALIGN 16
  24. OPENSSL_rdtsc PROC PUBLIC
  25. rdtsc   ; edx:eax = counter; upper halves of rax/rdx are zeroed by the instruction
  26. shl rdx,32   ; move the high half into bits 63:32
  27. or rax,rdx   ; combine into a single 64-bit result
  28. DB 0F3h,0C3h ;repret
  29. OPENSSL_rdtsc ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_ia32_cpuid
  ; Queries CPUID (and XGETBV when available), adjusts a number of feature
  ; bits, and reports the result.
  ; ABI:   Win64 (see the ";WIN64 prologue" marker)
  ; In:    rcx = pointer to a buffer; the DWORDs at offsets 8 and 12 are
  ;        written (zeroed first, then filled with the adjusted CPUID leaf 7
  ;        EBX/ECX when leaf 7 exists)
  ; Out:   rax = (adjusted leaf-1 ECX << 32) | adjusted leaf-1 EDX
  ; Clobb: rcx, rdx, r8-r11, xmm0, flags; rbx/rdi/rsi are saved and restored
  ;
  ; Register roles after the vendor check:
  ;   r8   = caller's rbx (cpuid overwrites rbx)
  ;   r9d  = 0 only when the vendor string is "GenuineIntel"; later reused
  ;          as the working copy of leaf-1 ECX
  ;   r10d = core-count scratch, later the working copy of leaf-1 EDX
  ;   r11d = highest standard CPUID leaf
  ;   xmm0 = leaf-1 EAX (processor signature), read back for the model check
  ;-----------------------------------------------------------------------
  30. PUBLIC OPENSSL_ia32_cpuid
  31. ALIGN 16
  32. OPENSSL_ia32_cpuid PROC PUBLIC
  33. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  34. mov QWORD PTR[16+rsp],rsi   ; rsi is not modified below; saved/restored by the generated prologue/epilogue
  35. mov rax,rsp   ; generated prologue; rax is overwritten by "xor eax,eax" before any read
  36. $L$SEH_begin_OPENSSL_ia32_cpuid::
  37. mov rdi,rcx   ; rdi = output buffer pointer
  38. mov r8,rbx   ; preserve callee-saved rbx across the cpuid calls
  39. xor eax,eax   ; leaf 0
  40. mov QWORD PTR[8+rdi],rax   ; zero the DWORDs at offsets 8 and 12
  41. cpuid
  42. mov r11d,eax   ; r11d = highest standard leaf
  43. xor eax,eax
  44. cmp ebx,0756e6547h   ; "Genu"
  45. setne al
  46. mov r9d,eax
  47. cmp edx,049656e69h   ; "ineI"
  48. setne al
  49. or r9d,eax
  50. cmp ecx,06c65746eh   ; "ntel"
  51. setne al
  52. or r9d,eax   ; r9d == 0 iff all three words matched
  53. jz $L$intel
  54. cmp ebx,068747541h   ; "Auth"
  55. setne al
  56. mov r10d,eax
  57. cmp edx,069746E65h   ; "enti"
  58. setne al
  59. or r10d,eax
  60. cmp ecx,0444D4163h   ; "cAMD"
  61. setne al
  62. or r10d,eax
  63. jnz $L$intel   ; neither vendor string: share the common path (r9d is non-zero there)
  64. mov eax,080000000h   ; AMD only from here: highest extended leaf
  65. cpuid
  66. cmp eax,080000001h
  67. jb $L$intel
  68. mov r10d,eax   ; r10d = highest extended leaf
  69. mov eax,080000001h
  70. cpuid
  71. or r9d,ecx
  72. and r9d,000000801h   ; keep bit 0 (not-Intel marker) and bit 11 of extended ECX (XOP per AMD's CPUID documentation)
  73. cmp r10d,080000008h
  74. jb $L$intel
  75. mov eax,080000008h
  76. cpuid
  77. movzx r10,cl   ; low byte of ECX; presumably "number of cores - 1" - confirm against AMD CPUID spec
  78. inc r10   ; r10 = that value + 1
  79. mov eax,1
  80. cpuid
  movd xmm0,eax   ; record the processor signature on this path too; $L$notknights reads xmm0 and no other instruction on the AMD path initializes it (movd leaves flags alone)
  81. bt edx,28   ; leaf-1 EDX bit 28
  82. jnc $L$generic
  83. shr ebx,16   ; bl = leaf-1 EBX bits 23:16
  84. cmp bl,r10b
  85. ja $L$generic   ; more logical processors than cores: keep bit 28
  86. and edx,0efffffffh   ; otherwise clear bit 28
  87. jmp $L$generic
  88. $L$intel::
  89. cmp r11d,4
  90. mov r10d,-1   ; default when leaf 4 is unavailable (mov keeps the flags from cmp)
  91. jb $L$nocacheinfo
  92. mov eax,4
  93. mov ecx,0   ; sub-leaf 0
  94. cpuid
  95. mov r10d,eax
  96. shr r10d,14
  97. and r10d,0fffh   ; r10d = leaf-4 EAX bits 25:14
  98. $L$nocacheinfo::
  99. mov eax,1
  100. cpuid
  101. movd xmm0,eax   ; set the processor signature aside
  102. and edx,0bfefffffh   ; clear EDX bits 30 and 20
  103. cmp r9d,0
  104. jne $L$notintel
  105. or edx,040000000h   ; Intel only: set EDX bit 30
  106. and ah,15   ; ah = EAX bits 11:8
  107. cmp ah,15
  108. jne $L$notP4
  109. or edx,000100000h   ; value 15 in that field: set EDX bit 20
  110. $L$notP4::
  111. cmp ah,6
  112. jne $L$notintel
  113. and eax,00fff0ff0h   ; keep signature bits 27:16 and 11:4
  114. cmp eax,000050670h
  115. je $L$knights
  116. cmp eax,000080650h
  117. jne $L$notintel
  118. $L$knights::
  119. and ecx,0fbffffffh   ; clear ECX bit 26 for the two signatures above
  120. $L$notintel::
  121. bt edx,28
  122. jnc $L$generic
  123. and edx,0efffffffh   ; clear bit 28 ...
  124. cmp r10d,0
  125. je $L$generic   ; ... and leave it clear when the leaf-4 field was 0
  126. or edx,010000000h   ; otherwise set it again
  127. shr ebx,16   ; bl = leaf-1 EBX bits 23:16
  128. cmp bl,1
  129. ja $L$generic   ; keep bit 28 only when that count exceeds 1
  130. and edx,0efffffffh
  131. $L$generic::
  132. and r9d,000000800h   ; keep only bit 11 gathered on the AMD path
  133. and ecx,0fffff7ffh   ; drop bit 11 of leaf-1 ECX
  134. or r9d,ecx   ; r9d = leaf-1 ECX with bit 11 replaced
  135. mov r10d,edx   ; r9d:r10d now mirror the adjusted ECX:EDX
  136. cmp r11d,7
  137. jb $L$no_extended_info
  138. mov eax,7
  139. xor ecx,ecx   ; sub-leaf 0
  140. cpuid
  141. bt r9d,26   ; bit 26 was cleared at $L$knights
  142. jc $L$notknights
  143. and ebx,0fff7ffffh   ; clear leaf-7 EBX bit 19
  144. $L$notknights::
  145. movd eax,xmm0   ; reload the processor signature
  146. and eax,00fff0ff0h
  147. cmp eax,000050650h
  148. jne $L$notskylakex
  149. and ebx,0fffeffffh   ; clear leaf-7 EBX bit 16 for this signature
  150. $L$notskylakex::
  151. mov DWORD PTR[8+rdi],ebx   ; store adjusted leaf-7 EBX
  152. mov DWORD PTR[12+rdi],ecx   ; store leaf-7 ECX
  153. $L$no_extended_info::
  154. bt r9d,27   ; leaf-1 ECX bit 27 gates the XGETBV below
  155. jnc $L$clear_avx
  156. xor ecx,ecx   ; XCR index 0
  157. DB 00fh,001h,0d0h   ; xgetbv
  158. and eax,0e6h
  159. cmp eax,0e6h
  160. je $L$done   ; all of XCR0 bits 1,2,5,6,7 set: keep everything
  161. and DWORD PTR[8+rdi],03fdeffffh   ; clear stored bits 31,30,21,16
  162. and eax,6
  163. cmp eax,6
  164. je $L$done   ; XCR0 bits 1 and 2 set: keep the remaining bits
  165. $L$clear_avx::
  166. mov eax,0efffe7ffh
  167. and r9d,eax   ; clear bits 28,12,11 in the ECX copy
  168. mov eax,03fdeffdfh
  169. and DWORD PTR[8+rdi],eax   ; clear stored bits 31,30,21,16,5
  170. $L$done::
  171. shl r9,32
  172. mov eax,r10d
  173. mov rbx,r8   ; restore caller's rbx
  174. or rax,r9   ; rax = ECX copy : EDX copy
  175. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  176. mov rsi,QWORD PTR[16+rsp]
  177. DB 0F3h,0C3h ;repret
  178. $L$SEH_end_OPENSSL_ia32_cpuid::
  179. OPENSSL_ia32_cpuid ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_cleanse
  ; Writes zero bytes over a memory range.
  ; In:    rcx = start address, rdx = byte count
  ; Out:   rax = 0
  ; Clobb: rcx, rdx, flags
  ; Ranges shorter than 15 bytes are cleared one byte at a time; longer
  ; ranges are byte-cleared up to 8-byte alignment, then cleared in QWORDs,
  ; with any remainder handled by the byte loop.
  ;-----------------------------------------------------------------------
  180. PUBLIC OPENSSL_cleanse
  181. ALIGN 16
  182. OPENSSL_cleanse PROC PUBLIC
  183. xor rax,rax   ; rax = 0, the fill value
  184. cmp rdx,15
  185. jae $L$ot   ; 15 bytes or more: aligned QWORD path
  186. cmp rdx,0
  187. je $L$ret   ; nothing to clear
  188. $L$ittle::
  189. mov BYTE PTR[rcx],al
  190. sub rdx,1   ; sets ZF for the jnz below
  191. lea rcx,QWORD PTR[1+rcx]   ; advance without touching flags
  192. jnz $L$ittle
  193. $L$ret::
  194. DB 0F3h,0C3h ;repret
  195. ALIGN 16
  196. $L$ot::
  197. test rcx,7
  198. jz $L$aligned   ; pointer is 8-byte aligned
  199. mov BYTE PTR[rcx],al   ; clear one leading byte
  200. lea rdx,QWORD PTR[((-1))+rdx]
  201. lea rcx,QWORD PTR[1+rcx]
  202. jmp $L$ot   ; at most 7 such bytes, so at least 8 remain on entry to $L$aligned
  203. $L$aligned::
  204. mov QWORD PTR[rcx],rax
  205. lea rdx,QWORD PTR[((-8))+rdx]
  206. test rdx,-8   ; non-zero while 8 or more bytes remain
  207. lea rcx,QWORD PTR[8+rcx]   ; advance without touching flags
  208. jnz $L$aligned
  209. cmp rdx,0
  210. jne $L$ittle   ; 1..7 trailing bytes
  211. DB 0F3h,0C3h ;repret
  212. OPENSSL_cleanse ENDP
  ;-----------------------------------------------------------------------
  ; CRYPTO_memcmp
  ; Compares two buffers without exiting early on the first difference.
  ; In:    rcx = first buffer, rdx = second buffer, r8 = byte count
  ; Out:   rax = 0 when all bytes match (or count is 0), 1 otherwise
  ; Clobb: rcx, rdx, r8, r10, r11, flags
  ; A count of exactly 16 takes a two-QWORD fast path.
  ;-----------------------------------------------------------------------
  213. PUBLIC CRYPTO_memcmp
  214. ALIGN 16
  215. CRYPTO_memcmp PROC PUBLIC
  216. xor rax,rax   ; result / difference accumulator
  217. xor r10,r10
  218. cmp r8,0
  219. je $L$no_data   ; empty input compares equal (rax = 0)
  220. cmp r8,16
  221. jne $L$oop_cmp
  222. mov r10,QWORD PTR[rcx]   ; 16-byte case: load both halves of the first buffer
  223. mov r11,QWORD PTR[8+rcx]
  224. mov r8,1   ; value returned on mismatch
  225. xor r10,QWORD PTR[rdx]
  226. xor r11,QWORD PTR[8+rdx]
  227. or r10,r11   ; non-zero iff any bit differs
  228. cmovnz rax,r8   ; branch-free select of 0 or 1
  229. DB 0F3h,0C3h ;repret
  230. ALIGN 16
  231. $L$oop_cmp::
  232. mov r10b,BYTE PTR[rcx]
  233. lea rcx,QWORD PTR[1+rcx]
  234. xor r10b,BYTE PTR[rdx]
  235. lea rdx,QWORD PTR[1+rdx]
  236. or al,r10b   ; accumulate every byte difference
  237. dec r8
  238. jnz $L$oop_cmp
  239. neg rax   ; rax was 0..255; a non-zero value becomes negative
  240. shr rax,63   ; sign bit -> 0 or 1
  241. $L$no_data::
  242. DB 0F3h,0C3h ;repret
  243. CRYPTO_memcmp ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_wipe_cpu
  ; Zeroes xmm0-xmm5 and rcx, rdx, r8-r11.
  ; In:    none
  ; Out:   rax = rsp + 8 as seen on entry (the address just above the
  ;        return-address slot)
  ; Clobb: the registers listed above, flags
  ;-----------------------------------------------------------------------
  244. PUBLIC OPENSSL_wipe_cpu
  245. ALIGN 16
  246. OPENSSL_wipe_cpu PROC PUBLIC
  247. pxor xmm0,xmm0
  248. pxor xmm1,xmm1
  249. pxor xmm2,xmm2
  250. pxor xmm3,xmm3
  251. pxor xmm4,xmm4
  252. pxor xmm5,xmm5
  253. xor rcx,rcx
  254. xor rdx,rdx
  255. xor r8,r8
  256. xor r9,r9
  257. xor r10,r10
  258. xor r11,r11
  259. lea rax,QWORD PTR[8+rsp]   ; return value: stack address above the return address
  260. DB 0F3h,0C3h ;repret
  261. OPENSSL_wipe_cpu ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_instrument_bus
  ; For each of rdx consecutive DWORD slots: flushes the slot's cache line,
  ; then LOCK-adds the low-32-bit RDTSC delta since the previous reading.
  ; In:    rcx = pointer to the DWORD array, rdx = number of slots
  ; Out:   rax = rdx as passed in
  ; Clobb: rcx, rdx, r8-r11, flags
  ; Values are added to the existing memory contents, not stored over them.
  ; NOTE(review): a count of 0 wraps at the first "sub rcx,1"; callers
  ; presumably always pass a count of at least 1 - confirm.
  ;-----------------------------------------------------------------------
  262. PUBLIC OPENSSL_instrument_bus
  263. ALIGN 16
  264. OPENSSL_instrument_bus PROC PUBLIC
  265. mov r10,rcx   ; r10 = current output slot
  266. mov rcx,rdx   ; rcx = remaining slot count
  267. mov r11,rdx   ; r11 = original count, returned at the end
  268. rdtsc
  269. mov r8d,eax   ; r8d = last tick (low 32 bits)
  270. mov r9d,0   ; r9d = last delta
  271. clflush [r10]
  272. DB 0f0h   ; LOCK prefix for the add that follows
  273. add DWORD PTR[r10],r9d   ; adds 0: touches the first slot without changing it
  274. jmp $L$oop
  275. ALIGN 16
  276. $L$oop:: rdtsc
  277. mov edx,eax
  278. sub eax,r8d   ; eax = tick - last tick
  279. mov r8d,edx   ; last tick = tick
  280. mov r9d,eax   ; last delta = delta
  281. clflush [r10]
  282. DB 0f0h   ; LOCK prefix for the add that follows
  283. add DWORD PTR[r10],eax   ; accumulate the delta into the slot
  284. lea r10,QWORD PTR[4+r10]   ; next DWORD slot
  285. sub rcx,1
  286. jnz $L$oop
  287. mov rax,r11
  288. DB 0F3h,0C3h ;repret
  289. OPENSSL_instrument_bus ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_instrument_bus2
  ; Like OPENSSL_instrument_bus, but moves to the next DWORD slot only when
  ; the RDTSC delta differs from the previous delta, and gives up after r8
  ; probes.
  ; In:    rcx = pointer to the DWORD array, rdx = number of slots,
  ;        r8 = maximum number of probes
  ; Out:   rax = rdx as passed in minus the slots still unconsumed
  ; Clobb: rcx, rdx, r8-r11, flags; writes the QWORD at [rsp+8]
  ; NOTE(review): [rsp+8] is presumably the caller-provided Win64 home
  ; space - confirm against the ABI the file is built for.
  ;-----------------------------------------------------------------------
  290. PUBLIC OPENSSL_instrument_bus2
  291. ALIGN 16
  292. OPENSSL_instrument_bus2 PROC PUBLIC
  293. mov r10,rcx   ; r10 = current output slot
  294. mov rcx,rdx   ; rcx = remaining slot count
  295. mov r11,r8   ; r11 = remaining probe budget
  296. mov QWORD PTR[8+rsp],rcx   ; remember the original slot count
  297. rdtsc
  298. mov r8d,eax   ; r8d = last tick (low 32 bits)
  299. mov r9d,0   ; r9d = last delta
  300. clflush [r10]
  301. DB 0f0h   ; LOCK prefix for the add that follows
  302. add DWORD PTR[r10],r9d   ; adds 0: touches the first slot without changing it
  303. rdtsc
  304. mov edx,eax
  305. sub eax,r8d   ; eax = first delta
  306. mov r8d,edx
  307. mov r9d,eax
  308. $L$oop2::
  309. clflush [r10]
  310. DB 0f0h   ; LOCK prefix for the add that follows
  311. add DWORD PTR[r10],eax   ; accumulate the delta into the current slot
  312. sub r11,1
  313. jz $L$done2   ; probe budget exhausted
  314. rdtsc
  315. mov edx,eax
  316. sub eax,r8d   ; eax = new delta
  317. mov r8d,edx
  318. cmp eax,r9d   ; did the delta change?
  319. mov r9d,eax   ; mov keeps the flags from cmp
  320. mov edx,0   ; mov (not xor) so the flags survive for setne
  321. setne dl   ; rdx = 1 when the delta changed, else 0
  322. sub rcx,rdx   ; consume a slot only on change; ZF feeds the jnz below
  323. lea r10,QWORD PTR[rdx*4+r10]   ; advance by one DWORD on change; lea keeps flags
  324. jnz $L$oop2
  325. $L$done2::
  326. mov rax,QWORD PTR[8+rsp]
  327. sub rax,rcx   ; original count - remaining count
  328. DB 0F3h,0C3h ;repret
  329. OPENSSL_instrument_bus2 ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_ia32_rdrand_bytes
  ; Fills a buffer with output of RDRAND, 8 bytes per successful draw.
  ; In:    rcx = destination buffer, rdx = number of bytes requested
  ; Out:   rax = number of bytes written; less than requested when a draw
  ;        fails 8 times in a row
  ; Clobb: rcx, rdx, r10, r11, flags (r10 is zeroed before returning)
  ;-----------------------------------------------------------------------
  330. PUBLIC OPENSSL_ia32_rdrand_bytes
  331. ALIGN 16
  332. OPENSSL_ia32_rdrand_bytes PROC PUBLIC
  333. xor rax,rax   ; bytes written so far
  334. cmp rdx,0
  335. je $L$done_rdrand_bytes
  336. mov r11,8   ; retry budget for one draw
  337. $L$oop_rdrand_bytes::
  338. DB 73,15,199,242   ; bytes 49 0F C7 F2 = rdrand r10
  339. jc $L$break_rdrand_bytes   ; CF=1: r10 holds a valid value
  340. dec r11
  341. jnz $L$oop_rdrand_bytes
  342. jmp $L$done_rdrand_bytes   ; 8 failures in a row: return what was written
  343. ALIGN 16
  344. $L$break_rdrand_bytes::
  345. cmp rdx,8
  346. jb $L$tail_rdrand_bytes   ; fewer than 8 bytes left
  347. mov QWORD PTR[rcx],r10
  348. lea rcx,QWORD PTR[8+rcx]
  349. add rax,8
  350. sub rdx,8
  351. jz $L$done_rdrand_bytes
  352. mov r11,8   ; fresh retry budget for the next draw
  353. jmp $L$oop_rdrand_bytes
  354. ALIGN 16
  355. $L$tail_rdrand_bytes::
  356. mov BYTE PTR[rcx],r10b   ; emit the remaining 1..7 bytes, lowest byte first
  357. lea rcx,QWORD PTR[1+rcx]
  358. inc rax
  359. shr r10,8
  360. dec rdx
  361. jnz $L$tail_rdrand_bytes
  362. $L$done_rdrand_bytes::
  363. xor r10,r10   ; do not leave the last draw in a register
  364. DB 0F3h,0C3h ;repret
  365. OPENSSL_ia32_rdrand_bytes ENDP
  ;-----------------------------------------------------------------------
  ; OPENSSL_ia32_rdseed_bytes
  ; Fills a buffer with output of RDSEED, 8 bytes per successful draw.
  ; In:    rcx = destination buffer, rdx = number of bytes requested
  ; Out:   rax = number of bytes written; less than requested when a draw
  ;        fails 8 times in a row
  ; Clobb: rcx, rdx, r10, r11, flags (r10 is zeroed before returning)
  ;-----------------------------------------------------------------------
  366. PUBLIC OPENSSL_ia32_rdseed_bytes
  367. ALIGN 16
  368. OPENSSL_ia32_rdseed_bytes PROC PUBLIC
  369. xor rax,rax   ; bytes written so far
  370. cmp rdx,0
  371. je $L$done_rdseed_bytes
  372. mov r11,8   ; retry budget for one draw
  373. $L$oop_rdseed_bytes::
  374. DB 73,15,199,250   ; bytes 49 0F C7 FA = rdseed r10
  375. jc $L$break_rdseed_bytes   ; CF=1: r10 holds a valid value
  376. dec r11
  377. jnz $L$oop_rdseed_bytes
  378. jmp $L$done_rdseed_bytes   ; 8 failures in a row: return what was written
  379. ALIGN 16
  380. $L$break_rdseed_bytes::
  381. cmp rdx,8
  382. jb $L$tail_rdseed_bytes   ; fewer than 8 bytes left
  383. mov QWORD PTR[rcx],r10
  384. lea rcx,QWORD PTR[8+rcx]
  385. add rax,8
  386. sub rdx,8
  387. jz $L$done_rdseed_bytes
  388. mov r11,8   ; fresh retry budget for the next draw
  389. jmp $L$oop_rdseed_bytes
  390. ALIGN 16
  391. $L$tail_rdseed_bytes::
  392. mov BYTE PTR[rcx],r10b   ; emit the remaining 1..7 bytes, lowest byte first
  393. lea rcx,QWORD PTR[1+rcx]
  394. inc rax
  395. shr r10,8
  396. dec rdx
  397. jnz $L$tail_rdseed_bytes
  398. $L$done_rdseed_bytes::
  399. xor r10,r10   ; do not leave the last draw in a register
  400. DB 0F3h,0C3h ;repret
  401. OPENSSL_ia32_rdseed_bytes ENDP
  402. .text$ ENDS   ; closes the code segment opened by ".text$ SEGMENT"
  403. END   ; end of the assembly source