aesni-gcm-x86_64.masm 24 KB


  ; Assembler set-up.  OPTION DOTNAME permits identifiers that start with a
  ; period, which the ".text$" segment name below relies on.  Everything up to
  ; ".text$ ENDS" is placed in this 256-byte-aligned code segment.
  1. OPTION DOTNAME
  2. .text$ SEGMENT ALIGN(256) 'CODE'
  3. ALIGN 32
  ; _aesni_ctr32_ghash_6x -- private inner loop shared by aesni_gcm_decrypt and
  ; aesni_gcm_encrypt.  Each pass encrypts six counter blocks with AES-NI, XORs
  ; the result with 96 bytes read at rdi, and interleaves carry-less multiplies
  ; (vpclmulqdq) that fold six previously staged, byte-reversed blocks into the
  ; hash value carried in xmm8.
  ;
  ; Register contract, as established by the two callers in this file:
  ;   rdi     = data XORed with the keystream (advanced by 96 per pass)
  ;   rsi     = output pointer (advanced by 96 per pass)
  ;   rdx     = remaining length in 16-byte blocks
  ;   rcx     = key schedule + 128; round key k is read at [(16*k-128)+rcx]
  ;   ebp     = dword the callers read from [(240-128)+rcx]; it selects the
  ;             10-, 12- or 14-round tail below
  ;   r8      = counter block storage; the next counter is written there each pass
  ;   ebx     = dword from [12+r8], used only to detect low-byte counter overflow
  ;   r9      = caller's table pointer + 64; entries are read at [(n-32)+r9]
  ;   r10     = running count of bytes processed (+96 per pass)
  ;   r11     = $L$bswap_mask; the constants after it are reached at +16 .. +64
  ;   r14/r15 = cursor / limit for the data being staged for hashing
  ;   xmm1    = current counter block
  ;   xmm7    = byte-reversed staged block multiplied by the first table entry
  ;   xmm8    = hash value
  ; Stack references carry an extra "+8" because the return address lies between
  ; the caller's frame and rsp while this routine runs.
  ; On return xmm9-xmm14 hold the last six output blocks (the callers store
  ; them) and xmm8 holds the updated hash value.  r12 and r13 are scratch.
  4. _aesni_ctr32_ghash_6x PROC PRIVATE
  ; xmm2 = $L$one_msb (r11+32): adds 1 to byte 15 of a counter block.
  5. vmovdqu xmm2,XMMWORD PTR[32+r11]
  6. sub rdx,6
  7. vpxor xmm4,xmm4,xmm4
  ; xmm15 = round key 0.
  8. vmovdqu xmm15,XMMWORD PTR[((0-128))+rcx]
  ; Counters +1 .. +5 via byte-wise adds.
  9. vpaddb xmm10,xmm1,xmm2
  10. vpaddb xmm11,xmm10,xmm2
  11. vpaddb xmm12,xmm11,xmm2
  12. vpaddb xmm13,xmm12,xmm2
  13. vpaddb xmm14,xmm13,xmm2
  14. vpxor xmm9,xmm1,xmm15
  ; Zero the stack slot that is XORed into xmm8 during each pass.
  15. vmovdqu XMMWORD PTR[(16+8)+rsp],xmm4
  16. jmp $L$oop6x
  17. ALIGN 32
  18. $L$oop6x::
  ; 100663296 = 6 shl 24.  The top byte of ebx is byte 15 of the counter block,
  ; so a carry here means that byte would wrap within the next six increments
  ; and the byte-wise vpaddb shortcut cannot be used.
  19. add ebx,100663296
  20. jc $L$handle_ctr32
  21. vmovdqu xmm3,XMMWORD PTR[((0-32))+r9]
  ; xmm1 = counter for the following pass.
  22. vpaddb xmm1,xmm14,xmm2
  23. vpxor xmm10,xmm10,xmm15
  24. vpxor xmm11,xmm11,xmm15
  25. $L$resume_ctr32::
  ; Publish the next counter; it is reloaded from [r8] in $L$enc_tail.
  26. vmovdqu XMMWORD PTR[r8],xmm1
  27. vpclmulqdq xmm5,xmm7,xmm3,010h
  28. vpxor xmm12,xmm12,xmm15
  29. vmovups xmm2,XMMWORD PTR[((16-128))+rcx]
  30. vpclmulqdq xmm6,xmm7,xmm3,001h
  ; Branch-free cursor update: r12 = 96 if r15 >= r14 (unsigned), else 0.  The
  ; flags from this cmp stay live until setnc because the vector instructions
  ; in between do not modify them.
  31. xor r12,r12
  32. cmp r15,r14
  33. vaesenc xmm9,xmm9,xmm2
  34. vmovdqu xmm0,XMMWORD PTR[((48+8))+rsp]
  35. vpxor xmm13,xmm13,xmm15
  36. vpclmulqdq xmm1,xmm7,xmm3,000h
  37. vaesenc xmm10,xmm10,xmm2
  38. vpxor xmm14,xmm14,xmm15
  39. setnc r12b
  40. vpclmulqdq xmm7,xmm7,xmm3,011h
  41. vaesenc xmm11,xmm11,xmm2
  42. vmovdqu xmm3,XMMWORD PTR[((16-32))+r9]
  43. neg r12
  44. vaesenc xmm12,xmm12,xmm2
  45. vpxor xmm6,xmm6,xmm5
  46. vpclmulqdq xmm5,xmm0,xmm3,000h
  47. vpxor xmm8,xmm8,xmm4
  48. vaesenc xmm13,xmm13,xmm2
  49. vpxor xmm4,xmm1,xmm5
  50. and r12,060h
  51. vmovups xmm15,XMMWORD PTR[((32-128))+rcx]
  52. vpclmulqdq xmm1,xmm0,xmm3,010h
  53. vaesenc xmm14,xmm14,xmm2
  54. vpclmulqdq xmm2,xmm0,xmm3,001h
  55. lea r14,QWORD PTR[r12*1+r14]
  56. vaesenc xmm9,xmm9,xmm15
  57. vpxor xmm8,xmm8,XMMWORD PTR[((16+8))+rsp]
  58. vpclmulqdq xmm3,xmm0,xmm3,011h
  59. vmovdqu xmm0,XMMWORD PTR[((64+8))+rsp]
  60. vaesenc xmm10,xmm10,xmm15
  ; From here on, movbe/mov pairs stage the six blocks at r14 for the next
  ; pass: each block is written to the stack byte-reversed (the qword from
  ; the higher address is stored at the lower stack address).
  61. movbe r13,QWORD PTR[88+r14]
  62. vaesenc xmm11,xmm11,xmm15
  63. movbe r12,QWORD PTR[80+r14]
  64. vaesenc xmm12,xmm12,xmm15
  65. mov QWORD PTR[((32+8))+rsp],r13
  66. vaesenc xmm13,xmm13,xmm15
  67. mov QWORD PTR[((40+8))+rsp],r12
  68. vmovdqu xmm5,XMMWORD PTR[((48-32))+r9]
  69. vaesenc xmm14,xmm14,xmm15
  70. vmovups xmm15,XMMWORD PTR[((48-128))+rcx]
  71. vpxor xmm6,xmm6,xmm1
  72. vpclmulqdq xmm1,xmm0,xmm5,000h
  73. vaesenc xmm9,xmm9,xmm15
  74. vpxor xmm6,xmm6,xmm2
  75. vpclmulqdq xmm2,xmm0,xmm5,010h
  76. vaesenc xmm10,xmm10,xmm15
  77. vpxor xmm7,xmm7,xmm3
  78. vpclmulqdq xmm3,xmm0,xmm5,001h
  79. vaesenc xmm11,xmm11,xmm15
  80. vpclmulqdq xmm5,xmm0,xmm5,011h
  81. vmovdqu xmm0,XMMWORD PTR[((80+8))+rsp]
  82. vaesenc xmm12,xmm12,xmm15
  83. vaesenc xmm13,xmm13,xmm15
  84. vpxor xmm4,xmm4,xmm1
  85. vmovdqu xmm1,XMMWORD PTR[((64-32))+r9]
  86. vaesenc xmm14,xmm14,xmm15
  87. vmovups xmm15,XMMWORD PTR[((64-128))+rcx]
  88. vpxor xmm6,xmm6,xmm2
  89. vpclmulqdq xmm2,xmm0,xmm1,000h
  90. vaesenc xmm9,xmm9,xmm15
  91. vpxor xmm6,xmm6,xmm3
  92. vpclmulqdq xmm3,xmm0,xmm1,010h
  93. vaesenc xmm10,xmm10,xmm15
  94. movbe r13,QWORD PTR[72+r14]
  95. vpxor xmm7,xmm7,xmm5
  96. vpclmulqdq xmm5,xmm0,xmm1,001h
  97. vaesenc xmm11,xmm11,xmm15
  98. movbe r12,QWORD PTR[64+r14]
  99. vpclmulqdq xmm1,xmm0,xmm1,011h
  100. vmovdqu xmm0,XMMWORD PTR[((96+8))+rsp]
  101. vaesenc xmm12,xmm12,xmm15
  102. mov QWORD PTR[((48+8))+rsp],r13
  103. vaesenc xmm13,xmm13,xmm15
  104. mov QWORD PTR[((56+8))+rsp],r12
  105. vpxor xmm4,xmm4,xmm2
  106. vmovdqu xmm2,XMMWORD PTR[((96-32))+r9]
  107. vaesenc xmm14,xmm14,xmm15
  108. vmovups xmm15,XMMWORD PTR[((80-128))+rcx]
  109. vpxor xmm6,xmm6,xmm3
  110. vpclmulqdq xmm3,xmm0,xmm2,000h
  111. vaesenc xmm9,xmm9,xmm15
  112. vpxor xmm6,xmm6,xmm5
  113. vpclmulqdq xmm5,xmm0,xmm2,010h
  114. vaesenc xmm10,xmm10,xmm15
  115. movbe r13,QWORD PTR[56+r14]
  116. vpxor xmm7,xmm7,xmm1
  117. vpclmulqdq xmm1,xmm0,xmm2,001h
  ; The running hash is XORed with the last staged block before that block is
  ; multiplied by the final table entry.
  118. vpxor xmm8,xmm8,XMMWORD PTR[((112+8))+rsp]
  119. vaesenc xmm11,xmm11,xmm15
  120. movbe r12,QWORD PTR[48+r14]
  121. vpclmulqdq xmm2,xmm0,xmm2,011h
  122. vaesenc xmm12,xmm12,xmm15
  123. mov QWORD PTR[((64+8))+rsp],r13
  124. vaesenc xmm13,xmm13,xmm15
  125. mov QWORD PTR[((72+8))+rsp],r12
  126. vpxor xmm4,xmm4,xmm3
  127. vmovdqu xmm3,XMMWORD PTR[((112-32))+r9]
  128. vaesenc xmm14,xmm14,xmm15
  129. vmovups xmm15,XMMWORD PTR[((96-128))+rcx]
  130. vpxor xmm6,xmm6,xmm5
  131. vpclmulqdq xmm5,xmm8,xmm3,010h
  132. vaesenc xmm9,xmm9,xmm15
  133. vpxor xmm6,xmm6,xmm1
  134. vpclmulqdq xmm1,xmm8,xmm3,001h
  135. vaesenc xmm10,xmm10,xmm15
  136. movbe r13,QWORD PTR[40+r14]
  137. vpxor xmm7,xmm7,xmm2
  138. vpclmulqdq xmm2,xmm8,xmm3,000h
  139. vaesenc xmm11,xmm11,xmm15
  140. movbe r12,QWORD PTR[32+r14]
  141. vpclmulqdq xmm8,xmm8,xmm3,011h
  142. vaesenc xmm12,xmm12,xmm15
  143. mov QWORD PTR[((80+8))+rsp],r13
  144. vaesenc xmm13,xmm13,xmm15
  145. mov QWORD PTR[((88+8))+rsp],r12
  146. vpxor xmm6,xmm6,xmm5
  147. vaesenc xmm14,xmm14,xmm15
  148. vpxor xmm6,xmm6,xmm1
  149. vmovups xmm15,XMMWORD PTR[((112-128))+rcx]
  ; Split the accumulated cross terms in xmm6: the low half is shifted up into
  ; xmm4 here; the high half is shifted down into xmm7 further below.
  150. vpslldq xmm5,xmm6,8
  151. vpxor xmm4,xmm4,xmm2
  ; xmm3 = $L$poly (r11+16), the constant used by both reduction multiplies.
  152. vmovdqu xmm3,XMMWORD PTR[16+r11]
  153. vaesenc xmm9,xmm9,xmm15
  154. vpxor xmm7,xmm7,xmm8
  155. vaesenc xmm10,xmm10,xmm15
  156. vpxor xmm4,xmm4,xmm5
  157. movbe r13,QWORD PTR[24+r14]
  158. vaesenc xmm11,xmm11,xmm15
  159. movbe r12,QWORD PTR[16+r14]
  ; First reduction step: xmm0 = xmm4 with its qwords swapped, xmm4 = xmm4
  ; multiplied by $L$poly; the two are XORed together a few lines below.
  160. vpalignr xmm0,xmm4,xmm4,8
  161. vpclmulqdq xmm4,xmm4,xmm3,010h
  162. mov QWORD PTR[((96+8))+rsp],r13
  163. vaesenc xmm12,xmm12,xmm15
  164. mov QWORD PTR[((104+8))+rsp],r12
  165. vaesenc xmm13,xmm13,xmm15
  166. vmovups xmm1,XMMWORD PTR[((128-128))+rcx]
  167. vaesenc xmm14,xmm14,xmm15
  168. vaesenc xmm9,xmm9,xmm1
  169. vmovups xmm15,XMMWORD PTR[((144-128))+rcx]
  170. vaesenc xmm10,xmm10,xmm1
  171. vpsrldq xmm6,xmm6,8
  172. vaesenc xmm11,xmm11,xmm1
  173. vpxor xmm7,xmm7,xmm6
  174. vaesenc xmm12,xmm12,xmm1
  175. vpxor xmm4,xmm4,xmm0
  ; r13/r12 = byte-reversed halves of the first block at r14; they are written
  ; to the stack in $L$enc_tail.
  176. movbe r13,QWORD PTR[8+r14]
  177. vaesenc xmm13,xmm13,xmm1
  178. movbe r12,QWORD PTR[r14]
  179. vaesenc xmm14,xmm14,xmm1
  180. vmovups xmm1,XMMWORD PTR[((160-128))+rcx]
  ; Eight rounds are done; xmm15 = round key 9 and xmm1 = round key 10.
  ; ebp below 11 -> ten-round tail, ebp equal to 11 -> twelve rounds, anything
  ; else -> fourteen.  The "je" further down reuses the flags of this cmp.
  181. cmp ebp,11
  182. jb $L$enc_tail
  183. vaesenc xmm9,xmm9,xmm15
  184. vaesenc xmm10,xmm10,xmm15
  185. vaesenc xmm11,xmm11,xmm15
  186. vaesenc xmm12,xmm12,xmm15
  187. vaesenc xmm13,xmm13,xmm15
  188. vaesenc xmm14,xmm14,xmm15
  189. vaesenc xmm9,xmm9,xmm1
  190. vaesenc xmm10,xmm10,xmm1
  191. vaesenc xmm11,xmm11,xmm1
  192. vaesenc xmm12,xmm12,xmm1
  193. vaesenc xmm13,xmm13,xmm1
  194. vmovups xmm15,XMMWORD PTR[((176-128))+rcx]
  195. vaesenc xmm14,xmm14,xmm1
  196. vmovups xmm1,XMMWORD PTR[((192-128))+rcx]
  197. je $L$enc_tail
  198. vaesenc xmm9,xmm9,xmm15
  199. vaesenc xmm10,xmm10,xmm15
  200. vaesenc xmm11,xmm11,xmm15
  201. vaesenc xmm12,xmm12,xmm15
  202. vaesenc xmm13,xmm13,xmm15
  203. vaesenc xmm14,xmm14,xmm15
  204. vaesenc xmm9,xmm9,xmm1
  205. vaesenc xmm10,xmm10,xmm1
  206. vaesenc xmm11,xmm11,xmm1
  207. vaesenc xmm12,xmm12,xmm1
  208. vaesenc xmm13,xmm13,xmm1
  209. vmovups xmm15,XMMWORD PTR[((208-128))+rcx]
  210. vaesenc xmm14,xmm14,xmm1
  211. vmovups xmm1,XMMWORD PTR[((224-128))+rcx]
  212. jmp $L$enc_tail
  213. ALIGN 32
  ; Slow path for counter wrap: byte-reverse the counter with the mask at
  ; [r11], add $L$one_lsb / $L$two_lsb with 32-bit lane adds (vpaddd) to form
  ; counters +1 .. +6, then reverse each back.  xmm12-xmm14 are XORed with
  ; round key 0 after rejoining at $L$resume_ctr32.
  214. $L$handle_ctr32::
  215. vmovdqu xmm0,XMMWORD PTR[r11]
  216. vpshufb xmm6,xmm1,xmm0
  217. vmovdqu xmm5,XMMWORD PTR[48+r11]
  218. vpaddd xmm10,xmm6,XMMWORD PTR[64+r11]
  219. vpaddd xmm11,xmm6,xmm5
  220. vmovdqu xmm3,XMMWORD PTR[((0-32))+r9]
  221. vpaddd xmm12,xmm10,xmm5
  222. vpshufb xmm10,xmm10,xmm0
  223. vpaddd xmm13,xmm11,xmm5
  224. vpshufb xmm11,xmm11,xmm0
  225. vpxor xmm10,xmm10,xmm15
  226. vpaddd xmm14,xmm12,xmm5
  227. vpshufb xmm12,xmm12,xmm0
  228. vpxor xmm11,xmm11,xmm15
  229. vpaddd xmm1,xmm13,xmm5
  230. vpshufb xmm13,xmm13,xmm0
  231. vpshufb xmm14,xmm14,xmm0
  232. vpshufb xmm1,xmm1,xmm0
  233. jmp $L$resume_ctr32
  234. ALIGN 32
  ; Common tail: one more vaesenc with xmm15, then vaesenclast.  xmm1 holds
  ; the last round key and is XORed with the six input blocks first, so
  ; vaesenclast produces the output blocks directly.
  235. $L$enc_tail::
  236. vaesenc xmm9,xmm9,xmm15
  ; Park xmm7 in the stack slot that is XORed into xmm8 on the next pass or at
  ; $L$6x_done.
  237. vmovdqu XMMWORD PTR[(16+8)+rsp],xmm7
  ; Second reduction step (same rotate / multiply-by-$L$poly pair as before).
  238. vpalignr xmm8,xmm4,xmm4,8
  239. vaesenc xmm10,xmm10,xmm15
  240. vpclmulqdq xmm4,xmm4,xmm3,010h
  241. vpxor xmm2,xmm1,XMMWORD PTR[rdi]
  242. vaesenc xmm11,xmm11,xmm15
  243. vpxor xmm0,xmm1,XMMWORD PTR[16+rdi]
  244. vaesenc xmm12,xmm12,xmm15
  245. vpxor xmm5,xmm1,XMMWORD PTR[32+rdi]
  246. vaesenc xmm13,xmm13,xmm15
  247. vpxor xmm6,xmm1,XMMWORD PTR[48+rdi]
  248. vaesenc xmm14,xmm14,xmm15
  249. vpxor xmm7,xmm1,XMMWORD PTR[64+rdi]
  250. vpxor xmm3,xmm1,XMMWORD PTR[80+rdi]
  ; Reload the counter saved at $L$resume_ctr32 and pre-compute the next five
  ; with byte-wise adds of $L$one_msb.
  251. vmovdqu xmm1,XMMWORD PTR[r8]
  252. vaesenclast xmm9,xmm9,xmm2
  253. vmovdqu xmm2,XMMWORD PTR[32+r11]
  254. vaesenclast xmm10,xmm10,xmm0
  255. vpaddb xmm0,xmm1,xmm2
  256. mov QWORD PTR[((112+8))+rsp],r13
  257. lea rdi,QWORD PTR[96+rdi]
  258. vaesenclast xmm11,xmm11,xmm5
  259. vpaddb xmm5,xmm0,xmm2
  260. mov QWORD PTR[((120+8))+rsp],r12
  261. lea rsi,QWORD PTR[96+rsi]
  262. vmovdqu xmm15,XMMWORD PTR[((0-128))+rcx]
  263. vaesenclast xmm12,xmm12,xmm6
  264. vpaddb xmm6,xmm5,xmm2
  265. vaesenclast xmm13,xmm13,xmm7
  266. vpaddb xmm7,xmm6,xmm2
  267. vaesenclast xmm14,xmm14,xmm3
  268. vpaddb xmm3,xmm7,xmm2
  ; Account for the 96 bytes just produced; leave once the block count borrows.
  269. add r10,060h
  270. sub rdx,06h
  271. jc $L$6x_done
  ; Store this pass's output and move the pre-computed counters into place.
  272. vmovups XMMWORD PTR[(-96)+rsi],xmm9
  273. vpxor xmm9,xmm1,xmm15
  274. vmovups XMMWORD PTR[(-80)+rsi],xmm10
  275. vmovdqa xmm10,xmm0
  276. vmovups XMMWORD PTR[(-64)+rsi],xmm11
  277. vmovdqa xmm11,xmm5
  278. vmovups XMMWORD PTR[(-48)+rsi],xmm12
  279. vmovdqa xmm12,xmm6
  280. vmovups XMMWORD PTR[(-32)+rsi],xmm13
  281. vmovdqa xmm13,xmm7
  282. vmovups XMMWORD PTR[(-16)+rsi],xmm14
  283. vmovdqa xmm14,xmm3
  ; xmm7 = the block staged at [(32+8)+rsp] during this pass.
  284. vmovdqu xmm7,XMMWORD PTR[((32+8))+rsp]
  285. jmp $L$oop6x
  286. $L$6x_done::
  ; Finish the pending reduction; the last six output blocks stay in
  ; xmm9-xmm14 for the caller to store.
  287. vpxor xmm8,xmm8,XMMWORD PTR[((16+8))+rsp]
  288. vpxor xmm8,xmm8,xmm4
  289. DB 0F3h,0C3h ;repret
  290. _aesni_ctr32_ghash_6x ENDP
  ; aesni_gcm_decrypt -- Win64 entry point.
  ; Arguments arrive in rcx, rdx, r8, r9, [40+rsp], [48+rsp] and are moved to
  ; rdi, rsi, rdx, rcx, r8, r9, which the body uses as:
  ;   rdi = input, rsi = output, rdx = length in bytes, rcx = AES key schedule
  ;   (a dword is read at offset 240), r8 = 16-byte counter block, r9 = 16-byte
  ;   hash value followed by the multiplier table.
  ; Returns in rax the number of bytes processed (r10, which grows by 96 per
  ; pass of the helper), or 0 when the length is below 96 (060h).
  ; rdi/rsi are saved in the caller's home space; rbx, rbp, r12-r15 and
  ; xmm6-xmm15 are saved below the entry rsp, which stays in rax for the whole
  ; body and is what gcm_se_handler relies on.
  291. PUBLIC aesni_gcm_decrypt
  292. ALIGN 32
  293. aesni_gcm_decrypt PROC PUBLIC
  294. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  295. mov QWORD PTR[16+rsp],rsi
  296. mov rax,rsp
  297. $L$SEH_begin_aesni_gcm_decrypt::
  298. mov rdi,rcx
  299. mov rsi,rdx
  300. mov rdx,r8
  301. mov rcx,r9
  302. mov r8,QWORD PTR[40+rsp]
  303. mov r9,QWORD PTR[48+rsp]
  ; r10 = 0 is the return value if we bail out for a short input.
  304. xor r10,r10
  305. cmp rdx,060h
  306. jb $L$gcm_dec_abort
  307. lea rax,QWORD PTR[rsp]
  ; The six pushes land at [-8+rax] .. [-48+rax].
  308. push rbx
  309. push rbp
  310. push r12
  311. push r13
  312. push r14
  313. push r15
  ; 168 more bytes: rsp = rax-216, room for xmm6-xmm15 at [-216+rax] .. [-57+rax].
  314. lea rsp,QWORD PTR[((-168))+rsp]
  315. movaps XMMWORD PTR[(-216)+rax],xmm6
  316. movaps XMMWORD PTR[(-200)+rax],xmm7
  317. movaps XMMWORD PTR[(-184)+rax],xmm8
  318. movaps XMMWORD PTR[(-168)+rax],xmm9
  319. movaps XMMWORD PTR[(-152)+rax],xmm10
  320. movaps XMMWORD PTR[(-136)+rax],xmm11
  321. movaps XMMWORD PTR[(-120)+rax],xmm12
  322. movaps XMMWORD PTR[(-104)+rax],xmm13
  323. movaps XMMWORD PTR[(-88)+rax],xmm14
  324. movaps XMMWORD PTR[(-72)+rax],xmm15
  325. $L$gcm_dec_body::
  326. vzeroupper
  ; xmm1 = counter block, ebx = its last dword.
  327. vmovdqu xmm1,XMMWORD PTR[r8]
  328. add rsp,-128
  329. mov ebx,DWORD PTR[12+r8]
  330. lea r11,QWORD PTR[$L$bswap_mask]
  331. lea r14,QWORD PTR[((-128))+rcx]
  332. mov r15,0f80h
  ; xmm8 = stored hash value, byte-reversed below with the mask in xmm0.
  333. vmovdqu xmm8,XMMWORD PTR[r9]
  ; Scratch frame is 128-byte aligned.
  334. and rsp,-128
  335. vmovdqu xmm0,XMMWORD PTR[r11]
  ; Bias rcx by +128 and r9 by +64; the helper addresses both with matching
  ; negative displacements.
  336. lea rcx,QWORD PTR[128+rcx]
  337. lea r9,QWORD PTR[((32+32))+r9]
  ; ebp = dword at offset 240 of the key schedule.
  338. mov ebp,DWORD PTR[((240-128))+rcx]
  339. vpshufb xmm8,xmm8,xmm0
  ; Compare bits 7..11 of rsp with those of (key schedule - 128).  If the stack
  ; offset is 0..767 bytes above the key's, lower rsp by that distance.
  ; NOTE(review): presumably this keeps stack traffic from aliasing with the
  ; round-key loads; the rationale is not stated in this file.
  340. and r14,r15
  341. and r15,rsp
  342. sub r15,r14
  343. jc $L$dec_no_key_aliasing
  344. cmp r15,768
  345. jnc $L$dec_no_key_aliasing
  346. sub rsp,r15
  347. $L$dec_no_key_aliasing::
  ; Prime the hash pipeline with the first six input blocks, byte-reversed:
  ; the block at +80 stays in xmm7, the others go to [48+rsp] .. [112+rsp].
  ; r14 = hashing cursor (the input), r15 = input + length - 192 (its limit).
  348. vmovdqu xmm7,XMMWORD PTR[80+rdi]
  349. lea r14,QWORD PTR[rdi]
  350. vmovdqu xmm4,XMMWORD PTR[64+rdi]
  351. lea r15,QWORD PTR[((-192))+rdx*1+rdi]
  352. vmovdqu xmm5,XMMWORD PTR[48+rdi]
  ; Bytes -> 16-byte blocks.
  353. shr rdx,4
  354. xor r10,r10
  355. vmovdqu xmm6,XMMWORD PTR[32+rdi]
  356. vpshufb xmm7,xmm7,xmm0
  357. vmovdqu xmm2,XMMWORD PTR[16+rdi]
  358. vpshufb xmm4,xmm4,xmm0
  359. vmovdqu xmm3,XMMWORD PTR[rdi]
  360. vpshufb xmm5,xmm5,xmm0
  361. vmovdqu XMMWORD PTR[48+rsp],xmm4
  362. vpshufb xmm6,xmm6,xmm0
  363. vmovdqu XMMWORD PTR[64+rsp],xmm5
  364. vpshufb xmm2,xmm2,xmm0
  365. vmovdqu XMMWORD PTR[80+rsp],xmm6
  366. vpshufb xmm3,xmm3,xmm0
  367. vmovdqu XMMWORD PTR[96+rsp],xmm2
  368. vmovdqu XMMWORD PTR[112+rsp],xmm3
  369. call _aesni_ctr32_ghash_6x
  ; The helper returns with the last six output blocks still in registers.
  370. vmovups XMMWORD PTR[(-96)+rsi],xmm9
  371. vmovups XMMWORD PTR[(-80)+rsi],xmm10
  372. vmovups XMMWORD PTR[(-64)+rsi],xmm11
  373. vmovups XMMWORD PTR[(-48)+rsi],xmm12
  374. vmovups XMMWORD PTR[(-32)+rsi],xmm13
  375. vmovups XMMWORD PTR[(-16)+rsi],xmm14
  ; Byte-reverse the hash again and write it back where it was loaded from
  ; (r9 was advanced by 64 above).
  376. vpshufb xmm8,xmm8,XMMWORD PTR[r11]
  377. vmovdqu XMMWORD PTR[(-64)+r9],xmm8
  378. vzeroupper
  ; Restore non-volatile registers relative to the entry rsp kept in rax.
  379. movaps xmm6,XMMWORD PTR[((-216))+rax]
  380. movaps xmm7,XMMWORD PTR[((-200))+rax]
  381. movaps xmm8,XMMWORD PTR[((-184))+rax]
  382. movaps xmm9,XMMWORD PTR[((-168))+rax]
  383. movaps xmm10,XMMWORD PTR[((-152))+rax]
  384. movaps xmm11,XMMWORD PTR[((-136))+rax]
  385. movaps xmm12,XMMWORD PTR[((-120))+rax]
  386. movaps xmm13,XMMWORD PTR[((-104))+rax]
  387. movaps xmm14,XMMWORD PTR[((-88))+rax]
  388. movaps xmm15,XMMWORD PTR[((-72))+rax]
  389. mov r15,QWORD PTR[((-48))+rax]
  390. mov r14,QWORD PTR[((-40))+rax]
  391. mov r13,QWORD PTR[((-32))+rax]
  392. mov r12,QWORD PTR[((-24))+rax]
  393. mov rbp,QWORD PTR[((-16))+rax]
  394. mov rbx,QWORD PTR[((-8))+rax]
  395. lea rsp,QWORD PTR[rax]
  396. $L$gcm_dec_abort::
  ; rax = bytes processed.
  397. mov rax,r10
  398. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  399. mov rsi,QWORD PTR[16+rsp]
  400. DB 0F3h,0C3h ;repret
  401. $L$SEH_end_aesni_gcm_decrypt::
  402. aesni_gcm_decrypt ENDP
  ; _aesni_ctr32_6x -- private helper used by aesni_gcm_encrypt: encrypts six
  ; consecutive counter blocks, XORs them with 96 bytes at rdi and stores the
  ; result at rsi.  No hashing is done here.
  ; Expects (as set up by the caller):
  ;   rcx  = key schedule + 128
  ;   ebp  = dword from offset 240 of the key schedule; ebp vaesenc rounds are
  ;          performed, followed by one vaesenclast
  ;   r11  = $L$bswap_mask, xmm0 = the mask itself (used on the slow path)
  ;   xmm1 = current counter block, ebx = its last dword
  ; On return rdi and rsi have advanced by 96, xmm1 holds the next counter and
  ; xmm9-xmm14 still hold the six output blocks.  r12 and r13 are scratch.
  403. ALIGN 32
  404. _aesni_ctr32_6x PROC PRIVATE
  ; xmm4 = round key 0, xmm2 = $L$one_msb, xmm15 = round key 1.
  405. vmovdqu xmm4,XMMWORD PTR[((0-128))+rcx]
  406. vmovdqu xmm2,XMMWORD PTR[32+r11]
  ; r13 = loop count (ebp-1); r12 walks the round keys starting at key 2.
  407. lea r13,QWORD PTR[((-1))+rbp]
  408. vmovups xmm15,XMMWORD PTR[((16-128))+rcx]
  409. lea r12,QWORD PTR[((32-128))+rcx]
  410. vpxor xmm9,xmm1,xmm4
  ; 100663296 = 6 shl 24: carry means byte 15 of the counter would wrap within
  ; the next six increments, so take the slow path.
  411. add ebx,100663296
  412. jc $L$handle_ctr32_2
  ; Fast path: counters +1 .. +6 via byte-wise adds, each XORed with round
  ; key 0 as it is produced.
  413. vpaddb xmm10,xmm1,xmm2
  414. vpaddb xmm11,xmm10,xmm2
  415. vpxor xmm10,xmm10,xmm4
  416. vpaddb xmm12,xmm11,xmm2
  417. vpxor xmm11,xmm11,xmm4
  418. vpaddb xmm13,xmm12,xmm2
  419. vpxor xmm12,xmm12,xmm4
  420. vpaddb xmm14,xmm13,xmm2
  421. vpxor xmm13,xmm13,xmm4
  422. vpaddb xmm1,xmm14,xmm2
  423. vpxor xmm14,xmm14,xmm4
  424. jmp $L$oop_ctr32
  425. ALIGN 16
  ; One AES round per iteration across all six blocks; the next round key is
  ; fetched while the current round executes.
  426. $L$oop_ctr32::
  427. vaesenc xmm9,xmm9,xmm15
  428. vaesenc xmm10,xmm10,xmm15
  429. vaesenc xmm11,xmm11,xmm15
  430. vaesenc xmm12,xmm12,xmm15
  431. vaesenc xmm13,xmm13,xmm15
  432. vaesenc xmm14,xmm14,xmm15
  433. vmovups xmm15,XMMWORD PTR[r12]
  434. lea r12,QWORD PTR[16+r12]
  435. dec r13d
  436. jnz $L$oop_ctr32
  ; xmm3 = last round key.  It is XORed with the input blocks first so that
  ; vaesenclast produces the output directly.
  437. vmovdqu xmm3,XMMWORD PTR[r12]
  438. vaesenc xmm9,xmm9,xmm15
  439. vpxor xmm4,xmm3,XMMWORD PTR[rdi]
  440. vaesenc xmm10,xmm10,xmm15
  441. vpxor xmm5,xmm3,XMMWORD PTR[16+rdi]
  442. vaesenc xmm11,xmm11,xmm15
  443. vpxor xmm6,xmm3,XMMWORD PTR[32+rdi]
  444. vaesenc xmm12,xmm12,xmm15
  445. vpxor xmm8,xmm3,XMMWORD PTR[48+rdi]
  446. vaesenc xmm13,xmm13,xmm15
  447. vpxor xmm2,xmm3,XMMWORD PTR[64+rdi]
  448. vaesenc xmm14,xmm14,xmm15
  449. vpxor xmm3,xmm3,XMMWORD PTR[80+rdi]
  450. lea rdi,QWORD PTR[96+rdi]
  451. vaesenclast xmm9,xmm9,xmm4
  452. vaesenclast xmm10,xmm10,xmm5
  453. vaesenclast xmm11,xmm11,xmm6
  454. vaesenclast xmm12,xmm12,xmm8
  455. vaesenclast xmm13,xmm13,xmm2
  456. vaesenclast xmm14,xmm14,xmm3
  457. vmovups XMMWORD PTR[rsi],xmm9
  458. vmovups XMMWORD PTR[16+rsi],xmm10
  459. vmovups XMMWORD PTR[32+rsi],xmm11
  460. vmovups XMMWORD PTR[48+rsi],xmm12
  461. vmovups XMMWORD PTR[64+rsi],xmm13
  462. vmovups XMMWORD PTR[80+rsi],xmm14
  463. lea rsi,QWORD PTR[96+rsi]
  464. DB 0F3h,0C3h ;repret
  465. ALIGN 32
  ; Slow path for counter wrap: byte-reverse the counter (mask in xmm0), add
  ; $L$one_lsb / $L$two_lsb with 32-bit lane adds to form counters +1 .. +6,
  ; reverse each back and XOR with round key 0, then rejoin the round loop.
  466. $L$handle_ctr32_2::
  467. vpshufb xmm6,xmm1,xmm0
  468. vmovdqu xmm5,XMMWORD PTR[48+r11]
  469. vpaddd xmm10,xmm6,XMMWORD PTR[64+r11]
  470. vpaddd xmm11,xmm6,xmm5
  471. vpaddd xmm12,xmm10,xmm5
  472. vpshufb xmm10,xmm10,xmm0
  473. vpaddd xmm13,xmm11,xmm5
  474. vpshufb xmm11,xmm11,xmm0
  475. vpxor xmm10,xmm10,xmm4
  476. vpaddd xmm14,xmm12,xmm5
  477. vpshufb xmm12,xmm12,xmm0
  478. vpxor xmm11,xmm11,xmm4
  479. vpaddd xmm1,xmm13,xmm5
  480. vpshufb xmm13,xmm13,xmm0
  481. vpxor xmm12,xmm12,xmm4
  482. vpshufb xmm14,xmm14,xmm0
  483. vpxor xmm13,xmm13,xmm4
  484. vpshufb xmm1,xmm1,xmm0
  485. vpxor xmm14,xmm14,xmm4
  486. jmp $L$oop_ctr32
  487. _aesni_ctr32_6x ENDP
  ; aesni_gcm_encrypt -- Win64 entry point.
  ; Arguments arrive in rcx, rdx, r8, r9, [40+rsp], [48+rsp] and are moved to
  ; rdi, rsi, rdx, rcx, r8, r9, which the body uses as:
  ;   rdi = input, rsi = output, rdx = length in bytes, rcx = AES key schedule
  ;   (a dword is read at offset 240), r8 = 16-byte counter block, r9 = 16-byte
  ;   hash value followed by the multiplier table.
  ; Returns in rax the number of bytes processed (r10), or 0 when the length is
  ; below 288 (060h*3).
  ; Because the hash is taken over the output, hashing trails encryption by two
  ; six-block groups: two calls to _aesni_ctr32_6x encrypt the first twelve
  ; blocks, _aesni_ctr32_ghash_6x runs the interleaved loop with r14 pointing
  ; at the output buffer, and the code after that call hashes the last two
  ; groups without any further AES work.
  ; Register saving mirrors aesni_gcm_decrypt; rax holds the entry rsp for the
  ; whole body and is what gcm_se_handler relies on.
  488. PUBLIC aesni_gcm_encrypt
  489. ALIGN 32
  490. aesni_gcm_encrypt PROC PUBLIC
  491. mov QWORD PTR[8+rsp],rdi ;WIN64 prologue
  492. mov QWORD PTR[16+rsp],rsi
  493. mov rax,rsp
  494. $L$SEH_begin_aesni_gcm_encrypt::
  495. mov rdi,rcx
  496. mov rsi,rdx
  497. mov rdx,r8
  498. mov rcx,r9
  499. mov r8,QWORD PTR[40+rsp]
  500. mov r9,QWORD PTR[48+rsp]
  ; r10 = 0 is the return value if we bail out for a short input.
  501. xor r10,r10
  502. cmp rdx,060h*3
  503. jb $L$gcm_enc_abort
  504. lea rax,QWORD PTR[rsp]
  ; The six pushes land at [-8+rax] .. [-48+rax].
  505. push rbx
  506. push rbp
  507. push r12
  508. push r13
  509. push r14
  510. push r15
  ; 168 more bytes: rsp = rax-216, room for xmm6-xmm15.
  511. lea rsp,QWORD PTR[((-168))+rsp]
  512. movaps XMMWORD PTR[(-216)+rax],xmm6
  513. movaps XMMWORD PTR[(-200)+rax],xmm7
  514. movaps XMMWORD PTR[(-184)+rax],xmm8
  515. movaps XMMWORD PTR[(-168)+rax],xmm9
  516. movaps XMMWORD PTR[(-152)+rax],xmm10
  517. movaps XMMWORD PTR[(-136)+rax],xmm11
  518. movaps XMMWORD PTR[(-120)+rax],xmm12
  519. movaps XMMWORD PTR[(-104)+rax],xmm13
  520. movaps XMMWORD PTR[(-88)+rax],xmm14
  521. movaps XMMWORD PTR[(-72)+rax],xmm15
  522. $L$gcm_enc_body::
  523. vzeroupper
  ; xmm1 = counter block, ebx = its last dword.
  524. vmovdqu xmm1,XMMWORD PTR[r8]
  525. add rsp,-128
  526. mov ebx,DWORD PTR[12+r8]
  527. lea r11,QWORD PTR[$L$bswap_mask]
  528. lea r14,QWORD PTR[((-128))+rcx]
  529. mov r15,0f80h
  ; Bias rcx by +128; both helpers address round keys with "-128" displacements.
  530. lea rcx,QWORD PTR[128+rcx]
  531. vmovdqu xmm0,XMMWORD PTR[r11]
  ; Scratch frame is 128-byte aligned.
  532. and rsp,-128
  ; ebp = dword at offset 240 of the key schedule.
  533. mov ebp,DWORD PTR[((240-128))+rcx]
  ; Same stack-versus-key offset check as in aesni_gcm_decrypt: if bits 7..11
  ; of rsp are 0..767 bytes above those of (key schedule - 128), lower rsp by
  ; that distance.  NOTE(review): rationale is not stated in this file.
  534. and r14,r15
  535. and r15,rsp
  536. sub r15,r14
  537. jc $L$enc_no_key_aliasing
  538. cmp r15,768
  539. jnc $L$enc_no_key_aliasing
  540. sub rsp,r15
  541. $L$enc_no_key_aliasing::
  ; r14 = hashing cursor (the output buffer), r15 = output + length - 192.
  542. lea r14,QWORD PTR[rsi]
  543. lea r15,QWORD PTR[((-192))+rdx*1+rsi]
  ; Bytes -> 16-byte blocks.
  544. shr rdx,4
  ; Encrypt blocks 0..5, then stage them byte-reversed for hashing: block 0
  ; goes to [112+rsp] ... block 4 to [48+rsp]; block 5 stays in xmm7 (the
  ; second helper call below does not write xmm7).
  545. call _aesni_ctr32_6x
  546. vpshufb xmm8,xmm9,xmm0
  547. vpshufb xmm2,xmm10,xmm0
  548. vmovdqu XMMWORD PTR[112+rsp],xmm8
  549. vpshufb xmm4,xmm11,xmm0
  550. vmovdqu XMMWORD PTR[96+rsp],xmm2
  551. vpshufb xmm5,xmm12,xmm0
  552. vmovdqu XMMWORD PTR[80+rsp],xmm4
  553. vpshufb xmm6,xmm13,xmm0
  554. vmovdqu XMMWORD PTR[64+rsp],xmm5
  555. vpshufb xmm7,xmm14,xmm0
  556. vmovdqu XMMWORD PTR[48+rsp],xmm6
  ; Encrypt blocks 6..11.
  557. call _aesni_ctr32_6x
  ; xmm8 = stored hash value (byte-reversed below); r9 is biased by +64 for
  ; the "(n-32)+r9" table references.
  558. vmovdqu xmm8,XMMWORD PTR[r9]
  559. lea r9,QWORD PTR[((32+32))+r9]
  ; Twelve blocks (192 bytes) are already done.
  560. sub rdx,12
  561. mov r10,060h*2
  562. vpshufb xmm8,xmm8,xmm0
  563. call _aesni_ctr32_ghash_6x
  ; ---- Tail: hash the two six-block groups that have not been folded in yet.
  ; Group A: the blocks the helper staged on its last pass; its "[(32+8)+rsp]"
  ;          slots are [32+rsp] .. [112+rsp] in this frame.
  ; Group B: the final six output blocks, still in xmm9-xmm14.
  ; Each block costs three multiplies: low halves, high halves, and the XOR of
  ; the halves (vpunpckhqdq + vpxor) against a separate table entry.
  564. vmovdqu xmm7,XMMWORD PTR[32+rsp]
  565. vmovdqu xmm0,XMMWORD PTR[r11]
  566. vmovdqu xmm3,XMMWORD PTR[((0-32))+r9]
  567. vpunpckhqdq xmm1,xmm7,xmm7
  568. vmovdqu xmm15,XMMWORD PTR[((32-32))+r9]
  ; Store group B as output while byte-reversing the register copies for hashing.
  569. vmovups XMMWORD PTR[(-96)+rsi],xmm9
  570. vpshufb xmm9,xmm9,xmm0
  571. vpxor xmm1,xmm1,xmm7
  572. vmovups XMMWORD PTR[(-80)+rsi],xmm10
  573. vpshufb xmm10,xmm10,xmm0
  574. vmovups XMMWORD PTR[(-64)+rsi],xmm11
  575. vpshufb xmm11,xmm11,xmm0
  576. vmovups XMMWORD PTR[(-48)+rsi],xmm12
  577. vpshufb xmm12,xmm12,xmm0
  578. vmovups XMMWORD PTR[(-32)+rsi],xmm13
  579. vpshufb xmm13,xmm13,xmm0
  580. vmovups XMMWORD PTR[(-16)+rsi],xmm14
  581. vpshufb xmm14,xmm14,xmm0
  ; Keep the first block of group B on the stack; it is XORed into the
  ; running value once group A has been reduced.
  582. vmovdqu XMMWORD PTR[16+rsp],xmm9
  583. vmovdqu xmm6,XMMWORD PTR[48+rsp]
  584. vmovdqu xmm0,XMMWORD PTR[((16-32))+r9]
  585. vpunpckhqdq xmm2,xmm6,xmm6
  586. vpclmulqdq xmm5,xmm7,xmm3,000h
  587. vpxor xmm2,xmm2,xmm6
  588. vpclmulqdq xmm7,xmm7,xmm3,011h
  589. vpclmulqdq xmm1,xmm1,xmm15,000h
  590. vmovdqu xmm9,XMMWORD PTR[64+rsp]
  591. vpclmulqdq xmm4,xmm6,xmm0,000h
  592. vmovdqu xmm3,XMMWORD PTR[((48-32))+r9]
  593. vpxor xmm4,xmm4,xmm5
  594. vpunpckhqdq xmm5,xmm9,xmm9
  595. vpclmulqdq xmm6,xmm6,xmm0,011h
  596. vpxor xmm5,xmm5,xmm9
  597. vpxor xmm6,xmm6,xmm7
  598. vpclmulqdq xmm2,xmm2,xmm15,010h
  599. vmovdqu xmm15,XMMWORD PTR[((80-32))+r9]
  600. vpxor xmm2,xmm2,xmm1
  601. vmovdqu xmm1,XMMWORD PTR[80+rsp]
  602. vpclmulqdq xmm7,xmm9,xmm3,000h
  603. vmovdqu xmm0,XMMWORD PTR[((64-32))+r9]
  604. vpxor xmm7,xmm7,xmm4
  605. vpunpckhqdq xmm4,xmm1,xmm1
  606. vpclmulqdq xmm9,xmm9,xmm3,011h
  607. vpxor xmm4,xmm4,xmm1
  608. vpxor xmm9,xmm9,xmm6
  609. vpclmulqdq xmm5,xmm5,xmm15,000h
  610. vpxor xmm5,xmm5,xmm2
  611. vmovdqu xmm2,XMMWORD PTR[96+rsp]
  612. vpclmulqdq xmm6,xmm1,xmm0,000h
  613. vmovdqu xmm3,XMMWORD PTR[((96-32))+r9]
  614. vpxor xmm6,xmm6,xmm7
  615. vpunpckhqdq xmm7,xmm2,xmm2
  616. vpclmulqdq xmm1,xmm1,xmm0,011h
  617. vpxor xmm7,xmm7,xmm2
  618. vpxor xmm1,xmm1,xmm9
  619. vpclmulqdq xmm4,xmm4,xmm15,010h
  620. vmovdqu xmm15,XMMWORD PTR[((128-32))+r9]
  621. vpxor xmm4,xmm4,xmm5
  ; The hash value returned by the helper is XORed with the last staged block
  ; of group A before that block is multiplied.
  622. vpxor xmm8,xmm8,XMMWORD PTR[112+rsp]
  623. vpclmulqdq xmm5,xmm2,xmm3,000h
  624. vmovdqu xmm0,XMMWORD PTR[((112-32))+r9]
  625. vpunpckhqdq xmm9,xmm8,xmm8
  626. vpxor xmm5,xmm5,xmm6
  627. vpclmulqdq xmm2,xmm2,xmm3,011h
  628. vpxor xmm9,xmm9,xmm8
  629. vpxor xmm2,xmm2,xmm1
  630. vpclmulqdq xmm7,xmm7,xmm15,000h
  631. vpxor xmm4,xmm7,xmm4
  632. vpclmulqdq xmm6,xmm8,xmm0,000h
  ; Group B starts here (xmm14 first, working down to xmm10), overlapped with
  ; the remaining arithmetic and reduction for group A.
  633. vmovdqu xmm3,XMMWORD PTR[((0-32))+r9]
  634. vpunpckhqdq xmm1,xmm14,xmm14
  635. vpclmulqdq xmm8,xmm8,xmm0,011h
  636. vpxor xmm1,xmm1,xmm14
  637. vpxor xmm5,xmm6,xmm5
  638. vpclmulqdq xmm9,xmm9,xmm15,010h
  639. vmovdqu xmm15,XMMWORD PTR[((32-32))+r9]
  640. vpxor xmm7,xmm8,xmm2
  641. vpxor xmm6,xmm9,xmm4
  642. vmovdqu xmm0,XMMWORD PTR[((16-32))+r9]
  ; Group A: xmm5 = low sum, xmm7 = high sum, xmm6 = middle sum; the middle
  ; term is corrected by XORing in low and high, then split across the two.
  643. vpxor xmm9,xmm7,xmm5
  644. vpclmulqdq xmm4,xmm14,xmm3,000h
  645. vpxor xmm6,xmm6,xmm9
  646. vpunpckhqdq xmm2,xmm13,xmm13
  647. vpclmulqdq xmm14,xmm14,xmm3,011h
  648. vpxor xmm2,xmm2,xmm13
  649. vpslldq xmm9,xmm6,8
  650. vpclmulqdq xmm1,xmm1,xmm15,000h
  651. vpxor xmm8,xmm5,xmm9
  652. vpsrldq xmm6,xmm6,8
  653. vpxor xmm7,xmm7,xmm6
  654. vpclmulqdq xmm5,xmm13,xmm0,000h
  655. vmovdqu xmm3,XMMWORD PTR[((48-32))+r9]
  656. vpxor xmm5,xmm5,xmm4
  657. vpunpckhqdq xmm9,xmm12,xmm12
  658. vpclmulqdq xmm13,xmm13,xmm0,011h
  659. vpxor xmm9,xmm9,xmm12
  660. vpxor xmm13,xmm13,xmm14
  ; Group A reduction, step 1: swap the qwords of xmm8 into xmm14; the
  ; multiply by $L$poly and the XOR follow a few lines below.
  661. vpalignr xmm14,xmm8,xmm8,8
  662. vpclmulqdq xmm2,xmm2,xmm15,010h
  663. vmovdqu xmm15,XMMWORD PTR[((80-32))+r9]
  664. vpxor xmm2,xmm2,xmm1
  665. vpclmulqdq xmm4,xmm12,xmm3,000h
  666. vmovdqu xmm0,XMMWORD PTR[((64-32))+r9]
  667. vpxor xmm4,xmm4,xmm5
  668. vpunpckhqdq xmm1,xmm11,xmm11
  669. vpclmulqdq xmm12,xmm12,xmm3,011h
  670. vpxor xmm1,xmm1,xmm11
  671. vpxor xmm12,xmm12,xmm13
  ; Fold in the first block of group B that was saved at [16+rsp].
  672. vxorps xmm7,xmm7,XMMWORD PTR[16+rsp]
  673. vpclmulqdq xmm9,xmm9,xmm15,000h
  674. vpxor xmm9,xmm9,xmm2
  675. vpclmulqdq xmm8,xmm8,XMMWORD PTR[16+r11],010h
  676. vxorps xmm8,xmm8,xmm14
  677. vpclmulqdq xmm5,xmm11,xmm0,000h
  678. vmovdqu xmm3,XMMWORD PTR[((96-32))+r9]
  679. vpxor xmm5,xmm5,xmm4
  680. vpunpckhqdq xmm2,xmm10,xmm10
  681. vpclmulqdq xmm11,xmm11,xmm0,011h
  682. vpxor xmm2,xmm2,xmm10
  ; Group A reduction, step 2 (the high part in xmm7 is XORed in as well).
  683. vpalignr xmm14,xmm8,xmm8,8
  684. vpxor xmm11,xmm11,xmm12
  685. vpclmulqdq xmm1,xmm1,xmm15,010h
  686. vmovdqu xmm15,XMMWORD PTR[((128-32))+r9]
  687. vpxor xmm1,xmm1,xmm9
  688. vxorps xmm14,xmm14,xmm7
  689. vpclmulqdq xmm8,xmm8,XMMWORD PTR[16+r11],010h
  690. vxorps xmm8,xmm8,xmm14
  691. vpclmulqdq xmm4,xmm10,xmm3,000h
  692. vmovdqu xmm0,XMMWORD PTR[((112-32))+r9]
  693. vpxor xmm4,xmm4,xmm5
  ; xmm8 now takes the place of the sixth operand of group B and is
  ; multiplied by the last table entry.
  694. vpunpckhqdq xmm9,xmm8,xmm8
  695. vpclmulqdq xmm10,xmm10,xmm3,011h
  696. vpxor xmm9,xmm9,xmm8
  697. vpxor xmm10,xmm10,xmm11
  698. vpclmulqdq xmm2,xmm2,xmm15,000h
  699. vpxor xmm2,xmm2,xmm1
  700. vpclmulqdq xmm5,xmm8,xmm0,000h
  701. vpclmulqdq xmm7,xmm8,xmm0,011h
  702. vpxor xmm5,xmm5,xmm4
  703. vpclmulqdq xmm6,xmm9,xmm15,010h
  704. vpxor xmm7,xmm7,xmm10
  705. vpxor xmm6,xmm6,xmm2
  ; Group B: combine low (xmm5), high (xmm7) and middle (xmm6) sums as above.
  706. vpxor xmm4,xmm7,xmm5
  707. vpxor xmm6,xmm6,xmm4
  708. vpslldq xmm1,xmm6,8
  ; xmm3 = $L$poly.
  709. vmovdqu xmm3,XMMWORD PTR[16+r11]
  710. vpsrldq xmm6,xmm6,8
  711. vpxor xmm8,xmm5,xmm1
  712. vpxor xmm7,xmm7,xmm6
  ; Final two-step reduction into xmm8.
  713. vpalignr xmm2,xmm8,xmm8,8
  714. vpclmulqdq xmm8,xmm8,xmm3,010h
  715. vpxor xmm8,xmm8,xmm2
  716. vpalignr xmm2,xmm8,xmm8,8
  717. vpclmulqdq xmm8,xmm8,xmm3,010h
  718. vpxor xmm2,xmm2,xmm7
  719. vpxor xmm8,xmm8,xmm2
  ; Byte-reverse the hash and write it back where it was loaded from (r9 was
  ; advanced by 64 above).
  720. vpshufb xmm8,xmm8,XMMWORD PTR[r11]
  721. vmovdqu XMMWORD PTR[(-64)+r9],xmm8
  722. vzeroupper
  ; Restore non-volatile registers relative to the entry rsp kept in rax.
  723. movaps xmm6,XMMWORD PTR[((-216))+rax]
  724. movaps xmm7,XMMWORD PTR[((-200))+rax]
  725. movaps xmm8,XMMWORD PTR[((-184))+rax]
  726. movaps xmm9,XMMWORD PTR[((-168))+rax]
  727. movaps xmm10,XMMWORD PTR[((-152))+rax]
  728. movaps xmm11,XMMWORD PTR[((-136))+rax]
  729. movaps xmm12,XMMWORD PTR[((-120))+rax]
  730. movaps xmm13,XMMWORD PTR[((-104))+rax]
  731. movaps xmm14,XMMWORD PTR[((-88))+rax]
  732. movaps xmm15,XMMWORD PTR[((-72))+rax]
  733. mov r15,QWORD PTR[((-48))+rax]
  734. mov r14,QWORD PTR[((-40))+rax]
  735. mov r13,QWORD PTR[((-32))+rax]
  736. mov r12,QWORD PTR[((-24))+rax]
  737. mov rbp,QWORD PTR[((-16))+rax]
  738. mov rbx,QWORD PTR[((-8))+rax]
  739. lea rsp,QWORD PTR[rax]
  740. $L$gcm_enc_abort::
  ; rax = bytes processed.
  741. mov rax,r10
  742. mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue
  743. mov rsi,QWORD PTR[16+rsp]
  744. DB 0F3h,0C3h ;repret
  745. $L$SEH_end_aesni_gcm_encrypt::
  746. aesni_gcm_encrypt ENDP
  ; Constant table.  The code keeps r11 = $L$bswap_mask and reaches the other
  ; entries by fixed displacement, so each entry must stay exactly 16 bytes
  ; long and in this order:
  ;   r11+0   $L$bswap_mask  vpshufb mask that reverses the 16 bytes of a block
  ;   r11+16  $L$poly        multiplier used by the reduction steps (0c2h in byte 15)
  ;   r11+32  $L$one_msb     1 in byte 15; added with vpaddb on the fast counter path
  ;   r11+48  $L$two_lsb     2 in byte 0; added with vpaddd to a byte-reversed counter
  ;   r11+64  $L$one_lsb     1 in byte 0; added with vpaddd to a byte-reversed counter
  747. ALIGN 64
  748. $L$bswap_mask::
  749. DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  750. $L$poly::
  751. DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0c2h
  752. $L$one_msb::
  753. DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
  754. $L$two_lsb::
  755. DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  756. $L$one_lsb::
  757. DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
  ; NUL-terminated ASCII identification string:
  ; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
  758. DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
  759. DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
  760. DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
  761. DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
  762. ALIGN 64
  ; gcm_se_handler -- Win64 language-specific exception handler shared by
  ; aesni_gcm_decrypt and aesni_gcm_encrypt (referenced from .xdata below).
  ; In: r8 = CONTEXT record of the faulting frame, r9 = DISPATCHER_CONTEXT.
  ; The handler data (reached through [56+r9]) holds two image-relative
  ; offsets: the function's "body" label and its "abort" label.
  ;   * Rip before the body label  -> nothing has been pushed yet; the saved
  ;     Rax (a copy of the entry rsp) is taken as the frame address.
  ;   * Rip at or after the abort label -> rsp has already been restored; the
  ;     saved Rsp is taken as the frame address.
  ;   * otherwise -> Rax holds the entry rsp; rbx, rbp, r12-r15 and xmm6-xmm15
  ;     are recovered from below it, matching the prologue layout.
  ; In every case Rsp, Rsi and Rdi of the context are then rewritten, the
  ; context is copied into the dispatcher's record, RtlVirtualUnwind is called
  ; and 1 is returned in eax.
  ; The numeric offsets follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts.
  763. EXTERN __imp_RtlVirtualUnwind:NEAR
  764. ALIGN 16
  765. gcm_se_handler PROC PRIVATE
  766. push rsi
  767. push rdi
  768. push rbx
  769. push rbp
  770. push r12
  771. push r13
  772. push r14
  773. push r15
  774. pushfq
  ; 64 bytes: 32 of home space plus four stack arguments for RtlVirtualUnwind.
  775. sub rsp,64
  ; rax = context Rax, rbx = context Rip.
  776. mov rax,QWORD PTR[120+r8]
  777. mov rbx,QWORD PTR[248+r8]
  ; rsi = image base, r11 = handler data.
  778. mov rsi,QWORD PTR[8+r9]
  779. mov r11,QWORD PTR[56+r9]
  ; r10 = absolute address of the body label.
  780. mov r10d,DWORD PTR[r11]
  781. lea r10,QWORD PTR[r10*1+rsi]
  782. cmp rbx,r10
  783. jb $L$common_seh_tail
  ; rax = context Rsp; r10 = absolute address of the abort label.
  784. mov rax,QWORD PTR[152+r8]
  785. mov r10d,DWORD PTR[4+r11]
  786. lea r10,QWORD PTR[r10*1+rsi]
  787. cmp rbx,r10
  788. jae $L$common_seh_tail
  ; Inside the body: rax = entry rsp of the interrupted function.
  789. mov rax,QWORD PTR[120+r8]
  790. mov r15,QWORD PTR[((-48))+rax]
  791. mov r14,QWORD PTR[((-40))+rax]
  792. mov r13,QWORD PTR[((-32))+rax]
  793. mov r12,QWORD PTR[((-24))+rax]
  794. mov rbp,QWORD PTR[((-16))+rax]
  795. mov rbx,QWORD PTR[((-8))+rax]
  ; Write them into the context's R15, R14, R13, R12, Rbp and Rbx slots.
  796. mov QWORD PTR[240+r8],r15
  797. mov QWORD PTR[232+r8],r14
  798. mov QWORD PTR[224+r8],r13
  799. mov QWORD PTR[216+r8],r12
  800. mov QWORD PTR[160+r8],rbp
  801. mov QWORD PTR[144+r8],rbx
  ; Copy the ten saved xmm registers (20 qwords) into the context at offset
  ; 512.  The DD encodes "cld; rep movsq" (bytes FC F3 48 A5).
  802. lea rsi,QWORD PTR[((-216))+rax]
  803. lea rdi,QWORD PTR[512+r8]
  804. mov ecx,20
  805. DD 0a548f3fch
  806. $L$common_seh_tail::
  ; rdi/rsi were parked at [8+frame] and [16+frame] by the function prologue.
  807. mov rdi,QWORD PTR[8+rax]
  808. mov rsi,QWORD PTR[16+rax]
  ; Update the context's Rsp, Rsi and Rdi.
  809. mov QWORD PTR[152+r8],rax
  810. mov QWORD PTR[168+r8],rsi
  811. mov QWORD PTR[176+r8],rdi
  ; Copy the whole context (154 qwords) into the record at [40+r9], again
  ; with "cld; rep movsq".
  812. mov rdi,QWORD PTR[40+r9]
  813. mov rsi,r8
  814. mov ecx,154
  815. DD 0a548f3fch
  ; Call RtlVirtualUnwind: rcx = 0, rdx = [8+disp], r8 = [disp], r9 = [16+disp];
  ; stack arguments: [40+disp], address of disp+56, address of disp+24, 0.
  816. mov rsi,r9
  817. xor rcx,rcx
  818. mov rdx,QWORD PTR[8+rsi]
  819. mov r8,QWORD PTR[rsi]
  820. mov r9,QWORD PTR[16+rsi]
  821. mov r10,QWORD PTR[40+rsi]
  822. lea r11,QWORD PTR[56+rsi]
  823. lea r12,QWORD PTR[24+rsi]
  824. mov QWORD PTR[32+rsp],r10
  825. mov QWORD PTR[40+rsp],r11
  826. mov QWORD PTR[48+rsp],r12
  827. mov QWORD PTR[56+rsp],rcx
  828. call QWORD PTR[__imp_RtlVirtualUnwind]
  ; Return value 1.
  829. mov eax,1
  830. add rsp,64
  831. popfq
  832. pop r15
  833. pop r14
  834. pop r13
  835. pop r12
  836. pop rbp
  837. pop rbx
  838. pop rdi
  839. pop rsi
  840. DB 0F3h,0C3h ;repret
  841. gcm_se_handler ENDP
  842. .text$ ENDS
  ; .pdata: one three-dword entry per public function, giving image-relative
  ; begin address, end address and the address of its unwind information.
  843. .pdata SEGMENT READONLY ALIGN(4)
  844. ALIGN 4
  845. DD imagerel $L$SEH_begin_aesni_gcm_decrypt
  846. DD imagerel $L$SEH_end_aesni_gcm_decrypt
  847. DD imagerel $L$SEH_gcm_dec_info
  848. DD imagerel $L$SEH_begin_aesni_gcm_encrypt
  849. DD imagerel $L$SEH_end_aesni_gcm_encrypt
  850. DD imagerel $L$SEH_gcm_enc_info
  851. .pdata ENDS
  ; .xdata: unwind information.  Each record is a four-byte header (first byte
  ; 9, the rest zero), the handler address, then the two label addresses that
  ; gcm_se_handler reads as its handler data (body label, abort label).
  852. .xdata SEGMENT READONLY ALIGN(8)
  853. ALIGN 8
  854. $L$SEH_gcm_dec_info::
  855. DB 9,0,0,0
  856. DD imagerel gcm_se_handler
  857. DD imagerel $L$gcm_dec_body,imagerel $L$gcm_dec_abort
  858. $L$SEH_gcm_enc_info::
  859. DB 9,0,0,0
  860. DD imagerel gcm_se_handler
  861. DD imagerel $L$gcm_enc_body,imagerel $L$gcm_enc_abort
  862. .xdata ENDS
  863. END