keccak1600-armv4.S 58 KB


  1. #include "arm_arch.h"
  2. .text
  3. #if defined(__thumb2__)
  4. .syntax unified
  5. .thumb
  6. #else
  7. .code 32
  8. #endif
  @ iotas32: table of 24 iota constants, one entry per .long line below.
  @ Each entry is two 32-bit words (8 bytes), 192 bytes in total.
  @
  @ How the visible round code uses it: the table address is taken with
  @ "adr r10,iotas32" and the byte counter kept at [sp,#444] is added to it.
  @ The two words of an entry are loaded into r10 (first word, annotated
  @ ".lo" in the round code) and r11 (second word, ".hi") and XORed into
  @ the two words of R[0][0].  Between .Lround2x and the counter store the
  @ code reads two consecutive entries (offsets +0 and +8/+12), advances
  @ the counter by 16 and compares it against 192 (= 24 * 8).
  @
  @ NOTE(review): the values look like the Keccak-f[1600] round constants
  @ in bit-interleaved form (first word = even-numbered bits, second word
  @ = odd-numbered bits; e.g. 0x0000000000008082 gives 0x00000000,
  @ 0x00000089) -- confirm against the reference constants before editing.
  9. .type iotas32, %object @ mark the symbol as a data object
  10. .align 5 @ 2^5 = 32-byte alignment
  11. iotas32:
  12. .long 0x00000001, 0x00000000 @ iotas[0]
  13. .long 0x00000000, 0x00000089 @ iotas[1]
  14. .long 0x00000000, 0x8000008b @ iotas[2]
  15. .long 0x00000000, 0x80008080 @ iotas[3]
  16. .long 0x00000001, 0x0000008b @ iotas[4]
  17. .long 0x00000001, 0x00008000 @ iotas[5]
  18. .long 0x00000001, 0x80008088 @ iotas[6]
  19. .long 0x00000001, 0x80000082 @ iotas[7]
  20. .long 0x00000000, 0x0000000b @ iotas[8]
  21. .long 0x00000000, 0x0000000a @ iotas[9]
  22. .long 0x00000001, 0x00008082 @ iotas[10]
  23. .long 0x00000000, 0x00008003 @ iotas[11]
  24. .long 0x00000001, 0x0000808b @ iotas[12]
  25. .long 0x00000001, 0x8000000b @ iotas[13]
  26. .long 0x00000001, 0x8000008a @ iotas[14]
  27. .long 0x00000001, 0x80000081 @ iotas[15]
  28. .long 0x00000000, 0x80000081 @ iotas[16]
  29. .long 0x00000000, 0x80000008 @ iotas[17]
  30. .long 0x00000000, 0x00000083 @ iotas[18]
  31. .long 0x00000000, 0x80008003 @ iotas[19]
  32. .long 0x00000001, 0x80008088 @ iotas[20]
  33. .long 0x00000000, 0x80000088 @ iotas[21]
  34. .long 0x00000001, 0x00008000 @ iotas[22]
  35. .long 0x00000000, 0x80008082 @ iotas[23]
  36. .size iotas32,.-iotas32 @ symbol size = bytes from the label to here
  37. .type KeccakF1600_int, %function
  38. .align 5
  39. KeccakF1600_int:
  40. add r9,sp,#176
  41. add r12,sp,#0
  42. add r10,sp,#40
  43. ldmia r9,{r4-r9} @ A[4][2..4]
  44. KeccakF1600_enter:
  45. str lr,[sp,#440]
  46. eor r11,r11,r11
  47. str r11,[sp,#444]
  48. b .Lround2x
  49. .align 4
  50. .Lround2x:
  51. ldmia r12,{r0-r3} @ A[0][0..1]
  52. ldmia r10,{r10-r12,r14} @ A[1][0..1]
  53. #ifdef __thumb2__
  54. eor r0,r0,r10
  55. eor r1,r1,r11
  56. eor r2,r2,r12
  57. ldrd r10,r11,[sp,#56]
  58. eor r3,r3,r14
  59. ldrd r12,r14,[sp,#64]
  60. eor r4,r4,r10
  61. eor r5,r5,r11
  62. eor r6,r6,r12
  63. ldrd r10,r11,[sp,#72]
  64. eor r7,r7,r14
  65. ldrd r12,r14,[sp,#80]
  66. eor r8,r8,r10
  67. eor r9,r9,r11
  68. eor r0,r0,r12
  69. ldrd r10,r11,[sp,#88]
  70. eor r1,r1,r14
  71. ldrd r12,r14,[sp,#96]
  72. eor r2,r2,r10
  73. eor r3,r3,r11
  74. eor r4,r4,r12
  75. ldrd r10,r11,[sp,#104]
  76. eor r5,r5,r14
  77. ldrd r12,r14,[sp,#112]
  78. eor r6,r6,r10
  79. eor r7,r7,r11
  80. eor r8,r8,r12
  81. ldrd r10,r11,[sp,#120]
  82. eor r9,r9,r14
  83. ldrd r12,r14,[sp,#128]
  84. eor r0,r0,r10
  85. eor r1,r1,r11
  86. eor r2,r2,r12
  87. ldrd r10,r11,[sp,#136]
  88. eor r3,r3,r14
  89. ldrd r12,r14,[sp,#144]
  90. eor r4,r4,r10
  91. eor r5,r5,r11
  92. eor r6,r6,r12
  93. ldrd r10,r11,[sp,#152]
  94. eor r7,r7,r14
  95. ldrd r12,r14,[sp,#160]
  96. eor r8,r8,r10
  97. eor r9,r9,r11
  98. eor r0,r0,r12
  99. ldrd r10,r11,[sp,#168]
  100. eor r1,r1,r14
  101. ldrd r12,r14,[sp,#16]
  102. eor r2,r2,r10
  103. eor r3,r3,r11
  104. eor r4,r4,r12
  105. ldrd r10,r11,[sp,#24]
  106. eor r5,r5,r14
  107. ldrd r12,r14,[sp,#32]
  108. #else
  109. eor r0,r0,r10
  110. add r10,sp,#56
  111. eor r1,r1,r11
  112. eor r2,r2,r12
  113. eor r3,r3,r14
  114. ldmia r10,{r10-r12,r14} @ A[1][2..3]
  115. eor r4,r4,r10
  116. add r10,sp,#72
  117. eor r5,r5,r11
  118. eor r6,r6,r12
  119. eor r7,r7,r14
  120. ldmia r10,{r10-r12,r14} @ A[1][4]..A[2][0]
  121. eor r8,r8,r10
  122. add r10,sp,#88
  123. eor r9,r9,r11
  124. eor r0,r0,r12
  125. eor r1,r1,r14
  126. ldmia r10,{r10-r12,r14} @ A[2][1..2]
  127. eor r2,r2,r10
  128. add r10,sp,#104
  129. eor r3,r3,r11
  130. eor r4,r4,r12
  131. eor r5,r5,r14
  132. ldmia r10,{r10-r12,r14} @ A[2][3..4]
  133. eor r6,r6,r10
  134. add r10,sp,#120
  135. eor r7,r7,r11
  136. eor r8,r8,r12
  137. eor r9,r9,r14
  138. ldmia r10,{r10-r12,r14} @ A[3][0..1]
  139. eor r0,r0,r10
  140. add r10,sp,#136
  141. eor r1,r1,r11
  142. eor r2,r2,r12
  143. eor r3,r3,r14
  144. ldmia r10,{r10-r12,r14} @ A[3][2..3]
  145. eor r4,r4,r10
  146. add r10,sp,#152
  147. eor r5,r5,r11
  148. eor r6,r6,r12
  149. eor r7,r7,r14
  150. ldmia r10,{r10-r12,r14} @ A[3][4]..A[4][0]
  151. eor r8,r8,r10
  152. ldr r10,[sp,#168] @ A[4][1]
  153. eor r9,r9,r11
  154. ldr r11,[sp,#168+4]
  155. eor r0,r0,r12
  156. ldr r12,[sp,#16] @ A[0][2]
  157. eor r1,r1,r14
  158. ldr r14,[sp,#16+4]
  159. eor r2,r2,r10
  160. add r10,sp,#24
  161. eor r3,r3,r11
  162. eor r4,r4,r12
  163. eor r5,r5,r14
  164. ldmia r10,{r10-r12,r14} @ A[0][3..4]
  165. #endif
  166. eor r6,r6,r10
  167. eor r7,r7,r11
  168. eor r8,r8,r12
  169. eor r9,r9,r14
  170. eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
  171. #ifndef __thumb2__
  172. str r10,[sp,#208] @ D[1] = E[0]
  173. #endif
  174. eor r11,r1,r4
  175. #ifndef __thumb2__
  176. str r11,[sp,#208+4]
  177. #else
  178. strd r10,r11,[sp,#208] @ D[1] = E[0]
  179. #endif
  180. eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
  181. eor r14,r7,r0
  182. #ifndef __thumb2__
  183. str r12,[sp,#232] @ D[4] = E[1]
  184. #endif
  185. eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
  186. #ifndef __thumb2__
  187. str r14,[sp,#232+4]
  188. #else
  189. strd r12,r14,[sp,#232] @ D[4] = E[1]
  190. #endif
  191. eor r1,r9,r2
  192. #ifndef __thumb2__
  193. str r0,[sp,#200] @ D[0] = C[0]
  194. #endif
  195. eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
  196. #ifndef __thumb2__
  197. ldr r7,[sp,#144]
  198. #endif
  199. eor r3,r3,r6
  200. #ifndef __thumb2__
  201. str r1,[sp,#200+4]
  202. #else
  203. strd r0,r1,[sp,#200] @ D[0] = C[0]
  204. #endif
  205. #ifndef __thumb2__
  206. ldr r6,[sp,#144+4]
  207. #else
  208. ldrd r7,r6,[sp,#144]
  209. #endif
  210. #ifndef __thumb2__
  211. str r2,[sp,#216] @ D[2] = C[1]
  212. #endif
  213. eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
  214. #ifndef __thumb2__
  215. str r3,[sp,#216+4]
  216. #else
  217. strd r2,r3,[sp,#216] @ D[2] = C[1]
  218. #endif
  219. eor r5,r5,r8
  220. #ifndef __thumb2__
  221. ldr r8,[sp,#192]
  222. #endif
  223. #ifndef __thumb2__
  224. ldr r9,[sp,#192+4]
  225. #else
  226. ldrd r8,r9,[sp,#192]
  227. #endif
  228. #ifndef __thumb2__
  229. str r4,[sp,#224] @ D[3] = C[2]
  230. #endif
  231. eor r7,r7,r4
  232. #ifndef __thumb2__
  233. str r5,[sp,#224+4]
  234. #else
  235. strd r4,r5,[sp,#224] @ D[3] = C[2]
  236. #endif
  237. eor r6,r6,r5
  238. #ifndef __thumb2__
  239. ldr r4,[sp,#0]
  240. #endif
  241. @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
  242. @ mov r6,r6,ror#32-11
  243. #ifndef __thumb2__
  244. ldr r5,[sp,#0+4]
  245. #else
  246. ldrd r4,r5,[sp,#0]
  247. #endif
  248. eor r8,r8,r12
  249. eor r9,r9,r14
  250. #ifndef __thumb2__
  251. ldr r12,[sp,#96]
  252. #endif
  253. eor r0,r0,r4
  254. #ifndef __thumb2__
  255. ldr r14,[sp,#96+4]
  256. #else
  257. ldrd r12,r14,[sp,#96]
  258. #endif
  259. @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
  260. @ mov r9,r9,ror#32-7
  261. eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
  262. eor r12,r12,r2
  263. #ifndef __thumb2__
  264. ldr r2,[sp,#48]
  265. #endif
  266. eor r14,r14,r3
  267. #ifndef __thumb2__
  268. ldr r3,[sp,#48+4]
  269. #else
  270. ldrd r2,r3,[sp,#48]
  271. #endif
  272. mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
  273. ldr r12,[sp,#444] @ load counter
  274. eor r2,r2,r10
  275. adr r10,iotas32
  276. mov r4,r14,ror#32-22
  277. add r14,r10,r12
  278. eor r3,r3,r11
  279. ldmia r14,{r10,r11} @ iotas[i]
  280. bic r12,r4,r2,ror#32-22
  281. bic r14,r5,r3,ror#32-22
  282. mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
  283. mov r3,r3,ror#32-22
  284. eor r12,r12,r0
  285. eor r14,r14,r1
  286. eor r10,r10,r12
  287. eor r11,r11,r14
  288. #ifndef __thumb2__
  289. str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  290. #endif
  291. bic r12,r6,r4,ror#11
  292. #ifndef __thumb2__
  293. str r11,[sp,#240+4]
  294. #else
  295. strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  296. #endif
  297. bic r14,r7,r5,ror#10
  298. bic r10,r8,r6,ror#32-(11-7)
  299. bic r11,r9,r7,ror#32-(10-7)
  300. eor r12,r2,r12,ror#32-11
  301. #ifndef __thumb2__
  302. str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  303. #endif
  304. eor r14,r3,r14,ror#32-10
  305. #ifndef __thumb2__
  306. str r14,[sp,#248+4]
  307. #else
  308. strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  309. #endif
  310. eor r10,r4,r10,ror#32-7
  311. eor r11,r5,r11,ror#32-7
  312. #ifndef __thumb2__
  313. str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  314. #endif
  315. bic r12,r0,r8,ror#32-7
  316. #ifndef __thumb2__
  317. str r11,[sp,#256+4]
  318. #else
  319. strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  320. #endif
  321. bic r14,r1,r9,ror#32-7
  322. eor r12,r12,r6,ror#32-11
  323. #ifndef __thumb2__
  324. str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  325. #endif
  326. eor r14,r14,r7,ror#32-10
  327. #ifndef __thumb2__
  328. str r14,[sp,#264+4]
  329. #else
  330. strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  331. #endif
  332. bic r10,r2,r0
  333. add r14,sp,#224
  334. #ifndef __thumb2__
  335. ldr r0,[sp,#24] @ A[0][3]
  336. #endif
  337. bic r11,r3,r1
  338. #ifndef __thumb2__
  339. ldr r1,[sp,#24+4]
  340. #else
  341. ldrd r0,r1,[sp,#24] @ A[0][3]
  342. #endif
  343. eor r10,r10,r8,ror#32-7
  344. eor r11,r11,r9,ror#32-7
  345. #ifndef __thumb2__
  346. str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  347. #endif
  348. add r9,sp,#200
  349. #ifndef __thumb2__
  350. str r11,[sp,#272+4]
  351. #else
  352. strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  353. #endif
  354. ldmia r14,{r10-r12,r14} @ D[3..4]
  355. ldmia r9,{r6-r9} @ D[0..1]
  356. #ifndef __thumb2__
  357. ldr r2,[sp,#72] @ A[1][4]
  358. #endif
  359. eor r0,r0,r10
  360. #ifndef __thumb2__
  361. ldr r3,[sp,#72+4]
  362. #else
  363. ldrd r2,r3,[sp,#72] @ A[1][4]
  364. #endif
  365. eor r1,r1,r11
  366. @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
  367. #ifndef __thumb2__
  368. ldr r10,[sp,#128] @ A[3][1]
  369. #endif
  370. @ mov r1,r1,ror#32-14
  371. #ifndef __thumb2__
  372. ldr r11,[sp,#128+4]
  373. #else
  374. ldrd r10,r11,[sp,#128] @ A[3][1]
  375. #endif
  376. eor r2,r2,r12
  377. #ifndef __thumb2__
  378. ldr r4,[sp,#80] @ A[2][0]
  379. #endif
  380. eor r3,r3,r14
  381. #ifndef __thumb2__
  382. ldr r5,[sp,#80+4]
  383. #else
  384. ldrd r4,r5,[sp,#80] @ A[2][0]
  385. #endif
  386. @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
  387. @ mov r3,r3,ror#32-10
  388. eor r6,r6,r4
  389. #ifndef __thumb2__
  390. ldr r12,[sp,#216] @ D[2]
  391. #endif
  392. eor r7,r7,r5
  393. #ifndef __thumb2__
  394. ldr r14,[sp,#216+4]
  395. #else
  396. ldrd r12,r14,[sp,#216] @ D[2]
  397. #endif
  398. mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
  399. mov r4,r7,ror#32-2
  400. eor r10,r10,r8
  401. #ifndef __thumb2__
  402. ldr r8,[sp,#176] @ A[4][2]
  403. #endif
  404. eor r11,r11,r9
  405. #ifndef __thumb2__
  406. ldr r9,[sp,#176+4]
  407. #else
  408. ldrd r8,r9,[sp,#176] @ A[4][2]
  409. #endif
  410. mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
  411. mov r6,r11,ror#32-23
  412. bic r10,r4,r2,ror#32-10
  413. bic r11,r5,r3,ror#32-10
  414. eor r12,r12,r8
  415. eor r14,r14,r9
  416. mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
  417. mov r8,r14,ror#32-31
  418. eor r10,r10,r0,ror#32-14
  419. eor r11,r11,r1,ror#32-14
  420. #ifndef __thumb2__
  421. str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
  422. #endif
  423. bic r12,r6,r4
  424. #ifndef __thumb2__
  425. str r11,[sp,#280+4]
  426. #else
  427. strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
  428. #endif
  429. bic r14,r7,r5
  430. eor r12,r12,r2,ror#32-10
  431. #ifndef __thumb2__
  432. str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  433. #endif
  434. eor r14,r14,r3,ror#32-10
  435. #ifndef __thumb2__
  436. str r14,[sp,#288+4]
  437. #else
  438. strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  439. #endif
  440. bic r10,r8,r6
  441. bic r11,r9,r7
  442. bic r12,r0,r8,ror#14
  443. bic r14,r1,r9,ror#14
  444. eor r10,r10,r4
  445. eor r11,r11,r5
  446. #ifndef __thumb2__
  447. str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  448. #endif
  449. bic r2,r2,r0,ror#32-(14-10)
  450. #ifndef __thumb2__
  451. str r11,[sp,#296+4]
  452. #else
  453. strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  454. #endif
  455. eor r12,r6,r12,ror#32-14
  456. bic r11,r3,r1,ror#32-(14-10)
  457. #ifndef __thumb2__
  458. str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  459. #endif
  460. eor r14,r7,r14,ror#32-14
  461. #ifndef __thumb2__
  462. str r14,[sp,#304+4]
  463. #else
  464. strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  465. #endif
  466. add r12,sp,#208
  467. #ifndef __thumb2__
  468. ldr r1,[sp,#8] @ A[0][1]
  469. #endif
  470. eor r10,r8,r2,ror#32-10
  471. #ifndef __thumb2__
  472. ldr r0,[sp,#8+4]
  473. #else
  474. ldrd r1,r0,[sp,#8] @ A[0][1]
  475. #endif
  476. eor r11,r9,r11,ror#32-10
  477. #ifndef __thumb2__
  478. str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  479. #endif
  480. #ifndef __thumb2__
  481. str r11,[sp,#312+4]
  482. #else
  483. strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  484. #endif
  485. add r9,sp,#224
  486. ldmia r12,{r10-r12,r14} @ D[1..2]
  487. #ifndef __thumb2__
  488. ldr r2,[sp,#56] @ A[1][2]
  489. #endif
  490. #ifndef __thumb2__
  491. ldr r3,[sp,#56+4]
  492. #else
  493. ldrd r2,r3,[sp,#56] @ A[1][2]
  494. #endif
  495. ldmia r9,{r6-r9} @ D[3..4]
  496. eor r1,r1,r10
  497. #ifndef __thumb2__
  498. ldr r4,[sp,#104] @ A[2][3]
  499. #endif
  500. eor r0,r0,r11
  501. #ifndef __thumb2__
  502. ldr r5,[sp,#104+4]
  503. #else
  504. ldrd r4,r5,[sp,#104] @ A[2][3]
  505. #endif
  506. mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
  507. eor r2,r2,r12
  508. #ifndef __thumb2__
  509. ldr r10,[sp,#152] @ A[3][4]
  510. #endif
  511. eor r3,r3,r14
  512. #ifndef __thumb2__
  513. ldr r11,[sp,#152+4]
  514. #else
  515. ldrd r10,r11,[sp,#152] @ A[3][4]
  516. #endif
  517. @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
  518. #ifndef __thumb2__
  519. ldr r12,[sp,#200] @ D[0]
  520. #endif
  521. @ mov r3,r3,ror#32-3
  522. #ifndef __thumb2__
  523. ldr r14,[sp,#200+4]
  524. #else
  525. ldrd r12,r14,[sp,#200] @ D[0]
  526. #endif
  527. eor r4,r4,r6
  528. eor r5,r5,r7
  529. @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
  530. @ mov r4,r7,ror#32-13 @ [track reverse order below]
  531. eor r10,r10,r8
  532. #ifndef __thumb2__
  533. ldr r8,[sp,#160] @ A[4][0]
  534. #endif
  535. eor r11,r11,r9
  536. #ifndef __thumb2__
  537. ldr r9,[sp,#160+4]
  538. #else
  539. ldrd r8,r9,[sp,#160] @ A[4][0]
  540. #endif
  541. mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
  542. mov r7,r11,ror#32-4
  543. eor r12,r12,r8
  544. eor r14,r14,r9
  545. mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
  546. mov r9,r14,ror#32-9
  547. bic r10,r5,r2,ror#13-3
  548. bic r11,r4,r3,ror#12-3
  549. bic r12,r6,r5,ror#32-13
  550. bic r14,r7,r4,ror#32-12
  551. eor r10,r0,r10,ror#32-13
  552. eor r11,r1,r11,ror#32-12
  553. #ifndef __thumb2__
  554. str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
  555. #endif
  556. eor r12,r12,r2,ror#32-3
  557. #ifndef __thumb2__
  558. str r11,[sp,#320+4]
  559. #else
  560. strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
  561. #endif
  562. eor r14,r14,r3,ror#32-3
  563. #ifndef __thumb2__
  564. str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  565. #endif
  566. bic r10,r8,r6
  567. bic r11,r9,r7
  568. #ifndef __thumb2__
  569. str r14,[sp,#328+4]
  570. #else
  571. strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  572. #endif
  573. eor r10,r10,r5,ror#32-13
  574. eor r11,r11,r4,ror#32-12
  575. #ifndef __thumb2__
  576. str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  577. #endif
  578. bic r12,r0,r8
  579. #ifndef __thumb2__
  580. str r11,[sp,#336+4]
  581. #else
  582. strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  583. #endif
  584. bic r14,r1,r9
  585. eor r12,r12,r6
  586. eor r14,r14,r7
  587. #ifndef __thumb2__
  588. str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  589. #endif
  590. bic r10,r2,r0,ror#3
  591. #ifndef __thumb2__
  592. str r14,[sp,#344+4]
  593. #else
  594. strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  595. #endif
  596. bic r11,r3,r1,ror#3
  597. #ifndef __thumb2__
  598. ldr r1,[sp,#32] @ A[0][4] [in reverse order]
  599. #endif
  600. eor r10,r8,r10,ror#32-3
  601. #ifndef __thumb2__
  602. ldr r0,[sp,#32+4]
  603. #else
  604. ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order]
  605. #endif
  606. eor r11,r9,r11,ror#32-3
  607. #ifndef __thumb2__
  608. str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  609. #endif
  610. add r9,sp,#208
  611. #ifndef __thumb2__
  612. str r11,[sp,#352+4]
  613. #else
  614. strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  615. #endif
  616. #ifndef __thumb2__
  617. ldr r10,[sp,#232] @ D[4]
  618. #endif
  619. #ifndef __thumb2__
  620. ldr r11,[sp,#232+4]
  621. #else
  622. ldrd r10,r11,[sp,#232] @ D[4]
  623. #endif
  624. #ifndef __thumb2__
  625. ldr r12,[sp,#200] @ D[0]
  626. #endif
  627. #ifndef __thumb2__
  628. ldr r14,[sp,#200+4]
  629. #else
  630. ldrd r12,r14,[sp,#200] @ D[0]
  631. #endif
  632. ldmia r9,{r6-r9} @ D[1..2]
  633. eor r1,r1,r10
  634. #ifndef __thumb2__
  635. ldr r2,[sp,#40] @ A[1][0]
  636. #endif
  637. eor r0,r0,r11
  638. #ifndef __thumb2__
  639. ldr r3,[sp,#40+4]
  640. #else
  641. ldrd r2,r3,[sp,#40] @ A[1][0]
  642. #endif
  643. @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
  644. #ifndef __thumb2__
  645. ldr r4,[sp,#88] @ A[2][1]
  646. #endif
  647. @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
  648. #ifndef __thumb2__
  649. ldr r5,[sp,#88+4]
  650. #else
  651. ldrd r4,r5,[sp,#88] @ A[2][1]
  652. #endif
  653. eor r2,r2,r12
  654. #ifndef __thumb2__
  655. ldr r10,[sp,#136] @ A[3][2]
  656. #endif
  657. eor r3,r3,r14
  658. #ifndef __thumb2__
  659. ldr r11,[sp,#136+4]
  660. #else
  661. ldrd r10,r11,[sp,#136] @ A[3][2]
  662. #endif
  663. @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
  664. #ifndef __thumb2__
  665. ldr r12,[sp,#224] @ D[3]
  666. #endif
  667. @ mov r3,r3,ror#32-18
  668. #ifndef __thumb2__
  669. ldr r14,[sp,#224+4]
  670. #else
  671. ldrd r12,r14,[sp,#224] @ D[3]
  672. #endif
  673. eor r6,r6,r4
  674. eor r7,r7,r5
  675. mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
  676. mov r5,r7,ror#32-5
  677. eor r10,r10,r8
  678. #ifndef __thumb2__
  679. ldr r8,[sp,#184] @ A[4][3]
  680. #endif
  681. eor r11,r11,r9
  682. #ifndef __thumb2__
  683. ldr r9,[sp,#184+4]
  684. #else
  685. ldrd r8,r9,[sp,#184] @ A[4][3]
  686. #endif
  687. mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
  688. mov r6,r11,ror#32-8
  689. eor r12,r12,r8
  690. eor r14,r14,r9
  691. mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
  692. mov r9,r14,ror#32-28
  693. bic r10,r4,r2,ror#32-18
  694. bic r11,r5,r3,ror#32-18
  695. eor r10,r10,r0,ror#32-14
  696. eor r11,r11,r1,ror#32-13
  697. #ifndef __thumb2__
  698. str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
  699. #endif
  700. bic r12,r6,r4
  701. #ifndef __thumb2__
  702. str r11,[sp,#360+4]
  703. #else
  704. strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
  705. #endif
  706. bic r14,r7,r5
  707. eor r12,r12,r2,ror#32-18
  708. #ifndef __thumb2__
  709. str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  710. #endif
  711. eor r14,r14,r3,ror#32-18
  712. #ifndef __thumb2__
  713. str r14,[sp,#368+4]
  714. #else
  715. strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  716. #endif
  717. bic r10,r8,r6
  718. bic r11,r9,r7
  719. bic r12,r0,r8,ror#14
  720. bic r14,r1,r9,ror#13
  721. eor r10,r10,r4
  722. eor r11,r11,r5
  723. #ifndef __thumb2__
  724. str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  725. #endif
  726. bic r2,r2,r0,ror#18-14
  727. #ifndef __thumb2__
  728. str r11,[sp,#376+4]
  729. #else
  730. strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  731. #endif
  732. eor r12,r6,r12,ror#32-14
  733. bic r11,r3,r1,ror#18-13
  734. eor r14,r7,r14,ror#32-13
  735. #ifndef __thumb2__
  736. str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  737. #endif
  738. #ifndef __thumb2__
  739. str r14,[sp,#384+4]
  740. #else
  741. strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  742. #endif
  743. add r14,sp,#216
  744. #ifndef __thumb2__
  745. ldr r0,[sp,#16] @ A[0][2]
  746. #endif
  747. eor r10,r8,r2,ror#32-18
  748. #ifndef __thumb2__
  749. ldr r1,[sp,#16+4]
  750. #else
  751. ldrd r0,r1,[sp,#16] @ A[0][2]
  752. #endif
  753. eor r11,r9,r11,ror#32-18
  754. #ifndef __thumb2__
  755. str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  756. #endif
  757. #ifndef __thumb2__
  758. str r11,[sp,#392+4]
  759. #else
  760. strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  761. #endif
  762. ldmia r14,{r10-r12,r14} @ D[2..3]
  763. #ifndef __thumb2__
  764. ldr r2,[sp,#64] @ A[1][3]
  765. #endif
  766. #ifndef __thumb2__
  767. ldr r3,[sp,#64+4]
  768. #else
  769. ldrd r2,r3,[sp,#64] @ A[1][3]
  770. #endif
  771. #ifndef __thumb2__
  772. ldr r6,[sp,#232] @ D[4]
  773. #endif
  774. #ifndef __thumb2__
  775. ldr r7,[sp,#232+4]
  776. #else
  777. ldrd r6,r7,[sp,#232] @ D[4]
  778. #endif
  779. eor r0,r0,r10
  780. #ifndef __thumb2__
  781. ldr r4,[sp,#112] @ A[2][4]
  782. #endif
  783. eor r1,r1,r11
  784. #ifndef __thumb2__
  785. ldr r5,[sp,#112+4]
  786. #else
  787. ldrd r4,r5,[sp,#112] @ A[2][4]
  788. #endif
  789. @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
  790. #ifndef __thumb2__
  791. ldr r8,[sp,#200] @ D[0]
  792. #endif
  793. @ mov r1,r1,ror#32-31
  794. #ifndef __thumb2__
  795. ldr r9,[sp,#200+4]
  796. #else
  797. ldrd r8,r9,[sp,#200] @ D[0]
  798. #endif
  799. eor r12,r12,r2
  800. #ifndef __thumb2__
  801. ldr r10,[sp,#120] @ A[3][0]
  802. #endif
  803. eor r14,r14,r3
  804. #ifndef __thumb2__
  805. ldr r11,[sp,#120+4]
  806. #else
  807. ldrd r10,r11,[sp,#120] @ A[3][0]
  808. #endif
  809. mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
  810. #ifndef __thumb2__
  811. ldr r12,[sp,#208] @ D[1]
  812. #endif
  813. mov r2,r14,ror#32-28
  814. #ifndef __thumb2__
  815. ldr r14,[sp,#208+4]
  816. #else
  817. ldrd r12,r14,[sp,#208] @ D[1]
  818. #endif
  819. eor r6,r6,r4
  820. eor r7,r7,r5
  821. mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
  822. mov r4,r7,ror#32-20
  823. eor r10,r10,r8
  824. #ifndef __thumb2__
  825. ldr r8,[sp,#168] @ A[4][1]
  826. #endif
  827. eor r11,r11,r9
  828. #ifndef __thumb2__
  829. ldr r9,[sp,#168+4]
  830. #else
  831. ldrd r8,r9,[sp,#168] @ A[4][1]
  832. #endif
  833. mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
  834. mov r6,r11,ror#32-21
  835. eor r8,r8,r12
  836. eor r9,r9,r14
  837. @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
  838. @ mov r9,r3,ror#32-1
  839. bic r10,r4,r2
  840. bic r11,r5,r3
  841. eor r10,r10,r0,ror#32-31
  842. #ifndef __thumb2__
  843. str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
  844. #endif
  845. eor r11,r11,r1,ror#32-31
  846. #ifndef __thumb2__
  847. str r11,[sp,#400+4]
  848. #else
  849. strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
  850. #endif
  851. bic r12,r6,r4
  852. bic r14,r7,r5
  853. eor r12,r12,r2
  854. eor r14,r14,r3
  855. #ifndef __thumb2__
  856. str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  857. #endif
  858. bic r10,r8,r6,ror#1
  859. #ifndef __thumb2__
  860. str r14,[sp,#408+4]
  861. #else
  862. strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  863. #endif
  864. bic r11,r9,r7,ror#1
  865. bic r12,r0,r8,ror#31-1
  866. bic r14,r1,r9,ror#31-1
  867. eor r4,r4,r10,ror#32-1
  868. #ifndef __thumb2__
  869. str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  870. #endif
  871. eor r5,r5,r11,ror#32-1
  872. #ifndef __thumb2__
  873. str r5,[sp,#416+4]
  874. #else
  875. strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  876. #endif
  877. eor r6,r6,r12,ror#32-31
  878. eor r7,r7,r14,ror#32-31
  879. #ifndef __thumb2__
  880. str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  881. #endif
  882. bic r10,r2,r0,ror#32-31
  883. #ifndef __thumb2__
  884. str r7,[sp,#424+4]
  885. #else
  886. strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  887. #endif
  888. bic r11,r3,r1,ror#32-31
  889. add r12,sp,#240
  890. eor r8,r10,r8,ror#32-1
  891. add r10,sp,#280
  892. eor r9,r11,r9,ror#32-1
  893. #ifndef __thumb2__
  894. str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  895. #endif
  896. #ifndef __thumb2__
  897. str r9,[sp,#432+4]
  898. #else
  899. strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  900. #endif
  901. ldmia r12,{r0-r3} @ A[0][0..1]
  902. ldmia r10,{r10-r12,r14} @ A[1][0..1]
  903. #ifdef __thumb2__
  904. eor r0,r0,r10
  905. eor r1,r1,r11
  906. eor r2,r2,r12
  907. ldrd r10,r11,[sp,#296]
  908. eor r3,r3,r14
  909. ldrd r12,r14,[sp,#304]
  910. eor r4,r4,r10
  911. eor r5,r5,r11
  912. eor r6,r6,r12
  913. ldrd r10,r11,[sp,#312]
  914. eor r7,r7,r14
  915. ldrd r12,r14,[sp,#320]
  916. eor r8,r8,r10
  917. eor r9,r9,r11
  918. eor r0,r0,r12
  919. ldrd r10,r11,[sp,#328]
  920. eor r1,r1,r14
  921. ldrd r12,r14,[sp,#336]
  922. eor r2,r2,r10
  923. eor r3,r3,r11
  924. eor r4,r4,r12
  925. ldrd r10,r11,[sp,#344]
  926. eor r5,r5,r14
  927. ldrd r12,r14,[sp,#352]
  928. eor r6,r6,r10
  929. eor r7,r7,r11
  930. eor r8,r8,r12
  931. ldrd r10,r11,[sp,#360]
  932. eor r9,r9,r14
  933. ldrd r12,r14,[sp,#368]
  934. eor r0,r0,r10
  935. eor r1,r1,r11
  936. eor r2,r2,r12
  937. ldrd r10,r11,[sp,#376]
  938. eor r3,r3,r14
  939. ldrd r12,r14,[sp,#384]
  940. eor r4,r4,r10
  941. eor r5,r5,r11
  942. eor r6,r6,r12
  943. ldrd r10,r11,[sp,#392]
  944. eor r7,r7,r14
  945. ldrd r12,r14,[sp,#400]
  946. eor r8,r8,r10
  947. eor r9,r9,r11
  948. eor r0,r0,r12
  949. ldrd r10,r11,[sp,#408]
  950. eor r1,r1,r14
  951. ldrd r12,r14,[sp,#256]
  952. eor r2,r2,r10
  953. eor r3,r3,r11
  954. eor r4,r4,r12
  955. ldrd r10,r11,[sp,#264]
  956. eor r5,r5,r14
  957. ldrd r12,r14,[sp,#272]
  958. #else
  959. eor r0,r0,r10
  960. add r10,sp,#296
  961. eor r1,r1,r11
  962. eor r2,r2,r12
  963. eor r3,r3,r14
  964. ldmia r10,{r10-r12,r14} @ A[1][2..3]
  965. eor r4,r4,r10
  966. add r10,sp,#312
  967. eor r5,r5,r11
  968. eor r6,r6,r12
  969. eor r7,r7,r14
  970. ldmia r10,{r10-r12,r14} @ A[1][4]..A[2][0]
  971. eor r8,r8,r10
  972. add r10,sp,#328
  973. eor r9,r9,r11
  974. eor r0,r0,r12
  975. eor r1,r1,r14
  976. ldmia r10,{r10-r12,r14} @ A[2][1..2]
  977. eor r2,r2,r10
  978. add r10,sp,#344
  979. eor r3,r3,r11
  980. eor r4,r4,r12
  981. eor r5,r5,r14
  982. ldmia r10,{r10-r12,r14} @ A[2][3..4]
  983. eor r6,r6,r10
  984. add r10,sp,#360
  985. eor r7,r7,r11
  986. eor r8,r8,r12
  987. eor r9,r9,r14
  988. ldmia r10,{r10-r12,r14} @ A[3][0..1]
  989. eor r0,r0,r10
  990. add r10,sp,#376
  991. eor r1,r1,r11
  992. eor r2,r2,r12
  993. eor r3,r3,r14
  994. ldmia r10,{r10-r12,r14} @ A[3][2..3]
  995. eor r4,r4,r10
  996. add r10,sp,#392
  997. eor r5,r5,r11
  998. eor r6,r6,r12
  999. eor r7,r7,r14
  1000. ldmia r10,{r10-r12,r14} @ A[3][4]..A[4][0]
  1001. eor r8,r8,r10
  1002. ldr r10,[sp,#408] @ A[4][1]
  1003. eor r9,r9,r11
  1004. ldr r11,[sp,#408+4]
  1005. eor r0,r0,r12
  1006. ldr r12,[sp,#256] @ A[0][2]
  1007. eor r1,r1,r14
  1008. ldr r14,[sp,#256+4]
  1009. eor r2,r2,r10
  1010. add r10,sp,#264
  1011. eor r3,r3,r11
  1012. eor r4,r4,r12
  1013. eor r5,r5,r14
  1014. ldmia r10,{r10-r12,r14} @ A[0][3..4]
  1015. #endif
  1016. eor r6,r6,r10
  1017. eor r7,r7,r11
  1018. eor r8,r8,r12
  1019. eor r9,r9,r14
  1020. eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
  1021. #ifndef __thumb2__
  1022. str r10,[sp,#208] @ D[1] = E[0]
  1023. #endif
  1024. eor r11,r1,r4
  1025. #ifndef __thumb2__
  1026. str r11,[sp,#208+4]
  1027. #else
  1028. strd r10,r11,[sp,#208] @ D[1] = E[0]
  1029. #endif
  1030. eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
  1031. eor r14,r7,r0
  1032. #ifndef __thumb2__
  1033. str r12,[sp,#232] @ D[4] = E[1]
  1034. #endif
  1035. eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
  1036. #ifndef __thumb2__
  1037. str r14,[sp,#232+4]
  1038. #else
  1039. strd r12,r14,[sp,#232] @ D[4] = E[1]
  1040. #endif
  1041. eor r1,r9,r2
  1042. #ifndef __thumb2__
  1043. str r0,[sp,#200] @ D[0] = C[0]
  1044. #endif
  1045. eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
  1046. #ifndef __thumb2__
  1047. ldr r7,[sp,#384]
  1048. #endif
  1049. eor r3,r3,r6
  1050. #ifndef __thumb2__
  1051. str r1,[sp,#200+4]
  1052. #else
  1053. strd r0,r1,[sp,#200] @ D[0] = C[0]
  1054. #endif
  1055. #ifndef __thumb2__
  1056. ldr r6,[sp,#384+4]
  1057. #else
  1058. ldrd r7,r6,[sp,#384]
  1059. #endif
  1060. #ifndef __thumb2__
  1061. str r2,[sp,#216] @ D[2] = C[1]
  1062. #endif
  1063. eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
  1064. #ifndef __thumb2__
  1065. str r3,[sp,#216+4]
  1066. #else
  1067. strd r2,r3,[sp,#216] @ D[2] = C[1]
  1068. #endif
  1069. eor r5,r5,r8
  1070. #ifndef __thumb2__
  1071. ldr r8,[sp,#432]
  1072. #endif
  1073. #ifndef __thumb2__
  1074. ldr r9,[sp,#432+4]
  1075. #else
  1076. ldrd r8,r9,[sp,#432]
  1077. #endif
  1078. #ifndef __thumb2__
  1079. str r4,[sp,#224] @ D[3] = C[2]
  1080. #endif
  1081. eor r7,r7,r4
  1082. #ifndef __thumb2__
  1083. str r5,[sp,#224+4]
  1084. #else
  1085. strd r4,r5,[sp,#224] @ D[3] = C[2]
  1086. #endif
  1087. eor r6,r6,r5
  1088. #ifndef __thumb2__
  1089. ldr r4,[sp,#240]
  1090. #endif
  1091. @ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
  1092. @ mov r6,r6,ror#32-11
  1093. #ifndef __thumb2__
  1094. ldr r5,[sp,#240+4]
  1095. #else
  1096. ldrd r4,r5,[sp,#240]
  1097. #endif
  1098. eor r8,r8,r12
  1099. eor r9,r9,r14
  1100. #ifndef __thumb2__
  1101. ldr r12,[sp,#336]
  1102. #endif
  1103. eor r0,r0,r4
  1104. #ifndef __thumb2__
  1105. ldr r14,[sp,#336+4]
  1106. #else
  1107. ldrd r12,r14,[sp,#336]
  1108. #endif
  1109. @ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
  1110. @ mov r9,r9,ror#32-7
  1111. eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0]; /* rotate by 0 */ /* D[0] */
  1112. eor r12,r12,r2
  1113. #ifndef __thumb2__
  1114. ldr r2,[sp,#288]
  1115. #endif
  1116. eor r14,r14,r3
  1117. #ifndef __thumb2__
  1118. ldr r3,[sp,#288+4]
  1119. #else
  1120. ldrd r2,r3,[sp,#288]
  1121. #endif
  1122. mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]); /* D[2] */
  1123. ldr r12,[sp,#444] @ load counter
  1124. eor r2,r2,r10
  1125. adr r10,iotas32
  1126. mov r4,r14,ror#32-22
  1127. add r14,r10,r12
  1128. eor r3,r3,r11
  1129. #ifndef __thumb2__
  1130. ldr r10,[r14,#8] @ iotas[i].lo
  1131. #endif
  1132. add r12,r12,#16
  1133. #ifndef __thumb2__
  1134. ldr r11,[r14,#12] @ iotas[i].hi
  1135. #else
  1136. ldrd r10,r11,[r14,#8] @ iotas[i].lo
  1137. #endif
  1138. cmp r12,#192
  1139. str r12,[sp,#444] @ store counter
  1140. bic r12,r4,r2,ror#32-22
  1141. bic r14,r5,r3,ror#32-22
  1142. mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]); /* D[1] */
  1143. mov r3,r3,ror#32-22
  1144. eor r12,r12,r0
  1145. eor r14,r14,r1
  1146. eor r10,r10,r12
  1147. eor r11,r11,r14
  1148. #ifndef __thumb2__
  1149. str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  1150. #endif
  1151. bic r12,r6,r4,ror#11
  1152. #ifndef __thumb2__
  1153. str r11,[sp,#0+4]
  1154. #else
  1155. strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
  1156. #endif
  1157. bic r14,r7,r5,ror#10
  1158. bic r10,r8,r6,ror#32-(11-7)
  1159. bic r11,r9,r7,ror#32-(10-7)
  1160. eor r12,r2,r12,ror#32-11
  1161. #ifndef __thumb2__
  1162. str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  1163. #endif
  1164. eor r14,r3,r14,ror#32-10
  1165. #ifndef __thumb2__
  1166. str r14,[sp,#8+4]
  1167. #else
  1168. strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
  1169. #endif
  1170. eor r10,r4,r10,ror#32-7
  1171. eor r11,r5,r11,ror#32-7
  1172. #ifndef __thumb2__
  1173. str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  1174. #endif
  1175. bic r12,r0,r8,ror#32-7
  1176. #ifndef __thumb2__
  1177. str r11,[sp,#16+4]
  1178. #else
  1179. strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
  1180. #endif
  1181. bic r14,r1,r9,ror#32-7
  1182. eor r12,r12,r6,ror#32-11
  1183. #ifndef __thumb2__
  1184. str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  1185. #endif
  1186. eor r14,r14,r7,ror#32-10
  1187. #ifndef __thumb2__
  1188. str r14,[sp,#24+4]
  1189. #else
  1190. strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
  1191. #endif
  1192. bic r10,r2,r0
  1193. add r14,sp,#224
  1194. #ifndef __thumb2__
  1195. ldr r0,[sp,#264] @ A[0][3]
  1196. #endif
  1197. bic r11,r3,r1
  1198. #ifndef __thumb2__
  1199. ldr r1,[sp,#264+4]
  1200. #else
  1201. ldrd r0,r1,[sp,#264] @ A[0][3]
  1202. #endif
  1203. eor r10,r10,r8,ror#32-7
  1204. eor r11,r11,r9,ror#32-7
  1205. #ifndef __thumb2__
  1206. str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  1207. #endif
  1208. add r9,sp,#200
  1209. #ifndef __thumb2__
  1210. str r11,[sp,#32+4]
  1211. #else
  1212. strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
  1213. #endif
  1214. ldmia r14,{r10-r12,r14} @ D[3..4]
  1215. ldmia r9,{r6-r9} @ D[0..1]
  1216. #ifndef __thumb2__
  1217. ldr r2,[sp,#312] @ A[1][4]
  1218. #endif
  1219. eor r0,r0,r10
  1220. #ifndef __thumb2__
  1221. ldr r3,[sp,#312+4]
  1222. #else
  1223. ldrd r2,r3,[sp,#312] @ A[1][4]
  1224. #endif
  1225. eor r1,r1,r11
  1226. @ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
  1227. #ifndef __thumb2__
  1228. ldr r10,[sp,#368] @ A[3][1]
  1229. #endif
  1230. @ mov r1,r1,ror#32-14
  1231. #ifndef __thumb2__
  1232. ldr r11,[sp,#368+4]
  1233. #else
  1234. ldrd r10,r11,[sp,#368] @ A[3][1]
  1235. #endif
  1236. eor r2,r2,r12
  1237. #ifndef __thumb2__
  1238. ldr r4,[sp,#320] @ A[2][0]
  1239. #endif
  1240. eor r3,r3,r14
  1241. #ifndef __thumb2__
  1242. ldr r5,[sp,#320+4]
  1243. #else
  1244. ldrd r4,r5,[sp,#320] @ A[2][0]
  1245. #endif
  1246. @ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
  1247. @ mov r3,r3,ror#32-10
  1248. eor r6,r6,r4
  1249. #ifndef __thumb2__
  1250. ldr r12,[sp,#216] @ D[2]
  1251. #endif
  1252. eor r7,r7,r5
  1253. #ifndef __thumb2__
  1254. ldr r14,[sp,#216+4]
  1255. #else
  1256. ldrd r12,r14,[sp,#216] @ D[2]
  1257. #endif
  1258. mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
  1259. mov r4,r7,ror#32-2
  1260. eor r10,r10,r8
  1261. #ifndef __thumb2__
  1262. ldr r8,[sp,#416] @ A[4][2]
  1263. #endif
  1264. eor r11,r11,r9
  1265. #ifndef __thumb2__
  1266. ldr r9,[sp,#416+4]
  1267. #else
  1268. ldrd r8,r9,[sp,#416] @ A[4][2]
  1269. #endif
  1270. mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
  1271. mov r6,r11,ror#32-23
  1272. bic r10,r4,r2,ror#32-10
  1273. bic r11,r5,r3,ror#32-10
  1274. eor r12,r12,r8
  1275. eor r14,r14,r9
  1276. mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
  1277. mov r8,r14,ror#32-31
  1278. eor r10,r10,r0,ror#32-14
  1279. eor r11,r11,r1,ror#32-14
  1280. #ifndef __thumb2__
  1281. str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
  1282. #endif
  1283. bic r12,r6,r4
  1284. #ifndef __thumb2__
  1285. str r11,[sp,#40+4]
  1286. #else
  1287. strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
  1288. #endif
  1289. bic r14,r7,r5
  1290. eor r12,r12,r2,ror#32-10
  1291. #ifndef __thumb2__
  1292. str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  1293. #endif
  1294. eor r14,r14,r3,ror#32-10
  1295. #ifndef __thumb2__
  1296. str r14,[sp,#48+4]
  1297. #else
  1298. strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
  1299. #endif
  1300. bic r10,r8,r6
  1301. bic r11,r9,r7
  1302. bic r12,r0,r8,ror#14
  1303. bic r14,r1,r9,ror#14
  1304. eor r10,r10,r4
  1305. eor r11,r11,r5
  1306. #ifndef __thumb2__
  1307. str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  1308. #endif
  1309. bic r2,r2,r0,ror#32-(14-10)
  1310. #ifndef __thumb2__
  1311. str r11,[sp,#56+4]
  1312. #else
  1313. strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
  1314. #endif
  1315. eor r12,r6,r12,ror#32-14
  1316. bic r11,r3,r1,ror#32-(14-10)
  1317. #ifndef __thumb2__
  1318. str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  1319. #endif
  1320. eor r14,r7,r14,ror#32-14
  1321. #ifndef __thumb2__
  1322. str r14,[sp,#64+4]
  1323. #else
  1324. strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
  1325. #endif
  1326. add r12,sp,#208
  1327. #ifndef __thumb2__
  1328. ldr r1,[sp,#248] @ A[0][1]
  1329. #endif
  1330. eor r10,r8,r2,ror#32-10
  1331. #ifndef __thumb2__
  1332. ldr r0,[sp,#248+4]
  1333. #else
  1334. ldrd r1,r0,[sp,#248] @ A[0][1]
  1335. #endif
  1336. eor r11,r9,r11,ror#32-10
  1337. #ifndef __thumb2__
  1338. str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  1339. #endif
  1340. #ifndef __thumb2__
  1341. str r11,[sp,#72+4]
  1342. #else
  1343. strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
  1344. #endif
  1345. add r9,sp,#224
  1346. ldmia r12,{r10-r12,r14} @ D[1..2]
  1347. #ifndef __thumb2__
  1348. ldr r2,[sp,#296] @ A[1][2]
  1349. #endif
  1350. #ifndef __thumb2__
  1351. ldr r3,[sp,#296+4]
  1352. #else
  1353. ldrd r2,r3,[sp,#296] @ A[1][2]
  1354. #endif
  1355. ldmia r9,{r6-r9} @ D[3..4]
  1356. eor r1,r1,r10
  1357. #ifndef __thumb2__
  1358. ldr r4,[sp,#344] @ A[2][3]
  1359. #endif
  1360. eor r0,r0,r11
  1361. #ifndef __thumb2__
  1362. ldr r5,[sp,#344+4]
  1363. #else
  1364. ldrd r4,r5,[sp,#344] @ A[2][3]
  1365. #endif
  1366. mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
  1367. eor r2,r2,r12
  1368. #ifndef __thumb2__
  1369. ldr r10,[sp,#392] @ A[3][4]
  1370. #endif
  1371. eor r3,r3,r14
  1372. #ifndef __thumb2__
  1373. ldr r11,[sp,#392+4]
  1374. #else
  1375. ldrd r10,r11,[sp,#392] @ A[3][4]
  1376. #endif
  1377. @ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
  1378. #ifndef __thumb2__
  1379. ldr r12,[sp,#200] @ D[0]
  1380. #endif
  1381. @ mov r3,r3,ror#32-3
  1382. #ifndef __thumb2__
  1383. ldr r14,[sp,#200+4]
  1384. #else
  1385. ldrd r12,r14,[sp,#200] @ D[0]
  1386. #endif
  1387. eor r4,r4,r6
  1388. eor r5,r5,r7
  1389. @ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
  1390. @ mov r4,r7,ror#32-13 @ [track reverse order below]
  1391. eor r10,r10,r8
  1392. #ifndef __thumb2__
  1393. ldr r8,[sp,#400] @ A[4][0]
  1394. #endif
  1395. eor r11,r11,r9
  1396. #ifndef __thumb2__
  1397. ldr r9,[sp,#400+4]
  1398. #else
  1399. ldrd r8,r9,[sp,#400] @ A[4][0]
  1400. #endif
  1401. mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
  1402. mov r7,r11,ror#32-4
  1403. eor r12,r12,r8
  1404. eor r14,r14,r9
  1405. mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
  1406. mov r9,r14,ror#32-9
  1407. bic r10,r5,r2,ror#13-3
  1408. bic r11,r4,r3,ror#12-3
  1409. bic r12,r6,r5,ror#32-13
  1410. bic r14,r7,r4,ror#32-12
  1411. eor r10,r0,r10,ror#32-13
  1412. eor r11,r1,r11,ror#32-12
  1413. #ifndef __thumb2__
  1414. str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
  1415. #endif
  1416. eor r12,r12,r2,ror#32-3
  1417. #ifndef __thumb2__
  1418. str r11,[sp,#80+4]
  1419. #else
  1420. strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
  1421. #endif
  1422. eor r14,r14,r3,ror#32-3
  1423. #ifndef __thumb2__
  1424. str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  1425. #endif
  1426. bic r10,r8,r6
  1427. bic r11,r9,r7
  1428. #ifndef __thumb2__
  1429. str r14,[sp,#88+4]
  1430. #else
  1431. strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
  1432. #endif
  1433. eor r10,r10,r5,ror#32-13
  1434. eor r11,r11,r4,ror#32-12
  1435. #ifndef __thumb2__
  1436. str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  1437. #endif
  1438. bic r12,r0,r8
  1439. #ifndef __thumb2__
  1440. str r11,[sp,#96+4]
  1441. #else
  1442. strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
  1443. #endif
  1444. bic r14,r1,r9
  1445. eor r12,r12,r6
  1446. eor r14,r14,r7
  1447. #ifndef __thumb2__
  1448. str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  1449. #endif
  1450. bic r10,r2,r0,ror#3
  1451. #ifndef __thumb2__
  1452. str r14,[sp,#104+4]
  1453. #else
  1454. strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
  1455. #endif
  1456. bic r11,r3,r1,ror#3
  1457. #ifndef __thumb2__
  1458. ldr r1,[sp,#272] @ A[0][4] [in reverse order]
  1459. #endif
  1460. eor r10,r8,r10,ror#32-3
  1461. #ifndef __thumb2__
  1462. ldr r0,[sp,#272+4]
  1463. #else
  1464. ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order]
  1465. #endif
  1466. eor r11,r9,r11,ror#32-3
  1467. #ifndef __thumb2__
  1468. str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  1469. #endif
  1470. add r9,sp,#208
  1471. #ifndef __thumb2__
  1472. str r11,[sp,#112+4]
  1473. #else
  1474. strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
  1475. #endif
  1476. #ifndef __thumb2__
  1477. ldr r10,[sp,#232] @ D[4]
  1478. #endif
  1479. #ifndef __thumb2__
  1480. ldr r11,[sp,#232+4]
  1481. #else
  1482. ldrd r10,r11,[sp,#232] @ D[4]
  1483. #endif
  1484. #ifndef __thumb2__
  1485. ldr r12,[sp,#200] @ D[0]
  1486. #endif
  1487. #ifndef __thumb2__
  1488. ldr r14,[sp,#200+4]
  1489. #else
  1490. ldrd r12,r14,[sp,#200] @ D[0]
  1491. #endif
  1492. ldmia r9,{r6-r9} @ D[1..2]
  1493. eor r1,r1,r10
  1494. #ifndef __thumb2__
  1495. ldr r2,[sp,#280] @ A[1][0]
  1496. #endif
  1497. eor r0,r0,r11
  1498. #ifndef __thumb2__
  1499. ldr r3,[sp,#280+4]
  1500. #else
  1501. ldrd r2,r3,[sp,#280] @ A[1][0]
  1502. #endif
  1503. @ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
  1504. #ifndef __thumb2__
  1505. ldr r4,[sp,#328] @ A[2][1]
  1506. #endif
  1507. @ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
  1508. #ifndef __thumb2__
  1509. ldr r5,[sp,#328+4]
  1510. #else
  1511. ldrd r4,r5,[sp,#328] @ A[2][1]
  1512. #endif
  1513. eor r2,r2,r12
  1514. #ifndef __thumb2__
  1515. ldr r10,[sp,#376] @ A[3][2]
  1516. #endif
  1517. eor r3,r3,r14
  1518. #ifndef __thumb2__
  1519. ldr r11,[sp,#376+4]
  1520. #else
  1521. ldrd r10,r11,[sp,#376] @ A[3][2]
  1522. #endif
  1523. @ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
  1524. #ifndef __thumb2__
  1525. ldr r12,[sp,#224] @ D[3]
  1526. #endif
  1527. @ mov r3,r3,ror#32-18
  1528. #ifndef __thumb2__
  1529. ldr r14,[sp,#224+4]
  1530. #else
  1531. ldrd r12,r14,[sp,#224] @ D[3]
  1532. #endif
  1533. eor r6,r6,r4
  1534. eor r7,r7,r5
  1535. mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
  1536. mov r5,r7,ror#32-5
  1537. eor r10,r10,r8
  1538. #ifndef __thumb2__
  1539. ldr r8,[sp,#424] @ A[4][3]
  1540. #endif
  1541. eor r11,r11,r9
  1542. #ifndef __thumb2__
  1543. ldr r9,[sp,#424+4]
  1544. #else
  1545. ldrd r8,r9,[sp,#424] @ A[4][3]
  1546. #endif
  1547. mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
  1548. mov r6,r11,ror#32-8
  1549. eor r12,r12,r8
  1550. eor r14,r14,r9
  1551. mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
  1552. mov r9,r14,ror#32-28
  1553. bic r10,r4,r2,ror#32-18
  1554. bic r11,r5,r3,ror#32-18
  1555. eor r10,r10,r0,ror#32-14
  1556. eor r11,r11,r1,ror#32-13
  1557. #ifndef __thumb2__
  1558. str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
  1559. #endif
  1560. bic r12,r6,r4
  1561. #ifndef __thumb2__
  1562. str r11,[sp,#120+4]
  1563. #else
  1564. strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
  1565. #endif
  1566. bic r14,r7,r5
  1567. eor r12,r12,r2,ror#32-18
  1568. #ifndef __thumb2__
  1569. str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  1570. #endif
  1571. eor r14,r14,r3,ror#32-18
  1572. #ifndef __thumb2__
  1573. str r14,[sp,#128+4]
  1574. #else
  1575. strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
  1576. #endif
  1577. bic r10,r8,r6
  1578. bic r11,r9,r7
  1579. bic r12,r0,r8,ror#14
  1580. bic r14,r1,r9,ror#13
  1581. eor r10,r10,r4
  1582. eor r11,r11,r5
  1583. #ifndef __thumb2__
  1584. str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  1585. #endif
  1586. bic r2,r2,r0,ror#18-14
  1587. #ifndef __thumb2__
  1588. str r11,[sp,#136+4]
  1589. #else
  1590. strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
  1591. #endif
  1592. eor r12,r6,r12,ror#32-14
  1593. bic r11,r3,r1,ror#18-13
  1594. eor r14,r7,r14,ror#32-13
  1595. #ifndef __thumb2__
  1596. str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  1597. #endif
  1598. #ifndef __thumb2__
  1599. str r14,[sp,#144+4]
  1600. #else
  1601. strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
  1602. #endif
  1603. add r14,sp,#216
  1604. #ifndef __thumb2__
  1605. ldr r0,[sp,#256] @ A[0][2]
  1606. #endif
  1607. eor r10,r8,r2,ror#32-18
  1608. #ifndef __thumb2__
  1609. ldr r1,[sp,#256+4]
  1610. #else
  1611. ldrd r0,r1,[sp,#256] @ A[0][2]
  1612. #endif
  1613. eor r11,r9,r11,ror#32-18
  1614. #ifndef __thumb2__
  1615. str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  1616. #endif
  1617. #ifndef __thumb2__
  1618. str r11,[sp,#152+4]
  1619. #else
  1620. strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
  1621. #endif
  1622. ldmia r14,{r10-r12,r14} @ D[2..3]
  1623. #ifndef __thumb2__
  1624. ldr r2,[sp,#304] @ A[1][3]
  1625. #endif
  1626. #ifndef __thumb2__
  1627. ldr r3,[sp,#304+4]
  1628. #else
  1629. ldrd r2,r3,[sp,#304] @ A[1][3]
  1630. #endif
  1631. #ifndef __thumb2__
  1632. ldr r6,[sp,#232] @ D[4]
  1633. #endif
  1634. #ifndef __thumb2__
  1635. ldr r7,[sp,#232+4]
  1636. #else
  1637. ldrd r6,r7,[sp,#232] @ D[4]
  1638. #endif
  1639. eor r0,r0,r10
  1640. #ifndef __thumb2__
  1641. ldr r4,[sp,#352] @ A[2][4]
  1642. #endif
  1643. eor r1,r1,r11
  1644. #ifndef __thumb2__
  1645. ldr r5,[sp,#352+4]
  1646. #else
  1647. ldrd r4,r5,[sp,#352] @ A[2][4]
  1648. #endif
  1649. @ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
  1650. #ifndef __thumb2__
  1651. ldr r8,[sp,#200] @ D[0]
  1652. #endif
  1653. @ mov r1,r1,ror#32-31
  1654. #ifndef __thumb2__
  1655. ldr r9,[sp,#200+4]
  1656. #else
  1657. ldrd r8,r9,[sp,#200] @ D[0]
  1658. #endif
  1659. eor r12,r12,r2
  1660. #ifndef __thumb2__
  1661. ldr r10,[sp,#360] @ A[3][0]
  1662. #endif
  1663. eor r14,r14,r3
  1664. #ifndef __thumb2__
  1665. ldr r11,[sp,#360+4]
  1666. #else
  1667. ldrd r10,r11,[sp,#360] @ A[3][0]
  1668. #endif
  1669. mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
  1670. #ifndef __thumb2__
  1671. ldr r12,[sp,#208] @ D[1]
  1672. #endif
  1673. mov r2,r14,ror#32-28
  1674. #ifndef __thumb2__
  1675. ldr r14,[sp,#208+4]
  1676. #else
  1677. ldrd r12,r14,[sp,#208] @ D[1]
  1678. #endif
  1679. eor r6,r6,r4
  1680. eor r7,r7,r5
  1681. mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
  1682. mov r4,r7,ror#32-20
  1683. eor r10,r10,r8
  1684. #ifndef __thumb2__
  1685. ldr r8,[sp,#408] @ A[4][1]
  1686. #endif
  1687. eor r11,r11,r9
  1688. #ifndef __thumb2__
  1689. ldr r9,[sp,#408+4]
  1690. #else
  1691. ldrd r8,r9,[sp,#408] @ A[4][1]
  1692. #endif
  1693. mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
  1694. mov r6,r11,ror#32-21
  1695. eor r8,r8,r12
  1696. eor r9,r9,r14
  1697. @ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
  1698. @ mov r9,r3,ror#32-1
  1699. bic r10,r4,r2
  1700. bic r11,r5,r3
  1701. eor r10,r10,r0,ror#32-31
  1702. #ifndef __thumb2__
  1703. str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
  1704. #endif
  1705. eor r11,r11,r1,ror#32-31
  1706. #ifndef __thumb2__
  1707. str r11,[sp,#160+4]
  1708. #else
  1709. strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
  1710. #endif
  1711. bic r12,r6,r4
  1712. bic r14,r7,r5
  1713. eor r12,r12,r2
  1714. eor r14,r14,r3
  1715. #ifndef __thumb2__
  1716. str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  1717. #endif
  1718. bic r10,r8,r6,ror#1
  1719. #ifndef __thumb2__
  1720. str r14,[sp,#168+4]
  1721. #else
  1722. strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
  1723. #endif
  1724. bic r11,r9,r7,ror#1
  1725. bic r12,r0,r8,ror#31-1
  1726. bic r14,r1,r9,ror#31-1
  1727. eor r4,r4,r10,ror#32-1
  1728. #ifndef __thumb2__
  1729. str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  1730. #endif
  1731. eor r5,r5,r11,ror#32-1
  1732. #ifndef __thumb2__
  1733. str r5,[sp,#176+4]
  1734. #else
  1735. strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
  1736. #endif
  1737. eor r6,r6,r12,ror#32-31
  1738. eor r7,r7,r14,ror#32-31
  1739. #ifndef __thumb2__
  1740. str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  1741. #endif
  1742. bic r10,r2,r0,ror#32-31
  1743. #ifndef __thumb2__
  1744. str r7,[sp,#184+4]
  1745. #else
  1746. strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
  1747. #endif
  1748. bic r11,r3,r1,ror#32-31
  1749. add r12,sp,#0
  1750. eor r8,r10,r8,ror#32-1
  1751. add r10,sp,#40
  1752. eor r9,r11,r9,ror#32-1
  1753. #ifndef __thumb2__
  1754. str r8,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  1755. #endif
  1756. #ifndef __thumb2__
  1757. str r9,[sp,#192+4]
  1758. #else
  1759. strd r8,r9,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
  1760. #endif
  1761. blo .Lround2x
  1762. ldr pc,[sp,#440]
  1763. .size KeccakF1600_int,.-KeccakF1600_int
  1764. .type KeccakF1600, %function
  1765. .align 5
  @ KeccakF1600: copy the 200-byte state at r0 onto the stack, run the round
  @ loop of KeccakF1600_int on that copy (entered at KeccakF1600_enter), then
  @ copy the result back over the original state.
  @   In:    r0 = pointer to A[5][5] (25 lanes, each held as two 32-bit words)
  @   Out:   the state at r0 is updated in place
  @   Saved: r4-r11 are restored on return; r0-r3, r12 and the flags are not
  @ Frame: 456 bytes below the pushed registers. sp+0..199 holds the working
  @ state, sp+440 and sp+444 are written by KeccakF1600_enter (return address
  @ and round counter), and the pushed r0 ends up at sp+456.
  1766. KeccakF1600:
  1767. stmdb sp!,{r0,r4-r11,lr} @ keep the A pointer (r0) next to the callee-saved registers
  1768. sub sp,sp,#440+16 @ space for A[5][5],D[5],T[5][5],...
  1769. add r10,r0,#40 @ r10 = source cursor, starts at row 1 of A
  1770. add r11,sp,#40 @ r11 = destination cursor, row 1 of the stack copy
  1771. ldmia r0, {r0-r9} @ copy A[5][5] to stack
  1772. stmia sp, {r0-r9} @ row 0 (10 words = 5 lanes)
  1773. ldmia r10!,{r0-r9}
  1774. stmia r11!,{r0-r9} @ row 1
  1775. ldmia r10!,{r0-r9}
  1776. stmia r11!,{r0-r9} @ row 2
  1777. ldmia r10!,{r0-r9}
  1778. stmia r11!,{r0-r9} @ row 3
  1779. ldmia r10, {r0-r9} @ row 4 stays in r0-r9, so r4-r9 = A[4][2..4] for the entry point
  1780. add r12,sp,#0 @ r12 = &A[0][0], as set up at the top of KeccakF1600_int
  1781. add r10,sp,#40 @ r10 = &A[1][0], as set up at the top of KeccakF1600_int
  1782. stmia r11, {r0-r9} @ row 4
  1783. bl KeccakF1600_enter @ skips the register setup that KeccakF1600_int does itself
  1784. ldr r11, [sp,#440+16] @ restore pointer to A
  1785. ldmia sp, {r0-r9}
  1786. stmia r11!,{r0-r9} @ return A[5][5]
  1787. ldmia r10!,{r0-r9} @ r10 is sp+40 again here (reset at the end of the round loop)
  1788. stmia r11!,{r0-r9}
  1789. ldmia r10!,{r0-r9}
  1790. stmia r11!,{r0-r9}
  1791. ldmia r10!,{r0-r9}
  1792. stmia r11!,{r0-r9}
  1793. ldmia r10, {r0-r9}
  1794. stmia r11, {r0-r9}
  1795. add sp,sp,#440+20 @ drop the 456-byte frame plus the saved r0 slot
  1796. ldmia sp!,{r4-r11,pc} @ restore callee-saved registers and return
  1797. .size KeccakF1600,.-KeccakF1600
  1798. .global SHA3_absorb
  1799. .type SHA3_absorb,%function
  1800. .align 5
  @ SHA3_absorb: XOR whole input blocks into the state and permute after each.
  @   In:  r0 = pointer to state A[5][5]
  @        r1 = input pointer
  @        r2 = len, number of input bytes
  @        r3 = bsz, block size in bytes
  @   Out: r0 = number of input bytes left unprocessed (len itself when
  @        len < bsz on entry, in which case the state is not touched)
  @ Input bytes are assembled little-endian into 64-bit lanes and converted
  @ to the bit-interleaved form (even bits in the first word, odd bits in the
  @ second word) before being XORed into the stack copy of the state.
  @ NOTE(review): the block loop consumes 8 bytes per pass and leaves on bhi
  @ after subs r14,r14,#8, so bsz is presumably a non-zero multiple of 8.
  @ Confirm against the callers.
  @ Frame: 472 bytes below the pushed registers. sp+0..199 is the working
  @ state, sp+456..468 hold the four mask constants, and the pushed r0-r3 sit
  @ at sp+472 (A), sp+476 (input cursor), sp+480 (remaining length) and
  @ sp+484 (bsz).
  1801. SHA3_absorb:
  1802. stmdb sp!,{r0-r12,lr} @ r0-r3 are pushed too: their slots double as variables
  1803. sub sp,sp,#456+16
  1804. add r10,r0,#40 @ r10 = source cursor, row 1 of A
  1805. @ mov r11,r1
  1806. mov r12,r2 @ r12 = len
  1807. mov r14,r3 @ r14 = bsz
  1808. cmp r2,r3
  1809. blo .Labsorb_abort @ less than one block: return len unchanged
  1810. add r11,sp,#0 @ r11 temporarily serves as the destination cursor
  1811. ldmia r0, {r0-r9} @ copy A[5][5] to stack
  1812. stmia r11!, {r0-r9}
  1813. ldmia r10!,{r0-r9}
  1814. stmia r11!, {r0-r9}
  1815. ldmia r10!,{r0-r9}
  1816. stmia r11!, {r0-r9}
  1817. ldmia r10!,{r0-r9}
  1818. stmia r11!, {r0-r9}
  1819. ldmia r10!,{r0-r9}
  1820. stmia r11, {r0-r9}
  1821. ldr r11,[sp,#476] @ restore r11
  1822. #ifdef __thumb2__
  1823. mov r9,#0x00ff00ff
  1824. mov r8,#0x0f0f0f0f
  1825. mov r7,#0x33333333
  1826. mov r6,#0x55555555
  1827. #else
  1828. mov r6,#0x11 @ compose constants
  1829. mov r8,#0x0f
  1830. mov r9,#0xff
  1831. orr r6,r6,r6,lsl#8
  1832. orr r8,r8,r8,lsl#8
  1833. orr r6,r6,r6,lsl#16 @ 0x11111111
  1834. orr r9,r9,r9,lsl#16 @ 0x00ff00ff
  1835. orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
  1836. orr r7,r6,r6,lsl#1 @ 0x33333333
  1837. orr r6,r6,r6,lsl#2 @ 0x55555555
  1838. #endif
  1839. str r9,[sp,#468] @ park the masks so they can be reloaded after the permutation
  1840. str r8,[sp,#464]
  1841. str r7,[sp,#460]
  1842. str r6,[sp,#456]
  1843. b .Loop_absorb
  1844. .align 4
  1845. .Loop_absorb:
  1846. subs r0,r12,r14 @ r0 = len - bsz
  1847. blo .Labsorbed @ no full block left
  1848. add r10,sp,#0 @ r10 = A_flat cursor into the stack copy
  1849. str r0,[sp,#480] @ save len - bsz
  1850. .align 4
  1851. .Loop_block:
  1852. ldrb r0,[r11],#1 @ gather 8 input bytes, one at a time
  1853. ldrb r1,[r11],#1
  1854. ldrb r2,[r11],#1
  1855. ldrb r3,[r11],#1
  1856. ldrb r4,[r11],#1
  1857. orr r0,r0,r1,lsl#8
  1858. ldrb r1,[r11],#1
  1859. orr r0,r0,r2,lsl#16
  1860. ldrb r2,[r11],#1
  1861. orr r0,r0,r3,lsl#24 @ lo
  1862. ldrb r3,[r11],#1
  1863. orr r1,r4,r1,lsl#8
  1864. orr r1,r1,r2,lsl#16
  1865. orr r1,r1,r3,lsl#24 @ hi
  1866. and r2,r0,r6 @ &=0x55555555
  1867. and r0,r0,r6,lsl#1 @ &=0xaaaaaaaa
  1868. and r3,r1,r6 @ &=0x55555555
  1869. and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
  1870. orr r2,r2,r2,lsr#1 @ even bits are squeezed towards the low half
  1871. orr r0,r0,r0,lsl#1 @ odd bits are squeezed towards the high half
  1872. orr r3,r3,r3,lsr#1
  1873. orr r1,r1,r1,lsl#1
  1874. and r2,r2,r7 @ &=0x33333333
  1875. and r0,r0,r7,lsl#2 @ &=0xcccccccc
  1876. and r3,r3,r7 @ &=0x33333333
  1877. and r1,r1,r7,lsl#2 @ &=0xcccccccc
  1878. orr r2,r2,r2,lsr#2
  1879. orr r0,r0,r0,lsl#2
  1880. orr r3,r3,r3,lsr#2
  1881. orr r1,r1,r1,lsl#2
  1882. and r2,r2,r8 @ &=0x0f0f0f0f
  1883. and r0,r0,r8,lsl#4 @ &=0xf0f0f0f0
  1884. and r3,r3,r8 @ &=0x0f0f0f0f
  1885. and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
  1886. ldmia r10,{r4-r5} @ A_flat[i]
  1887. orr r2,r2,r2,lsr#4
  1888. orr r0,r0,r0,lsl#4
  1889. orr r3,r3,r3,lsr#4
  1890. orr r1,r1,r1,lsl#4
  1891. and r2,r2,r9 @ &=0x00ff00ff
  1892. and r0,r0,r9,lsl#8 @ &=0xff00ff00
  1893. and r3,r3,r9 @ &=0x00ff00ff
  1894. and r1,r1,r9,lsl#8 @ &=0xff00ff00
  1895. orr r2,r2,r2,lsr#8
  1896. orr r0,r0,r0,lsl#8
  1897. orr r3,r3,r3,lsr#8
  1898. orr r1,r1,r1,lsl#8
  1899. mov r2,r2,lsl#16 @ with the lsr#16 below: keep only the low 16 bits of r2
  1900. mov r1,r1,lsr#16 @ with the lsl#16 below: keep only the high 16 bits of r1
  1901. eor r4,r4,r3,lsl#16 @ first word, high half: even bits of hi
  1902. eor r5,r5,r0,lsr#16 @ second word, low half: odd bits of lo
  1903. eor r4,r4,r2,lsr#16 @ first word, low half: even bits of lo
  1904. eor r5,r5,r1,lsl#16 @ second word, high half: odd bits of hi
  1905. stmia r10!,{r4-r5} @ A_flat[i++] ^= BitInterleave(inp[0..7])
  1906. subs r14,r14,#8 @ 8 bytes of this block consumed
  1907. bhi .Loop_block
  1908. str r11,[sp,#476] @ save the advanced input cursor across the call
  1909. bl KeccakF1600_int
  1910. add r14,sp,#456
  1911. ldmia r14,{r6-r12,r14} @ restore constants and variables
  1912. b .Loop_absorb
  1913. .align 4
  1914. .Labsorbed:
  1915. add r11,sp,#40 @ r11 = source cursor, row 1 of the stack copy
  1916. ldmia sp, {r0-r9}
  1917. stmia r10!,{r0-r9} @ return A[5][5]
  1918. ldmia r11!, {r0-r9}
  1919. stmia r10!,{r0-r9}
  1920. ldmia r11!, {r0-r9}
  1921. stmia r10!,{r0-r9}
  1922. ldmia r11!, {r0-r9}
  1923. stmia r10!,{r0-r9}
  1924. ldmia r11, {r0-r9}
  1925. stmia r10, {r0-r9}
  1926. .Labsorb_abort:
  1927. add sp,sp,#456+32 @ drop the 472-byte frame plus the four r0-r3 slots
  1928. mov r0,r12 @ return value
  1929. ldmia sp!,{r4-r12,pc}
  1930. .size SHA3_absorb,.-SHA3_absorb
  1931. .global SHA3_squeeze
  1932. .type SHA3_squeeze,%function
  1933. .align 5
  @ SHA3_squeeze: write len bytes of output taken from the state.
  @   In:  r0 = pointer to state A[5][5] (bit-interleaved words)
  @        r1 = output pointer
  @        r2 = len, number of bytes to produce
  @        r3 = bsz, block size in bytes
  @ Each pass converts one lane from the interleaved form back to two plain
  @ 32-bit words and stores them byte by byte, low byte first. When bsz bytes
  @ have been emitted and more output is still wanted, KeccakF1600 is called
  @ on the state and reading restarts at its beginning.
  @ A len of zero returns at once without storing anything. Without that
  @ check the loop would fall into .Lsqueeze_tail, whose byte counter then
  @ wraps past zero, and seven bytes would be written to the output buffer.
  @ Frame: the first push leaves r0 (A) and r3 (bsz) in memory, the second
  @ push puts the four mask constants below them, so after a call to
  @ KeccakF1600 everything is reloaded with a single ldmia from sp.
  1934. SHA3_squeeze:
  1935. stmdb sp!,{r0,r3-r10,lr}
  1936. mov r10,r0 @ r10 = A_flat cursor
  1937. mov r4,r1 @ r4 = output cursor
  1938. mov r5,r2 @ r5 = bytes still to produce
  1939. mov r12,r3 @ r12 = bytes left in the current block
  1940. #ifdef __thumb2__
  1941. mov r9,#0x00ff00ff
  1942. mov r8,#0x0f0f0f0f
  1943. mov r7,#0x33333333
  1944. mov r6,#0x55555555
  1945. #else
  1946. mov r6,#0x11 @ compose constants
  1947. mov r8,#0x0f
  1948. mov r9,#0xff
  1949. orr r6,r6,r6,lsl#8
  1950. orr r8,r8,r8,lsl#8
  1951. orr r6,r6,r6,lsl#16 @ 0x11111111
  1952. orr r9,r9,r9,lsl#16 @ 0x00ff00ff
  1953. orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
  1954. orr r7,r6,r6,lsl#1 @ 0x33333333
  1955. orr r6,r6,r6,lsl#2 @ 0x55555555
  1956. #endif
  1957. stmdb sp!,{r6-r9} @ park the masks below the saved r0 and r3
  cmp r5,#0 @ zero-length request: emit nothing
  beq .Lsqueeze_done @ frame is complete here, so the normal epilogue applies
  1958. mov r14,r10 @ r14 = start of the state, argument for KeccakF1600
  1959. b .Loop_squeeze
  1960. .align 4
  1961. .Loop_squeeze:
  1962. ldmia r10!,{r0,r1} @ A_flat[i++]
  1963. mov r2,r0,lsl#16
  1964. mov r3,r1,lsl#16 @ r3 = r1 << 16
  1965. mov r2,r2,lsr#16 @ r2 = r0 & 0x0000ffff
  1966. mov r1,r1,lsr#16
  1967. mov r0,r0,lsr#16 @ r0 = r0 >> 16
  1968. mov r1,r1,lsl#16 @ r1 = r1 & 0xffff0000
  1969. orr r2,r2,r2,lsl#8 @ spread the packed bits back out, widest step first
  1970. orr r3,r3,r3,lsr#8
  1971. orr r0,r0,r0,lsl#8
  1972. orr r1,r1,r1,lsr#8
  1973. and r2,r2,r9 @ &=0x00ff00ff
  1974. and r3,r3,r9,lsl#8 @ &=0xff00ff00
  1975. and r0,r0,r9 @ &=0x00ff00ff
  1976. and r1,r1,r9,lsl#8 @ &=0xff00ff00
  1977. orr r2,r2,r2,lsl#4
  1978. orr r3,r3,r3,lsr#4
  1979. orr r0,r0,r0,lsl#4
  1980. orr r1,r1,r1,lsr#4
  1981. and r2,r2,r8 @ &=0x0f0f0f0f
  1982. and r3,r3,r8,lsl#4 @ &=0xf0f0f0f0
  1983. and r0,r0,r8 @ &=0x0f0f0f0f
  1984. and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
  1985. orr r2,r2,r2,lsl#2
  1986. orr r3,r3,r3,lsr#2
  1987. orr r0,r0,r0,lsl#2
  1988. orr r1,r1,r1,lsr#2
  1989. and r2,r2,r7 @ &=0x33333333
  1990. and r3,r3,r7,lsl#2 @ &=0xcccccccc
  1991. and r0,r0,r7 @ &=0x33333333
  1992. and r1,r1,r7,lsl#2 @ &=0xcccccccc
  1993. orr r2,r2,r2,lsl#1
  1994. orr r3,r3,r3,lsr#1
  1995. orr r0,r0,r0,lsl#1
  1996. orr r1,r1,r1,lsr#1
  1997. and r2,r2,r6 @ &=0x55555555
  1998. and r3,r3,r6,lsl#1 @ &=0xaaaaaaaa
  1999. and r0,r0,r6 @ &=0x55555555
  2000. and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
  2001. orr r2,r2,r3 @ r2 = first output word (bytes 0..3 of the lane)
  2002. orr r0,r0,r1 @ r0 = second output word (bytes 4..7 of the lane)
  2003. cmp r5,#8
  2004. blo .Lsqueeze_tail @ 1..7 bytes wanted: finish byte by byte
  2005. mov r1,r2,lsr#8
  2006. strb r2,[r4],#1
  2007. mov r3,r2,lsr#16
  2008. strb r1,[r4],#1
  2009. mov r2,r2,lsr#24
  2010. strb r3,[r4],#1
  2011. strb r2,[r4],#1
  2012. mov r1,r0,lsr#8
  2013. strb r0,[r4],#1
  2014. mov r3,r0,lsr#16
  2015. strb r1,[r4],#1
  2016. mov r0,r0,lsr#24
  2017. strb r3,[r4],#1
  2018. strb r0,[r4],#1
  2019. subs r5,r5,#8
  2020. beq .Lsqueeze_done
  2021. subs r12,r12,#8 @ bsz -= 8
  2022. bhi .Loop_squeeze
  2023. mov r0,r14 @ original r10
  2024. bl KeccakF1600 @ block exhausted: permute before reading more
  2025. ldmia sp,{r6-r10,r12} @ restore constants and variables
  2026. mov r14,r10
  2027. b .Loop_squeeze
  2028. .align 4
  2029. .Lsqueeze_tail:
  2030. strb r2,[r4],#1 @ here 1 <= r5 <= 7, so the counter reaches zero below
  2031. mov r2,r2,lsr#8
  2032. subs r5,r5,#1
  2033. beq .Lsqueeze_done
  2034. strb r2,[r4],#1
  2035. mov r2,r2,lsr#8
  2036. subs r5,r5,#1
  2037. beq .Lsqueeze_done
  2038. strb r2,[r4],#1
  2039. mov r2,r2,lsr#8
  2040. subs r5,r5,#1
  2041. beq .Lsqueeze_done
  2042. strb r2,[r4],#1
  2043. subs r5,r5,#1
  2044. beq .Lsqueeze_done
  2045. strb r0,[r4],#1
  2046. mov r0,r0,lsr#8
  2047. subs r5,r5,#1
  2048. beq .Lsqueeze_done
  2049. strb r0,[r4],#1
  2050. mov r0,r0,lsr#8
  2051. subs r5,r5,#1
  2052. beq .Lsqueeze_done
  2053. strb r0,[r4] @ seventh and last possible tail byte
  2054. b .Lsqueeze_done
  2055. .align 4
  2056. .Lsqueeze_done:
  2057. add sp,sp,#24 @ drop the four masks plus the saved r0 and r3
  2058. ldmia sp!,{r4-r10,pc}
  2059. .size SHA3_squeeze,.-SHA3_squeeze
  2060. #if __ARM_MAX_ARCH__>=7
  2061. .fpu neon
  2062. .type iotas64, %object
  2063. .align 5
  @ iotas64: the 24 Keccak round constants as plain 64-bit values, one per
  @ round, in round order. KeccakF1600_neon points r2 at this table and
  @ walks it with a post-incremented 64-bit load for the Iota step. The
  @ iotas32 table near the top of the file is the separate copy used by the
  @ integer-only code.
  2064. iotas64:
  2065. .quad 0x0000000000000001 @ round 0
  2066. .quad 0x0000000000008082
  2067. .quad 0x800000000000808a
  2068. .quad 0x8000000080008000
  2069. .quad 0x000000000000808b
  2070. .quad 0x0000000080000001
  2071. .quad 0x8000000080008081
  2072. .quad 0x8000000000008009
  2073. .quad 0x000000000000008a @ round 8
  2074. .quad 0x0000000000000088
  2075. .quad 0x0000000080008009
  2076. .quad 0x000000008000000a
  2077. .quad 0x000000008000808b
  2078. .quad 0x800000000000008b
  2079. .quad 0x8000000000008089
  2080. .quad 0x8000000000008003
  2081. .quad 0x8000000000008002 @ round 16
  2082. .quad 0x8000000000000080
  2083. .quad 0x000000000000800a
  2084. .quad 0x800000008000000a
  2085. .quad 0x8000000080008081
  2086. .quad 0x8000000000008080
  2087. .quad 0x0000000080000001
  2088. .quad 0x8000000080008008 @ round 23
  2089. .size iotas64,.-iotas64
  2090. .type KeccakF1600_neon, %function
  2091. .align 5
  2092. KeccakF1600_neon:
  2093. add r1, r0, #16
  2094. adr r2, iotas64
  2095. mov r3, #24 @ loop counter
  2096. b .Loop_neon
  2097. .align 4
  2098. .Loop_neon:
  2099. @ Theta
  2100. vst1.64 {q4}, [r0,:64] @ offload A[0..1][4]
  2101. veor q13, q0, q5 @ A[0..1][0]^A[2..3][0]
  2102. vst1.64 {d18}, [r1,:64] @ offload A[2][4]
  2103. veor q14, q1, q6 @ A[0..1][1]^A[2..3][1]
  2104. veor q15, q2, q7 @ A[0..1][2]^A[2..3][2]
  2105. veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
  2106. veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
  2107. veor q14, q3, q8 @ A[0..1][3]^A[2..3][3]
  2108. veor q4, q4, q9 @ A[0..1][4]^A[2..3][4]
  2109. veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
  2110. veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
  2111. veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
  2112. veor q13, q13, q10 @ C[0..1]^=A[4][0..1]
  2113. veor q14, q15, q11 @ C[2..3]^=A[4][2..3]
  2114. veor d25, d25, d24 @ C[4]^=A[4][4]
  2115. vadd.u64 q4, q13, q13 @ C[0..1]<<1
  2116. vadd.u64 q15, q14, q14 @ C[2..3]<<1
  2117. vadd.u64 d18, d25, d25 @ C[4]<<1
  2118. vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1)
  2119. vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1)
  2120. vsri.u64 d18, d25, #63 @ ROL64(C[4],1)
  2121. veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1)
  2122. veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
  2123. veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1)
  2124. veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1)
  2125. veor d0, d0, d25 @ A[0][0] ^= C[4]
  2126. veor d1, d1, d25 @ A[1][0] ^= C[4]
  2127. veor d10, d10, d25 @ A[2][0] ^= C[4]
  2128. veor d11, d11, d25 @ A[3][0] ^= C[4]
  2129. veor d20, d20, d25 @ A[4][0] ^= C[4]
  2130. veor d2, d2, d26 @ A[0][1] ^= D[1]
  2131. veor d3, d3, d26 @ A[1][1] ^= D[1]
  2132. veor d12, d12, d26 @ A[2][1] ^= D[1]
  2133. veor d13, d13, d26 @ A[3][1] ^= D[1]
  2134. veor d21, d21, d26 @ A[4][1] ^= D[1]
  2135. vmov d26, d27
  2136. veor d6, d6, d28 @ A[0][3] ^= C[2]
  2137. veor d7, d7, d28 @ A[1][3] ^= C[2]
  2138. veor d16, d16, d28 @ A[2][3] ^= C[2]
  2139. veor d17, d17, d28 @ A[3][3] ^= C[2]
  2140. veor d23, d23, d28 @ A[4][3] ^= C[2]
  2141. vld1.64 {q4}, [r0,:64] @ restore A[0..1][4]
  2142. vmov d28, d29
  2143. vld1.64 {d18}, [r1,:64] @ restore A[2][4]
  2144. veor q2, q2, q13 @ A[0..1][2] ^= D[2]
  2145. veor q7, q7, q13 @ A[2..3][2] ^= D[2]
  2146. veor d22, d22, d27 @ A[4][2] ^= D[2]
  2147. veor q4, q4, q14 @ A[0..1][4] ^= C[3]
  2148. veor q9, q9, q14 @ A[2..3][4] ^= C[3]
  2149. veor d24, d24, d29 @ A[4][4] ^= C[3]
  2150. @ Rho + Pi
  2151. vmov d26, d2 @ C[1] = A[0][1]
  2152. vshl.u64 d2, d3, #44
  2153. vmov d27, d4 @ C[2] = A[0][2]
  2154. vshl.u64 d4, d14, #43
  2155. vmov d28, d6 @ C[3] = A[0][3]
  2156. vshl.u64 d6, d17, #21
  2157. vmov d29, d8 @ C[4] = A[0][4]
  2158. vshl.u64 d8, d24, #14
  2159. vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
  2160. vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
  2161. vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
  2162. vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
  2163. vshl.u64 d3, d9, #20
  2164. vshl.u64 d14, d16, #25
  2165. vshl.u64 d17, d15, #15
  2166. vshl.u64 d24, d21, #2
  2167. vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
  2168. vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
  2169. vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
  2170. vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
  2171. vshl.u64 d9, d22, #61
  2172. @ vshl.u64 d16, d19, #8
  2173. vshl.u64 d15, d12, #10
  2174. vshl.u64 d21, d7, #55
  2175. vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
  2176. vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
  2177. vsri.u64 d15, d12, #64-10 @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
  2178. vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
  2179. vshl.u64 d22, d18, #39
  2180. @ vshl.u64 d19, d23, #56
  2181. vshl.u64 d12, d5, #6
  2182. vshl.u64 d7, d13, #45
  2183. vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
  2184. vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
  2185. vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
  2186. vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
  2187. vshl.u64 d18, d20, #18
  2188. vshl.u64 d23, d11, #41
  2189. vshl.u64 d5, d10, #3
  2190. vshl.u64 d13, d1, #36
  2191. vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
  2192. vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
  2193. vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
  2194. vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
  2195. vshl.u64 d1, d28, #28
  2196. vshl.u64 d10, d26, #1
  2197. vshl.u64 d11, d29, #27
  2198. vshl.u64 d20, d27, #62
  2199. vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3])
  2200. vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1])
  2201. vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4])
  2202. vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2])
  2203. @ Chi + Iota
  2204. vbic q13, q2, q1
  2205. vbic q14, q3, q2
  2206. vbic q15, q4, q3
  2207. veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
  2208. veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
  2209. veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
  2210. vst1.64 {q13}, [r0,:64] @ offload A[0..1][0]
  2211. vbic q13, q0, q4
  2212. vbic q15, q1, q0
  2213. vmov q1, q14 @ A[0..1][1]
  2214. veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
  2215. veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
  2216. vbic q13, q7, q6
  2217. vmov q0, q5 @ A[2..3][0]
  2218. vbic q14, q8, q7
  2219. vmov q15, q6 @ A[2..3][1]
  2220. veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
  2221. vbic q13, q9, q8
  2222. veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
  2223. vbic q14, q0, q9
  2224. veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
  2225. vbic q13, q15, q0
  2226. veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
  2227. vmov q14, q10 @ A[4][0..1]
  2228. veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
  2229. vld1.64 d25, [r2,:64]! @ Iota[i++]
  2230. vbic d26, d22, d21
  2231. vbic d27, d23, d22
  2232. vld1.64 {q0}, [r0,:64] @ restore A[0..1][0]
  2233. veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2])
  2234. vbic d26, d24, d23
  2235. veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3])
  2236. vbic d27, d28, d24
  2237. veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4])
  2238. vbic d26, d29, d28
  2239. veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0])
  2240. veor d0, d0, d25 @ A[0][0] ^= Iota[i]
  2241. veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1])
  2242. subs r3, r3, #1
  2243. bne .Loop_neon
  2244. .word 0xe12fff1e
  2245. .size KeccakF1600_neon,.-KeccakF1600_neon
  @ ----------------------------------------------------------------------
  @ SHA3_absorb_neon
  @
  @ XORs whole input blocks into the Keccak state and calls
  @ KeccakF1600_neon after each block.
  @
  @ In:   r0 = A      state, 25 lanes of 64 bits (accessed with :64 hints)
  @       r1 = inp    input bytes
  @       r2 = len    number of input bytes
  @       r3 = bsz    block size in bytes
  @ Out:  r0 = input bytes left over once fewer than bsz bytes remain
  @
  @ Register use inside the function:
  @       r4 = inp cursor, r5 = remaining len, r6 = bsz, r12 = scratch
  @       d0,d2,d4,d6,d8       = A[0][0..4]
  @       d1,d3,d5,d7,d9       = A[1][0..4]
  @       d10,d12,d14,d16,d18  = A[2][0..4]
  @       d11,d13,d15,d17,d19  = A[3][0..4]
  @       d20..d24             = A[4][0..4]
  @       d31                  = input lane being absorbed
  @ d8-d15 are saved on entry and restored on exit.
  @
  @ NOTE(review): the compare ladder only tests bsz against even multiples
  @ of 8, so bsz is presumably a multiple of 8 in the range 8..200 - confirm
  @ against the callers. Nothing here validates it.
  @ ----------------------------------------------------------------------

  @ Absorb up to two consecutive lanes.  upto is the lane count reached
  @ after the second lane: bsz below 8*upto stops after the first lane,
  @ bsz equal to 8*upto stops after the second one.
  .macro  absorb_neon_lane_pair first, second, upto
          vld1.8  {d31}, [r4]!            @ endian-neutral load, inp += 8
          cmp     r6, #8*\upto            @ both branches below test this cmp
          veor    \first, \first, d31     @ lane ^= *inp++
          blo     .Labsorb_neon_permute
          vld1.8  {d31}, [r4]!
          veor    \second, \second, d31   @ lane ^= *inp++
          beq     .Labsorb_neon_permute
  .endm

  .global SHA3_absorb_neon
  .type   SHA3_absorb_neon, %function
  .align  5
  SHA3_absorb_neon:
          stmdb   sp!, {r4-r6,lr}
          vstmdb  sp!, {d8-d15}

          @ Move the arguments out of r1-r3 before the first call.
          mov     r4, r1                  @ inp
          mov     r5, r2                  @ len
          mov     r6, r3                  @ bsz

          @ Load the state, rows 0..3 one lane at a time in memory order.
  .irp    lane, d0,d2,d4,d6,d8, d1,d3,d5,d7,d9, d10,d12,d14,d16,d18, d11,d13,d15,d17,d19
          vld1.32 {\lane}, [r0,:64]!
  .endr
          vld1.32 {d20-d23}, [r0,:64]!    @ A[4][0..3]
          vld1.32 {d24}, [r0,:64]         @ A[4][4]
          sub     r0, r0, #24*8           @ rewind r0 to the start of the state
          b       .Labsorb_neon_next_block

  .align  4
  .Labsorb_neon_next_block:
          subs    r12, r5, r6             @ len - bsz
          blo     .Labsorb_neon_writeback @ fewer than bsz bytes left
          mov     r5, r12                 @ commit len -= bsz

          absorb_neon_lane_pair d0,  d2,  2       @ A[0][0], A[0][1]
          absorb_neon_lane_pair d4,  d6,  4       @ A[0][2], A[0][3]
          absorb_neon_lane_pair d8,  d1,  6       @ A[0][4], A[1][0]
          absorb_neon_lane_pair d3,  d5,  8       @ A[1][1], A[1][2]
          absorb_neon_lane_pair d7,  d9,  10      @ A[1][3], A[1][4]
          absorb_neon_lane_pair d10, d12, 12      @ A[2][0], A[2][1]
          absorb_neon_lane_pair d14, d16, 14      @ A[2][2], A[2][3]
          absorb_neon_lane_pair d18, d11, 16      @ A[2][4], A[3][0]
          absorb_neon_lane_pair d13, d15, 18      @ A[3][1], A[3][2]
          absorb_neon_lane_pair d17, d19, 20      @ A[3][3], A[3][4]
          absorb_neon_lane_pair d20, d21, 22      @ A[4][0], A[4][1]
          absorb_neon_lane_pair d22, d23, 24      @ A[4][2], A[4][3]
          vld1.8  {d31}, [r4]!
          veor    d24, d24, d31           @ A[4][4] ^= *inp++

  .Labsorb_neon_permute:
          bl      KeccakF1600_neon
          b       .Labsorb_neon_next_block

  .align  4
  .Labsorb_neon_writeback:
          @ Store the state back in the same lane order it was loaded in.
  .irp    lane, d0,d2,d4,d6,d8, d1,d3,d5,d7,d9, d10,d12,d14,d16,d18, d11,d13,d15,d17,d19
          vst1.32 {\lane}, [r0,:64]!
  .endr
          vst1.32 {d20-d23}, [r0,:64]!    @ A[4][0..3]
          vst1.32 {d24}, [r0,:64]         @ A[4][4]

          mov     r0, r5                  @ return value: leftover length
          vldmia  sp!, {d8-d15}
          ldmia   sp!, {r4-r6,pc}
  .size   SHA3_absorb_neon,.-SHA3_absorb_neon
  .purgem absorb_neon_lane_pair
  @ ----------------------------------------------------------------------
  @ SHA3_squeeze_neon
  @
  @ Copies len bytes of output from the Keccak state to out, calling
  @ KeccakF1600_neon every time bsz bytes of the state have been consumed
  @ and more output is still wanted.
  @
  @ In:   r0 = A      state, 25 lanes of 64 bits (accessed with :64 hints)
  @       r1 = out    destination buffer
  @       r2 = len    number of bytes to produce
  @       r3 = bsz    block size in bytes
  @
  @ Register use inside the function:
  @       r4  = out cursor, r5 = remaining len, r6 = bsz
  @       r12 = read cursor into the state
  @       r14 = bytes left in the current block (lr is saved on the stack)
  @
  @ A zero len returns immediately without touching out.  The tail code
  @ stores its first byte before testing the length, so it must only be
  @ reached with 1..7 bytes outstanding.
  @
  @ NOTE(review): bsz is presumably a multiple of 8 - the block counter is
  @ decremented in steps of 8 - confirm against the callers.
  @ ----------------------------------------------------------------------
  .global SHA3_squeeze_neon
  .type   SHA3_squeeze_neon, %function
  .align  5
  SHA3_squeeze_neon:
          stmdb   sp!, {r4-r6,lr}

          mov     r4, r1                  @ out
          mov     r5, r2                  @ len
          mov     r6, r3                  @ bsz
          mov     r12, r0                 @ A_flat
          mov     r14, r3                 @ bsz
          cmp     r5, #0
          beq     .Lsqueeze_neon_done     @ nothing requested: write nothing
          b       .Loop_squeeze_neon

  .align  4
  .Loop_squeeze_neon:
          cmp     r5, #8
          blo     .Lsqueeze_neon_tail     @ 1..7 bytes left: byte-wise tail
          vld1.32 {d0}, [r12]!
          vst1.8  {d0}, [r4]!             @ endian-neutral store

          subs    r5, r5, #8              @ len -= 8
          beq     .Lsqueeze_neon_done

          subs    r14, r14, #8            @ bsz -= 8
          bhi     .Loop_squeeze_neon      @ block not exhausted yet

          @ Block exhausted and more output wanted: load the state into
          @ registers, permute, store it back.  d8-d15 are saved around this
          @ section only.
          vstmdb  sp!, {d8-d15}

          vld1.32 {d0}, [r0,:64]!         @ A[0][0..4]
          vld1.32 {d2}, [r0,:64]!
          vld1.32 {d4}, [r0,:64]!
          vld1.32 {d6}, [r0,:64]!
          vld1.32 {d8}, [r0,:64]!
          vld1.32 {d1}, [r0,:64]!         @ A[1][0..4]
          vld1.32 {d3}, [r0,:64]!
          vld1.32 {d5}, [r0,:64]!
          vld1.32 {d7}, [r0,:64]!
          vld1.32 {d9}, [r0,:64]!
          vld1.32 {d10}, [r0,:64]!        @ A[2][0..4]
          vld1.32 {d12}, [r0,:64]!
          vld1.32 {d14}, [r0,:64]!
          vld1.32 {d16}, [r0,:64]!
          vld1.32 {d18}, [r0,:64]!
          vld1.32 {d11}, [r0,:64]!        @ A[3][0..4]
          vld1.32 {d13}, [r0,:64]!
          vld1.32 {d15}, [r0,:64]!
          vld1.32 {d17}, [r0,:64]!
          vld1.32 {d19}, [r0,:64]!
          vld1.32 {d20-d23}, [r0,:64]!    @ A[4][0..4]
          vld1.32 {d24}, [r0,:64]
          sub     r0, r0, #24*8           @ rewind

          bl      KeccakF1600_neon

          mov     r12, r0                 @ A_flat: read cursor back to lane 0
          vst1.32 {d0}, [r0,:64]!         @ A[0][0..4]
          vst1.32 {d2}, [r0,:64]!
          vst1.32 {d4}, [r0,:64]!
          vst1.32 {d6}, [r0,:64]!
          vst1.32 {d8}, [r0,:64]!
          vst1.32 {d1}, [r0,:64]!         @ A[1][0..4]
          vst1.32 {d3}, [r0,:64]!
          vst1.32 {d5}, [r0,:64]!
          vst1.32 {d7}, [r0,:64]!
          vst1.32 {d9}, [r0,:64]!
          vst1.32 {d10}, [r0,:64]!        @ A[2][0..4]
          vst1.32 {d12}, [r0,:64]!
          vst1.32 {d14}, [r0,:64]!
          vst1.32 {d16}, [r0,:64]!
          vst1.32 {d18}, [r0,:64]!
          vst1.32 {d11}, [r0,:64]!        @ A[3][0..4]
          vst1.32 {d13}, [r0,:64]!
          vst1.32 {d15}, [r0,:64]!
          vst1.32 {d17}, [r0,:64]!
          vst1.32 {d19}, [r0,:64]!
          vst1.32 {d20-d23}, [r0,:64]!    @ A[4][0..4]
          mov     r14, r6                 @ bsz: the bl above overwrote r14
          vst1.32 {d24}, [r0,:64]
          mov     r0, r12                 @ rewind
          vldmia  sp!, {d8-d15}
          b       .Loop_squeeze_neon

  .align  4
  .Lsqueeze_neon_tail:
          @ Here 1 <= r5 <= 7.  The next lane is fetched as two words and
          @ emitted one byte at a time, low byte of r2 first.  Each cmp
          @ serves two exits: blo after an odd count, beq after an even one.
          ldmia   r12, {r2,r3}
          cmp     r5, #2
          strb    r2, [r4],#1             @ endian-neutral store
          mov     r2, r2, lsr#8
          blo     .Lsqueeze_neon_done     @ len == 1
          strb    r2, [r4], #1
          mov     r2, r2, lsr#8
          beq     .Lsqueeze_neon_done     @ len == 2
          strb    r2, [r4], #1
          mov     r2, r2, lsr#8
          cmp     r5, #4
          blo     .Lsqueeze_neon_done     @ len == 3
          strb    r2, [r4], #1
          beq     .Lsqueeze_neon_done     @ len == 4

          strb    r3, [r4], #1
          mov     r3, r3, lsr#8
          cmp     r5, #6
          blo     .Lsqueeze_neon_done     @ len == 5
          strb    r3, [r4], #1
          mov     r3, r3, lsr#8
          beq     .Lsqueeze_neon_done     @ len == 6
          strb    r3, [r4], #1            @ len == 7

  .Lsqueeze_neon_done:
          ldmia   sp!, {r4-r6,pc}
  .size   SHA3_squeeze_neon,.-SHA3_squeeze_neon
  2501. #endif
  @ Identification string stored in the object as a NUL-terminated byte
  @ sequence (.asciz), followed by an alignment directive so that whatever
  @ follows the string starts on an aligned boundary again.
  2502. .asciz "Keccak-1600 absorb and squeeze for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
  2503. .align 2