md5-fast-x8664.S 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*
  2. * MD5 hash in x86-64 assembly
  3. *
  4. * Copyright (c) 2016 Project Nayuki. (MIT License)
  5. * https://www.nayuki.io/page/fast-md5-hash-implementation-in-x86-assembly
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining a copy of
  8. * this software and associated documentation files (the "Software"), to deal in
  9. * the Software without restriction, including without limitation the rights to
  10. * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  11. * the Software, and to permit persons to whom the Software is furnished to do so,
  12. * subject to the following conditions:
  13. * - The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. * - The Software is provided "as is", without warranty of any kind, express or
  16. * implied, including but not limited to the warranties of merchantability,
  17. * fitness for a particular purpose and noninfringement. In no event shall the
  18. * authors or copyright holders be liable for any claim, damages or other
  19. * liability, whether in an action of contract, tort or otherwise, arising from,
  20. * out of or in connection with the Software or the use or other dealings in the
  21. * Software.
  22. */
  /*
   * void md5_compress(uint32_t state[4], const uint8_t block[64])
   *
   * Runs the 64 MD5 rounds over one 64-byte message block and adds the
   * resulting A/B/C/D values into state[0..3] in place.
   *
   * Syntax: GNU as, AT&T operand order, preprocessed by cpp (.S file).
   * ABI:    System V AMD64.
   * In:     rdi = state (four 32-bit words, read and written)
   *         rsi = block (64 bytes, read as sixteen 32-bit words)
   * Out:    none (state is updated in memory)
   * Clobb:  rax, rcx, rdx, rsi, rdi, r8, xmm0, xmm1, flags
   * Stack:  not used; rbx and rbp are parked in xmm0/xmm1 instead of being
   *         pushed. This relies on xmm0/xmm1 being caller-saved, which holds
   *         for System V.
   *
   * Storage usage:
   *   Bytes  Location  Description
   *       4  eax       MD5 state variable A
   *       4  ebx       MD5 state variable B
   *       4  ecx       MD5 state variable C
   *       4  edx       MD5 state variable D
   *       4  esi       Temporary for calculation per round
   *       4  edi       Temporary for calculation per round
   *       8  rbp       Base address of block array argument (read-only)
   *       8  r8        Base address of state array argument (read-only)
   *      16  xmm0      Caller's value of rbx (only low 64 bits are used)
   *      16  xmm1      Caller's value of rbp (only low 64 bits are used)
   */

    .text
    .p2align 4
.globl md5_compress
#if defined(__ELF__)
    .type md5_compress, @function
#endif
md5_compress:

/*
 * Round macros. Arguments:
 *   a, b, c, d  register names (without '%') holding the rotating state
 *   k           index of the 32-bit message word to mix in
 *   s           left-rotate amount
 *   t           additive round constant; values with the top bit set are
 *               written as negative numbers so they fit the signed 32-bit
 *               displacement of the lea
 * Every macro computes  a = b + rol(a + f(b,c,d) + block[k] + t, s)
 * and differs only in f. esi (and edi in ROUND1) are scratch.
 */

/* f = d ^ (b & (c ^ d)) */
#define ROUND0(a, b, c, d, k, s, t)  \
    movl  %c, %esi;         \
    addl  (k*4)(%rbp), %a;  \
    xorl  %d, %esi;         \
    andl  %b, %esi;         \
    xorl  %d, %esi;         \
    leal  t(%esi,%a), %a;   \
    roll  $s, %a;           \
    addl  %b, %a;

/* f = (d & b) | (~d & c) */
#define ROUND1(a, b, c, d, k, s, t)  \
    movl  %d, %esi;         \
    movl  %d, %edi;         \
    addl  (k*4)(%rbp), %a;  \
    notl  %esi;             \
    andl  %b, %edi;         \
    andl  %c, %esi;         \
    orl   %edi, %esi;       \
    leal  t(%esi,%a), %a;   \
    roll  $s, %a;           \
    addl  %b, %a;

/* f = b ^ c ^ d */
#define ROUND2(a, b, c, d, k, s, t)  \
    movl  %c, %esi;         \
    addl  (k*4)(%rbp), %a;  \
    xorl  %d, %esi;         \
    xorl  %b, %esi;         \
    leal  t(%esi,%a), %a;   \
    roll  $s, %a;           \
    addl  %b, %a;

/* f = c ^ (b | ~d) */
#define ROUND3(a, b, c, d, k, s, t)  \
    movl  %d, %esi;         \
    notl  %esi;             \
    addl  (k*4)(%rbp), %a;  \
    orl   %b, %esi;         \
    xorl  %c, %esi;         \
    leal  t(%esi,%a), %a;   \
    roll  $s, %a;           \
    addl  %b, %a;

    /* Save callee-saved registers in caller-saved xmm registers (no stack traffic) */
    movq  %rbx, %xmm0
    movq  %rbp, %xmm1

    /* Load arguments; rsi/rdi are reused as scratch below, so move the pointers away first */
    movq  %rsi, %rbp
    movl   0(%rdi), %eax  /* a */
    movl   4(%rdi), %ebx  /* b */
    movl   8(%rdi), %ecx  /* c */
    movl  12(%rdi), %edx  /* d */
    movq  %rdi, %r8

    /* 64 rounds of hashing */
    ROUND0(eax, ebx, ecx, edx,  0,  7, -0x28955B88)
    ROUND0(edx, eax, ebx, ecx,  1, 12, -0x173848AA)
    ROUND0(ecx, edx, eax, ebx,  2, 17,  0x242070DB)
    ROUND0(ebx, ecx, edx, eax,  3, 22, -0x3E423112)
    ROUND0(eax, ebx, ecx, edx,  4,  7, -0x0A83F051)
    ROUND0(edx, eax, ebx, ecx,  5, 12,  0x4787C62A)
    ROUND0(ecx, edx, eax, ebx,  6, 17, -0x57CFB9ED)
    ROUND0(ebx, ecx, edx, eax,  7, 22, -0x02B96AFF)
    ROUND0(eax, ebx, ecx, edx,  8,  7,  0x698098D8)
    ROUND0(edx, eax, ebx, ecx,  9, 12, -0x74BB0851)
    ROUND0(ecx, edx, eax, ebx, 10, 17, -0x0000A44F)
    ROUND0(ebx, ecx, edx, eax, 11, 22, -0x76A32842)
    ROUND0(eax, ebx, ecx, edx, 12,  7,  0x6B901122)
    ROUND0(edx, eax, ebx, ecx, 13, 12, -0x02678E6D)
    ROUND0(ecx, edx, eax, ebx, 14, 17, -0x5986BC72)
    ROUND0(ebx, ecx, edx, eax, 15, 22,  0x49B40821)

    ROUND1(eax, ebx, ecx, edx,  1,  5, -0x09E1DA9E)
    ROUND1(edx, eax, ebx, ecx,  6,  9, -0x3FBF4CC0)
    ROUND1(ecx, edx, eax, ebx, 11, 14,  0x265E5A51)
    ROUND1(ebx, ecx, edx, eax,  0, 20, -0x16493856)
    ROUND1(eax, ebx, ecx, edx,  5,  5, -0x29D0EFA3)
    ROUND1(edx, eax, ebx, ecx, 10,  9,  0x02441453)
    ROUND1(ecx, edx, eax, ebx, 15, 14, -0x275E197F)
    ROUND1(ebx, ecx, edx, eax,  4, 20, -0x182C0438)
    ROUND1(eax, ebx, ecx, edx,  9,  5,  0x21E1CDE6)
    ROUND1(edx, eax, ebx, ecx, 14,  9, -0x3CC8F82A)
    ROUND1(ecx, edx, eax, ebx,  3, 14, -0x0B2AF279)
    ROUND1(ebx, ecx, edx, eax,  8, 20,  0x455A14ED)
    ROUND1(eax, ebx, ecx, edx, 13,  5, -0x561C16FB)
    ROUND1(edx, eax, ebx, ecx,  2,  9, -0x03105C08)
    ROUND1(ecx, edx, eax, ebx,  7, 14,  0x676F02D9)
    ROUND1(ebx, ecx, edx, eax, 12, 20, -0x72D5B376)

    ROUND2(eax, ebx, ecx, edx,  5,  4, -0x0005C6BE)
    ROUND2(edx, eax, ebx, ecx,  8, 11, -0x788E097F)
    ROUND2(ecx, edx, eax, ebx, 11, 16,  0x6D9D6122)
    ROUND2(ebx, ecx, edx, eax, 14, 23, -0x021AC7F4)
    ROUND2(eax, ebx, ecx, edx,  1,  4, -0x5B4115BC)
    ROUND2(edx, eax, ebx, ecx,  4, 11,  0x4BDECFA9)
    ROUND2(ecx, edx, eax, ebx,  7, 16, -0x0944B4A0)
    ROUND2(ebx, ecx, edx, eax, 10, 23, -0x41404390)
    ROUND2(eax, ebx, ecx, edx, 13,  4,  0x289B7EC6)
    ROUND2(edx, eax, ebx, ecx,  0, 11, -0x155ED806)
    ROUND2(ecx, edx, eax, ebx,  3, 16, -0x2B10CF7B)
    ROUND2(ebx, ecx, edx, eax,  6, 23,  0x04881D05)
    ROUND2(eax, ebx, ecx, edx,  9,  4, -0x262B2FC7)
    ROUND2(edx, eax, ebx, ecx, 12, 11, -0x1924661B)
    ROUND2(ecx, edx, eax, ebx, 15, 16,  0x1FA27CF8)
    ROUND2(ebx, ecx, edx, eax,  2, 23, -0x3B53A99B)

    ROUND3(eax, ebx, ecx, edx,  0,  6, -0x0BD6DDBC)
    ROUND3(edx, eax, ebx, ecx,  7, 10,  0x432AFF97)
    ROUND3(ecx, edx, eax, ebx, 14, 15, -0x546BDC59)
    ROUND3(ebx, ecx, edx, eax,  5, 21, -0x036C5FC7)
    ROUND3(eax, ebx, ecx, edx, 12,  6,  0x655B59C3)
    ROUND3(edx, eax, ebx, ecx,  3, 10, -0x70F3336E)
    ROUND3(ecx, edx, eax, ebx, 10, 15, -0x00100B83)
    ROUND3(ebx, ecx, edx, eax,  1, 21, -0x7A7BA22F)
    ROUND3(eax, ebx, ecx, edx,  8,  6,  0x6FA87E4F)
    ROUND3(edx, eax, ebx, ecx, 15, 10, -0x01D31920)
    ROUND3(ecx, edx, eax, ebx,  6, 15, -0x5CFEBCEC)
    ROUND3(ebx, ecx, edx, eax, 13, 21,  0x4E0811A1)
    ROUND3(eax, ebx, ecx, edx,  4,  6, -0x08AC817E)
    ROUND3(edx, eax, ebx, ecx, 11, 10, -0x42C50DCB)
    ROUND3(ecx, edx, eax, ebx,  2, 15,  0x2AD7D2BB)
    ROUND3(ebx, ecx, edx, eax,  9, 21, -0x14792C6F)

    /* Save updated state: state[i] += working value (read-modify-write in memory) */
    addl  %eax,  0(%r8)
    addl  %ebx,  4(%r8)
    addl  %ecx,  8(%r8)
    addl  %edx, 12(%r8)

    /* Restore callee-saved registers */
    movq  %xmm0, %rbx
    movq  %xmm1, %rbp
    retq
#if defined(__ELF__)
    .size md5_compress, .-md5_compress
#endif

#if defined(__linux__) && defined(__ELF__)
    /* Mark the object as not needing an executable stack; push/pop keeps the current section intact */
    .pushsection .note.GNU-stack,"",@progbits
    .popsection
#endif