udivmodti4.c 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. //===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements __udivmodti4 for the compiler_rt library.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "int_lib.h"
  13. #ifdef CRT_HAS_128BIT
  14. // Returns the 128 bit division result by 64 bit. Result must fit in 64 bits.
  15. // Remainder stored in r.
  16. // Taken and adjusted from libdivide libdivide_128_div_64_to_64 division
  17. // fallback. For a correctness proof see the reference for this algorithm
  18. // in Knuth, Volume 2, section 4.3.1, Algorithm D.
  19. UNUSED
  20. static inline du_int udiv128by64to64default(du_int u1, du_int u0, du_int v,
  21. du_int *r) {
  22. const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
  23. const du_int b = (1ULL << (n_udword_bits / 2)); // Number base (32 bits)
  24. du_int un1, un0; // Norm. dividend LSD's
  25. du_int vn1, vn0; // Norm. divisor digits
  26. du_int q1, q0; // Quotient digits
  27. du_int un64, un21, un10; // Dividend digit pairs
  28. du_int rhat; // A remainder
  29. si_int s; // Shift amount for normalization
  30. s = __builtin_clzll(v);
  31. if (s > 0) {
  32. // Normalize the divisor.
  33. v = v << s;
  34. un64 = (u1 << s) | (u0 >> (n_udword_bits - s));
  35. un10 = u0 << s; // Shift dividend left
  36. } else {
  37. // Avoid undefined behavior of (u0 >> 64).
  38. un64 = u1;
  39. un10 = u0;
  40. }
  41. // Break divisor up into two 32-bit digits.
  42. vn1 = v >> (n_udword_bits / 2);
  43. vn0 = v & 0xFFFFFFFF;
  44. // Break right half of dividend into two digits.
  45. un1 = un10 >> (n_udword_bits / 2);
  46. un0 = un10 & 0xFFFFFFFF;
  47. // Compute the first quotient digit, q1.
  48. q1 = un64 / vn1;
  49. rhat = un64 - q1 * vn1;
  50. // q1 has at most error 2. No more than 2 iterations.
  51. while (q1 >= b || q1 * vn0 > b * rhat + un1) {
  52. q1 = q1 - 1;
  53. rhat = rhat + vn1;
  54. if (rhat >= b)
  55. break;
  56. }
  57. un21 = un64 * b + un1 - q1 * v;
  58. // Compute the second quotient digit.
  59. q0 = un21 / vn1;
  60. rhat = un21 - q0 * vn1;
  61. // q0 has at most error 2. No more than 2 iterations.
  62. while (q0 >= b || q0 * vn0 > b * rhat + un0) {
  63. q0 = q0 - 1;
  64. rhat = rhat + vn1;
  65. if (rhat >= b)
  66. break;
  67. }
  68. *r = (un21 * b + un0 - q0 * v) >> s;
  69. return q1 * b + q0;
  70. }
  71. static inline du_int udiv128by64to64(du_int u1, du_int u0, du_int v,
  72. du_int *r) {
  73. #if defined(__x86_64__)
  74. du_int result;
  75. __asm__("divq %[v]"
  76. : "=a"(result), "=d"(*r)
  77. : [ v ] "r"(v), "a"(u0), "d"(u1));
  78. return result;
  79. #else
  80. return udiv128by64to64default(u1, u0, v, r);
  81. #endif
  82. }
  83. // Effects: if rem != 0, *rem = a % b
  84. // Returns: a / b
  85. COMPILER_RT_ABI tu_int __udivmodti4(tu_int a, tu_int b, tu_int *rem) {
  86. const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
  87. utwords dividend;
  88. dividend.all = a;
  89. utwords divisor;
  90. divisor.all = b;
  91. utwords quotient;
  92. utwords remainder;
  93. if (divisor.all > dividend.all) {
  94. if (rem)
  95. *rem = dividend.all;
  96. return 0;
  97. }
  98. // When the divisor fits in 64 bits, we can use an optimized path.
  99. if (divisor.s.high == 0) {
  100. remainder.s.high = 0;
  101. if (dividend.s.high < divisor.s.low) {
  102. // The result fits in 64 bits.
  103. quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
  104. divisor.s.low, &remainder.s.low);
  105. quotient.s.high = 0;
  106. } else {
  107. // First, divide with the high part to get the remainder in dividend.s.high.
  108. // After that dividend.s.high < divisor.s.low.
  109. quotient.s.high = dividend.s.high / divisor.s.low;
  110. dividend.s.high = dividend.s.high % divisor.s.low;
  111. quotient.s.low = udiv128by64to64(dividend.s.high, dividend.s.low,
  112. divisor.s.low, &remainder.s.low);
  113. }
  114. if (rem)
  115. *rem = remainder.all;
  116. return quotient.all;
  117. }
  118. // 0 <= shift <= 63.
  119. si_int shift =
  120. __builtin_clzll(divisor.s.high) - __builtin_clzll(dividend.s.high);
  121. divisor.all <<= shift;
  122. quotient.s.high = 0;
  123. quotient.s.low = 0;
  124. for (; shift >= 0; --shift) {
  125. quotient.s.low <<= 1;
  126. // Branch free version of.
  127. // if (dividend.all >= divisor.all)
  128. // {
  129. // dividend.all -= divisor.all;
  130. // carry = 1;
  131. // }
  132. const ti_int s =
  133. (ti_int)(divisor.all - dividend.all - 1) >> (n_utword_bits - 1);
  134. quotient.s.low |= s & 1;
  135. dividend.all -= divisor.all & s;
  136. divisor.all >>= 1;
  137. }
  138. if (rem)
  139. *rem = dividend.all;
  140. return quotient.all;
  141. }
  142. #endif // CRT_HAS_128BIT