crt.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. /*
  2. * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. * SUCH DAMAGE.
  26. */
  27. #include "mpdecimal.h"
  28. #include <assert.h>
  29. #include "constants.h"
  30. #include "crt.h"
  31. #include "numbertheory.h"
  32. #include "typearith.h"
  33. #include "umodarith.h"
  34. /* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */
  35. /* Multiply P1P2 by v, store result in w. */
  36. static inline void
  37. _crt_mulP1P2_3(mpd_uint_t w[3], mpd_uint_t v)
  38. {
  39. mpd_uint_t hi1, hi2, lo;
  40. _mpd_mul_words(&hi1, &lo, LH_P1P2, v);
  41. w[0] = lo;
  42. _mpd_mul_words(&hi2, &lo, UH_P1P2, v);
  43. lo = hi1 + lo;
  44. if (lo < hi1) hi2++;
  45. w[1] = lo;
  46. w[2] = hi2;
  47. }
  48. /* Add 3 words from v to w. The result is known to fit in w. */
  49. static inline void
  50. _crt_add3(mpd_uint_t w[3], mpd_uint_t v[3])
  51. {
  52. mpd_uint_t carry;
  53. w[0] = w[0] + v[0];
  54. carry = (w[0] < v[0]);
  55. w[1] = w[1] + v[1];
  56. if (w[1] < v[1]) w[2]++;
  57. w[1] = w[1] + carry;
  58. if (w[1] < carry) w[2]++;
  59. w[2] += v[2];
  60. }
  61. /* Divide 3 words in u by v, store result in w, return remainder. */
  62. static inline mpd_uint_t
  63. _crt_div3(mpd_uint_t *w, const mpd_uint_t *u, mpd_uint_t v)
  64. {
  65. mpd_uint_t r1 = u[2];
  66. mpd_uint_t r2;
  67. if (r1 < v) {
  68. w[2] = 0;
  69. }
  70. else {
  71. _mpd_div_word(&w[2], &r1, u[2], v); /* GCOV_NOT_REACHED */
  72. }
  73. _mpd_div_words(&w[1], &r2, r1, u[1], v);
  74. _mpd_div_words(&w[0], &r1, r2, u[0], v);
  75. return r1;
  76. }
  77. /*
  78. * Chinese Remainder Theorem:
  79. * Algorithm from Joerg Arndt, "Matters Computational",
  80. * Chapter 37.4.1 [http://www.jjj.de/fxt/]
  81. *
  82. * See also Knuth, TAOCP, Volume 2, 4.3.2, exercise 7.
  83. */
  84. /*
  85. * CRT with carry: x1, x2, x3 contain numbers modulo p1, p2, p3. For each
  86. * triple of members of the arrays, find the unique z modulo p1*p2*p3, with
  87. * zmax = p1*p2*p3 - 1.
  88. *
  89. * In each iteration of the loop, split z into result[i] = z % MPD_RADIX
  90. * and carry = z / MPD_RADIX. Let N be the size of carry[] and cmax the
  91. * maximum carry.
  92. *
  93. * Limits for the 32-bit build:
  94. *
  95. * N = 2**96
  96. * cmax = 7711435591312380274
  97. *
  98. * Limits for the 64 bit build:
  99. *
  100. * N = 2**192
  101. * cmax = 627710135393475385904124401220046371710
  102. *
  103. * The following statements hold for both versions:
  104. *
  105. * 1) cmax + zmax < N, so the addition does not overflow.
  106. *
  107. * 2) (cmax + zmax) / MPD_RADIX == cmax.
  108. *
  109. * 3) If c <= cmax, then c_next = (c + zmax) / MPD_RADIX <= cmax.
  110. */
  111. void
  112. crt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t rsize)
  113. {
  114. mpd_uint_t p1 = mpd_moduli[P1];
  115. mpd_uint_t umod;
  116. #ifdef PPRO
  117. double dmod;
  118. uint32_t dinvmod[3];
  119. #endif
  120. mpd_uint_t a1, a2, a3;
  121. mpd_uint_t s;
  122. mpd_uint_t z[3], t[3];
  123. mpd_uint_t carry[3] = {0,0,0};
  124. mpd_uint_t hi, lo;
  125. mpd_size_t i;
  126. for (i = 0; i < rsize; i++) {
  127. a1 = x1[i];
  128. a2 = x2[i];
  129. a3 = x3[i];
  130. SETMODULUS(P2);
  131. s = ext_submod(a2, a1, umod);
  132. s = MULMOD(s, INV_P1_MOD_P2);
  133. _mpd_mul_words(&hi, &lo, s, p1);
  134. lo = lo + a1;
  135. if (lo < a1) hi++;
  136. SETMODULUS(P3);
  137. s = dw_submod(a3, hi, lo, umod);
  138. s = MULMOD(s, INV_P1P2_MOD_P3);
  139. z[0] = lo;
  140. z[1] = hi;
  141. z[2] = 0;
  142. _crt_mulP1P2_3(t, s);
  143. _crt_add3(z, t);
  144. _crt_add3(carry, z);
  145. x1[i] = _crt_div3(carry, carry, MPD_RADIX);
  146. }
  147. assert(carry[0] == 0 && carry[1] == 0 && carry[2] == 0);
  148. }