convolute.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*
  2. * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
  16. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. * SUCH DAMAGE.
  26. */
  27. #include "mpdecimal.h"
  28. #include "bits.h"
  29. #include "constants.h"
  30. #include "convolute.h"
  31. #include "fnt.h"
  32. #include "fourstep.h"
  33. #include "numbertheory.h"
  34. #include "sixstep.h"
  35. #include "umodarith.h"
  36. /* Bignum: Fast convolution using the Number Theoretic Transform. Used for
  37. the multiplication of very large coefficients. */
  38. /* Convolute the data in c1 and c2. Result is in c1. */
  39. int
  40. fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum)
  41. {
  42. int (*fnt)(mpd_uint_t *, mpd_size_t, int);
  43. int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
  44. #ifdef PPRO
  45. double dmod;
  46. uint32_t dinvmod[3];
  47. #endif
  48. mpd_uint_t n_inv, umod;
  49. mpd_size_t i;
  50. SETMODULUS(modnum);
  51. n_inv = POWMOD(n, (umod-2));
  52. if (ispower2(n)) {
  53. if (n > SIX_STEP_THRESHOLD) {
  54. fnt = six_step_fnt;
  55. inv_fnt = inv_six_step_fnt;
  56. }
  57. else {
  58. fnt = std_fnt;
  59. inv_fnt = std_inv_fnt;
  60. }
  61. }
  62. else {
  63. fnt = four_step_fnt;
  64. inv_fnt = inv_four_step_fnt;
  65. }
  66. if (!fnt(c1, n, modnum)) {
  67. return 0;
  68. }
  69. if (!fnt(c2, n, modnum)) {
  70. return 0;
  71. }
  72. for (i = 0; i < n-1; i += 2) {
  73. mpd_uint_t x0 = c1[i];
  74. mpd_uint_t y0 = c2[i];
  75. mpd_uint_t x1 = c1[i+1];
  76. mpd_uint_t y1 = c2[i+1];
  77. MULMOD2(&x0, y0, &x1, y1);
  78. c1[i] = x0;
  79. c1[i+1] = x1;
  80. }
  81. if (!inv_fnt(c1, n, modnum)) {
  82. return 0;
  83. }
  84. for (i = 0; i < n-3; i += 4) {
  85. mpd_uint_t x0 = c1[i];
  86. mpd_uint_t x1 = c1[i+1];
  87. mpd_uint_t x2 = c1[i+2];
  88. mpd_uint_t x3 = c1[i+3];
  89. MULMOD2C(&x0, &x1, n_inv);
  90. MULMOD2C(&x2, &x3, n_inv);
  91. c1[i] = x0;
  92. c1[i+1] = x1;
  93. c1[i+2] = x2;
  94. c1[i+3] = x3;
  95. }
  96. return 1;
  97. }
  98. /* Autoconvolute the data in c1. Result is in c1. */
  99. int
  100. fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum)
  101. {
  102. int (*fnt)(mpd_uint_t *, mpd_size_t, int);
  103. int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int);
  104. #ifdef PPRO
  105. double dmod;
  106. uint32_t dinvmod[3];
  107. #endif
  108. mpd_uint_t n_inv, umod;
  109. mpd_size_t i;
  110. SETMODULUS(modnum);
  111. n_inv = POWMOD(n, (umod-2));
  112. if (ispower2(n)) {
  113. if (n > SIX_STEP_THRESHOLD) {
  114. fnt = six_step_fnt;
  115. inv_fnt = inv_six_step_fnt;
  116. }
  117. else {
  118. fnt = std_fnt;
  119. inv_fnt = std_inv_fnt;
  120. }
  121. }
  122. else {
  123. fnt = four_step_fnt;
  124. inv_fnt = inv_four_step_fnt;
  125. }
  126. if (!fnt(c1, n, modnum)) {
  127. return 0;
  128. }
  129. for (i = 0; i < n-1; i += 2) {
  130. mpd_uint_t x0 = c1[i];
  131. mpd_uint_t x1 = c1[i+1];
  132. MULMOD2(&x0, x0, &x1, x1);
  133. c1[i] = x0;
  134. c1[i+1] = x1;
  135. }
  136. if (!inv_fnt(c1, n, modnum)) {
  137. return 0;
  138. }
  139. for (i = 0; i < n-3; i += 4) {
  140. mpd_uint_t x0 = c1[i];
  141. mpd_uint_t x1 = c1[i+1];
  142. mpd_uint_t x2 = c1[i+2];
  143. mpd_uint_t x3 = c1[i+3];
  144. MULMOD2C(&x0, &x1, n_inv);
  145. MULMOD2C(&x2, &x3, n_inv);
  146. c1[i] = x0;
  147. c1[i+1] = x1;
  148. c1[i+2] = x2;
  149. c1[i+3] = x3;
  150. }
  151. return 1;
  152. }