invert.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. /*
  2. * The copyright in this software is being made available under the 2-clauses
  3. * BSD License, included below. This software may be subject to other third
  4. * party and contributor rights, including patent rights, and no such rights
  5. * are granted under this license.
  6. *
  7. * Copyright (c) 2008, Jerome Fimes, Communications & Systemes <jerome.fimes@c-s.fr>
  8. * All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. *
  19. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  20. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. * POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. #include "opj_includes.h"
  32. /**
  33. * LUP decomposition
  34. */
  35. static OPJ_BOOL opj_lupDecompose(OPJ_FLOAT32 * matrix,
  36. OPJ_UINT32 * permutations,
  37. OPJ_FLOAT32 * p_swap_area,
  38. OPJ_UINT32 nb_compo);
  39. /**
  40. * LUP solving
  41. */
  42. static void opj_lupSolve(OPJ_FLOAT32 * pResult,
  43. OPJ_FLOAT32* pMatrix,
  44. OPJ_FLOAT32* pVector,
  45. OPJ_UINT32* pPermutations,
  46. OPJ_UINT32 nb_compo,
  47. OPJ_FLOAT32 * p_intermediate_data);
  48. /**
  49. *LUP inversion (call with the result of lupDecompose)
  50. */
  51. static void opj_lupInvert(OPJ_FLOAT32 * pSrcMatrix,
  52. OPJ_FLOAT32 * pDestMatrix,
  53. OPJ_UINT32 nb_compo,
  54. OPJ_UINT32 * pPermutations,
  55. OPJ_FLOAT32 * p_src_temp,
  56. OPJ_FLOAT32 * p_dest_temp,
  57. OPJ_FLOAT32 * p_swap_area);
  58. /*
  59. ==========================================================
  60. Matric inversion interface
  61. ==========================================================
  62. */
  63. /**
  64. * Matrix inversion.
  65. */
  66. OPJ_BOOL opj_matrix_inversion_f(OPJ_FLOAT32 * pSrcMatrix,
  67. OPJ_FLOAT32 * pDestMatrix,
  68. OPJ_UINT32 nb_compo)
  69. {
  70. OPJ_BYTE * l_data = 00;
  71. OPJ_UINT32 l_permutation_size = nb_compo * (OPJ_UINT32)sizeof(OPJ_UINT32);
  72. OPJ_UINT32 l_swap_size = nb_compo * (OPJ_UINT32)sizeof(OPJ_FLOAT32);
  73. OPJ_UINT32 l_total_size = l_permutation_size + 3 * l_swap_size;
  74. OPJ_UINT32 * lPermutations = 00;
  75. OPJ_FLOAT32 * l_double_data = 00;
  76. l_data = (OPJ_BYTE *) opj_malloc(l_total_size);
  77. if (l_data == 0) {
  78. return OPJ_FALSE;
  79. }
  80. lPermutations = (OPJ_UINT32 *) l_data;
  81. l_double_data = (OPJ_FLOAT32 *)(l_data + l_permutation_size);
  82. memset(lPermutations, 0, l_permutation_size);
  83. if (! opj_lupDecompose(pSrcMatrix, lPermutations, l_double_data, nb_compo)) {
  84. opj_free(l_data);
  85. return OPJ_FALSE;
  86. }
  87. opj_lupInvert(pSrcMatrix, pDestMatrix, nb_compo, lPermutations, l_double_data,
  88. l_double_data + nb_compo, l_double_data + 2 * nb_compo);
  89. opj_free(l_data);
  90. return OPJ_TRUE;
  91. }
  92. /*
  93. ==========================================================
  94. Local functions
  95. ==========================================================
  96. */
  97. static OPJ_BOOL opj_lupDecompose(OPJ_FLOAT32 * matrix,
  98. OPJ_UINT32 * permutations,
  99. OPJ_FLOAT32 * p_swap_area,
  100. OPJ_UINT32 nb_compo)
  101. {
  102. OPJ_UINT32 * tmpPermutations = permutations;
  103. OPJ_UINT32 * dstPermutations;
  104. OPJ_UINT32 k2 = 0, t;
  105. OPJ_FLOAT32 temp;
  106. OPJ_UINT32 i, j, k;
  107. OPJ_FLOAT32 p;
  108. OPJ_UINT32 lLastColum = nb_compo - 1;
  109. OPJ_UINT32 lSwapSize = nb_compo * (OPJ_UINT32)sizeof(OPJ_FLOAT32);
  110. OPJ_FLOAT32 * lTmpMatrix = matrix;
  111. OPJ_FLOAT32 * lColumnMatrix, * lDestMatrix;
  112. OPJ_UINT32 offset = 1;
  113. OPJ_UINT32 lStride = nb_compo - 1;
  114. /*initialize permutations */
  115. for (i = 0; i < nb_compo; ++i) {
  116. *tmpPermutations++ = i;
  117. }
  118. /* now make a pivot with column switch */
  119. tmpPermutations = permutations;
  120. for (k = 0; k < lLastColum; ++k) {
  121. p = 0.0;
  122. /* take the middle element */
  123. lColumnMatrix = lTmpMatrix + k;
  124. /* make permutation with the biggest value in the column */
  125. for (i = k; i < nb_compo; ++i) {
  126. temp = ((*lColumnMatrix > 0) ? *lColumnMatrix : -(*lColumnMatrix));
  127. if (temp > p) {
  128. p = temp;
  129. k2 = i;
  130. }
  131. /* next line */
  132. lColumnMatrix += nb_compo;
  133. }
  134. /* a whole rest of 0 -> non singular */
  135. if (p == 0.0) {
  136. return OPJ_FALSE;
  137. }
  138. /* should we permute ? */
  139. if (k2 != k) {
  140. /*exchange of line */
  141. /* k2 > k */
  142. dstPermutations = tmpPermutations + k2 - k;
  143. /* swap indices */
  144. t = *tmpPermutations;
  145. *tmpPermutations = *dstPermutations;
  146. *dstPermutations = t;
  147. /* and swap entire line. */
  148. lColumnMatrix = lTmpMatrix + (k2 - k) * nb_compo;
  149. memcpy(p_swap_area, lColumnMatrix, lSwapSize);
  150. memcpy(lColumnMatrix, lTmpMatrix, lSwapSize);
  151. memcpy(lTmpMatrix, p_swap_area, lSwapSize);
  152. }
  153. /* now update data in the rest of the line and line after */
  154. lDestMatrix = lTmpMatrix + k;
  155. lColumnMatrix = lDestMatrix + nb_compo;
  156. /* take the middle element */
  157. temp = *(lDestMatrix++);
  158. /* now compute up data (i.e. coeff up of the diagonal). */
  159. for (i = offset; i < nb_compo; ++i) {
  160. /*lColumnMatrix; */
  161. /* divide the lower column elements by the diagonal value */
  162. /* matrix[i][k] /= matrix[k][k]; */
  163. /* p = matrix[i][k] */
  164. p = *lColumnMatrix / temp;
  165. *(lColumnMatrix++) = p;
  166. for (j = /* k + 1 */ offset; j < nb_compo; ++j) {
  167. /* matrix[i][j] -= matrix[i][k] * matrix[k][j]; */
  168. *(lColumnMatrix++) -= p * (*(lDestMatrix++));
  169. }
  170. /* come back to the k+1th element */
  171. lDestMatrix -= lStride;
  172. /* go to kth element of the next line */
  173. lColumnMatrix += k;
  174. }
  175. /* offset is now k+2 */
  176. ++offset;
  177. /* 1 element less for stride */
  178. --lStride;
  179. /* next line */
  180. lTmpMatrix += nb_compo;
  181. /* next permutation element */
  182. ++tmpPermutations;
  183. }
  184. return OPJ_TRUE;
  185. }
  186. static void opj_lupSolve(OPJ_FLOAT32 * pResult,
  187. OPJ_FLOAT32 * pMatrix,
  188. OPJ_FLOAT32 * pVector,
  189. OPJ_UINT32* pPermutations,
  190. OPJ_UINT32 nb_compo, OPJ_FLOAT32 * p_intermediate_data)
  191. {
  192. OPJ_INT32 k;
  193. OPJ_UINT32 i, j;
  194. OPJ_FLOAT32 sum;
  195. OPJ_FLOAT32 u;
  196. OPJ_UINT32 lStride = nb_compo + 1;
  197. OPJ_FLOAT32 * lCurrentPtr;
  198. OPJ_FLOAT32 * lIntermediatePtr;
  199. OPJ_FLOAT32 * lDestPtr;
  200. OPJ_FLOAT32 * lTmpMatrix;
  201. OPJ_FLOAT32 * lLineMatrix = pMatrix;
  202. OPJ_FLOAT32 * lBeginPtr = pResult + nb_compo - 1;
  203. OPJ_FLOAT32 * lGeneratedData;
  204. OPJ_UINT32 * lCurrentPermutationPtr = pPermutations;
  205. lIntermediatePtr = p_intermediate_data;
  206. lGeneratedData = p_intermediate_data + nb_compo - 1;
  207. for (i = 0; i < nb_compo; ++i) {
  208. sum = 0.0;
  209. lCurrentPtr = p_intermediate_data;
  210. lTmpMatrix = lLineMatrix;
  211. for (j = 1; j <= i; ++j) {
  212. /* sum += matrix[i][j-1] * y[j-1]; */
  213. sum += (*(lTmpMatrix++)) * (*(lCurrentPtr++));
  214. }
  215. /*y[i] = pVector[pPermutations[i]] - sum; */
  216. *(lIntermediatePtr++) = pVector[*(lCurrentPermutationPtr++)] - sum;
  217. lLineMatrix += nb_compo;
  218. }
  219. /* we take the last point of the matrix */
  220. lLineMatrix = pMatrix + nb_compo * nb_compo - 1;
  221. /* and we take after the last point of the destination vector */
  222. lDestPtr = pResult + nb_compo;
  223. assert(nb_compo != 0);
  224. for (k = (OPJ_INT32)nb_compo - 1; k != -1 ; --k) {
  225. sum = 0.0;
  226. lTmpMatrix = lLineMatrix;
  227. u = *(lTmpMatrix++);
  228. lCurrentPtr = lDestPtr--;
  229. for (j = (OPJ_UINT32)(k + 1); j < nb_compo; ++j) {
  230. /* sum += matrix[k][j] * x[j] */
  231. sum += (*(lTmpMatrix++)) * (*(lCurrentPtr++));
  232. }
  233. /*x[k] = (y[k] - sum) / u; */
  234. *(lBeginPtr--) = (*(lGeneratedData--) - sum) / u;
  235. lLineMatrix -= lStride;
  236. }
  237. }
  238. static void opj_lupInvert(OPJ_FLOAT32 * pSrcMatrix,
  239. OPJ_FLOAT32 * pDestMatrix,
  240. OPJ_UINT32 nb_compo,
  241. OPJ_UINT32 * pPermutations,
  242. OPJ_FLOAT32 * p_src_temp,
  243. OPJ_FLOAT32 * p_dest_temp,
  244. OPJ_FLOAT32 * p_swap_area)
  245. {
  246. OPJ_UINT32 j, i;
  247. OPJ_FLOAT32 * lCurrentPtr;
  248. OPJ_FLOAT32 * lLineMatrix = pDestMatrix;
  249. OPJ_UINT32 lSwapSize = nb_compo * (OPJ_UINT32)sizeof(OPJ_FLOAT32);
  250. for (j = 0; j < nb_compo; ++j) {
  251. lCurrentPtr = lLineMatrix++;
  252. memset(p_src_temp, 0, lSwapSize);
  253. p_src_temp[j] = 1.0;
  254. opj_lupSolve(p_dest_temp, pSrcMatrix, p_src_temp, pPermutations, nb_compo,
  255. p_swap_area);
  256. for (i = 0; i < nb_compo; ++i) {
  257. *(lCurrentPtr) = p_dest_temp[i];
  258. lCurrentPtr += nb_compo;
  259. }
  260. }
  261. }