xvididct.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. /*
  2. * Xvid MPEG-4 IDCT
  3. *
  4. * Copyright (C) 2006-2011 Xvid Solutions GmbH
  5. *
  6. * This file is part of Libav.
  7. *
  8. * Libav is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * Libav is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with Libav; if not, write to the Free Software
  20. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21. */
  22. /**
  23. * @file
  24. * Walken IDCT
  25. * Alternative IDCT implementation for decoding compatibility.
  26. *
  27. * @author Skal
  28. * @note This C version is not the original IDCT, but a modified one that
  29. * yields the same error profile as the MMX/MMXEXT/SSE2 versions.
  30. */
  31. #include "config.h"
  32. #include "libavutil/attributes.h"
  33. #include "avcodec.h"
  34. #include "idctdsp.h"
  35. #include "xvididct.h"
  36. #define ROW_SHIFT 11
  37. #define COL_SHIFT 6
  38. // #define FIX(x) (int)((x) * (1 << ROW_SHIFT))
  39. #define RND0 65536 // 1 << (COL_SHIFT + ROW_SHIFT - 1);
  40. #define RND1 3597 // FIX (1.75683487303);
  41. #define RND2 2260 // FIX (1.10355339059);
  42. #define RND3 1203 // FIX (0.587788325588);
  43. #define RND4 0
  44. #define RND5 120 // FIX (0.058658283817);
  45. #define RND6 512 // FIX (0.25);
  46. #define RND7 512 // FIX (0.25);
  47. static const int TAB04[] = { 22725, 21407, 19266, 16384, 12873, 8867, 4520 };
  48. static const int TAB17[] = { 31521, 29692, 26722, 22725, 17855, 12299, 6270 };
  49. static const int TAB26[] = { 29692, 27969, 25172, 21407, 16819, 11585, 5906 };
  50. static const int TAB35[] = { 26722, 25172, 22654, 19266, 15137, 10426, 5315 };
  51. static int idct_row(short *in, const int *const tab, int rnd)
  52. {
  53. const int c1 = tab[0];
  54. const int c2 = tab[1];
  55. const int c3 = tab[2];
  56. const int c4 = tab[3];
  57. const int c5 = tab[4];
  58. const int c6 = tab[5];
  59. const int c7 = tab[6];
  60. const int right = in[5] | in[6] | in[7];
  61. const int left = in[1] | in[2] | in[3];
  62. if (!(right | in[4])) {
  63. const int k = c4 * in[0] + rnd;
  64. if (left) {
  65. const int a0 = k + c2 * in[2];
  66. const int a1 = k + c6 * in[2];
  67. const int a2 = k - c6 * in[2];
  68. const int a3 = k - c2 * in[2];
  69. const int b0 = c1 * in[1] + c3 * in[3];
  70. const int b1 = c3 * in[1] - c7 * in[3];
  71. const int b2 = c5 * in[1] - c1 * in[3];
  72. const int b3 = c7 * in[1] - c5 * in[3];
  73. in[0] = (a0 + b0) >> ROW_SHIFT;
  74. in[1] = (a1 + b1) >> ROW_SHIFT;
  75. in[2] = (a2 + b2) >> ROW_SHIFT;
  76. in[3] = (a3 + b3) >> ROW_SHIFT;
  77. in[4] = (a3 - b3) >> ROW_SHIFT;
  78. in[5] = (a2 - b2) >> ROW_SHIFT;
  79. in[6] = (a1 - b1) >> ROW_SHIFT;
  80. in[7] = (a0 - b0) >> ROW_SHIFT;
  81. } else {
  82. const int a0 = k >> ROW_SHIFT;
  83. if (a0) {
  84. in[0] =
  85. in[1] =
  86. in[2] =
  87. in[3] =
  88. in[4] =
  89. in[5] =
  90. in[6] =
  91. in[7] = a0;
  92. } else
  93. return 0;
  94. }
  95. } else if (!(left | right)) {
  96. const int a0 = (rnd + c4 * (in[0] + in[4])) >> ROW_SHIFT;
  97. const int a1 = (rnd + c4 * (in[0] - in[4])) >> ROW_SHIFT;
  98. in[0] = a0;
  99. in[3] = a0;
  100. in[4] = a0;
  101. in[7] = a0;
  102. in[1] = a1;
  103. in[2] = a1;
  104. in[5] = a1;
  105. in[6] = a1;
  106. } else {
  107. const int k = c4 * in[0] + rnd;
  108. const int a0 = k + c2 * in[2] + c4 * in[4] + c6 * in[6];
  109. const int a1 = k + c6 * in[2] - c4 * in[4] - c2 * in[6];
  110. const int a2 = k - c6 * in[2] - c4 * in[4] + c2 * in[6];
  111. const int a3 = k - c2 * in[2] + c4 * in[4] - c6 * in[6];
  112. const int b0 = c1 * in[1] + c3 * in[3] + c5 * in[5] + c7 * in[7];
  113. const int b1 = c3 * in[1] - c7 * in[3] - c1 * in[5] - c5 * in[7];
  114. const int b2 = c5 * in[1] - c1 * in[3] + c7 * in[5] + c3 * in[7];
  115. const int b3 = c7 * in[1] - c5 * in[3] + c3 * in[5] - c1 * in[7];
  116. in[0] = (a0 + b0) >> ROW_SHIFT;
  117. in[1] = (a1 + b1) >> ROW_SHIFT;
  118. in[2] = (a2 + b2) >> ROW_SHIFT;
  119. in[3] = (a3 + b3) >> ROW_SHIFT;
  120. in[4] = (a3 - b3) >> ROW_SHIFT;
  121. in[5] = (a2 - b2) >> ROW_SHIFT;
  122. in[6] = (a1 - b1) >> ROW_SHIFT;
  123. in[7] = (a0 - b0) >> ROW_SHIFT;
  124. }
  125. return 1;
  126. }
  127. #define TAN1 0x32EC
  128. #define TAN2 0x6A0A
  129. #define TAN3 0xAB0E
  130. #define SQRT2 0x5A82
  131. #define MULT(c, x, n) (((c) * (x)) >> (n))
  132. // 12b version => #define MULT(c,x, n) ((((c) >> 3) * (x)) >> ((n) - 3))
  133. // 12b zero-testing version:
  134. #define BUTTERFLY(a, b, tmp) \
  135. (tmp) = (a) + (b); \
  136. (b) = (a) - (b); \
  137. (a) = (tmp)
  138. #define LOAD_BUTTERFLY(m1, m2, a, b, tmp, s) \
  139. (m1) = (s)[(a)] + (s)[(b)]; \
  140. (m2) = (s)[(a)] - (s)[(b)]
  141. static void idct_col_8(short *const in)
  142. {
  143. int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill;
  144. // odd
  145. mm4 = (int) in[7 * 8];
  146. mm5 = (int) in[5 * 8];
  147. mm6 = (int) in[3 * 8];
  148. mm7 = (int) in[1 * 8];
  149. mm0 = MULT(TAN1, mm4, 16) + mm7;
  150. mm1 = MULT(TAN1, mm7, 16) - mm4;
  151. mm2 = MULT(TAN3, mm5, 16) + mm6;
  152. mm3 = MULT(TAN3, mm6, 16) - mm5;
  153. mm7 = mm0 + mm2;
  154. mm4 = mm1 - mm3;
  155. mm0 = mm0 - mm2;
  156. mm1 = mm1 + mm3;
  157. mm6 = mm0 + mm1;
  158. mm5 = mm0 - mm1;
  159. mm5 = 2 * MULT(SQRT2, mm5, 16); // 2*sqrt2
  160. mm6 = 2 * MULT(SQRT2, mm6, 16); // Watch out: precision loss but done to match
  161. // the pmulhw used in MMX/MMXEXT/SSE2 versions
  162. // even
  163. mm1 = (int) in[2 * 8];
  164. mm2 = (int) in[6 * 8];
  165. mm3 = MULT(TAN2, mm2, 16) + mm1;
  166. mm2 = MULT(TAN2, mm1, 16) - mm2;
  167. LOAD_BUTTERFLY(mm0, mm1, 0 * 8, 4 * 8, spill, in);
  168. BUTTERFLY(mm0, mm3, spill);
  169. BUTTERFLY(mm0, mm7, spill);
  170. in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT);
  171. in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT);
  172. BUTTERFLY(mm3, mm4, mm0);
  173. in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT);
  174. in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT);
  175. BUTTERFLY(mm1, mm2, mm0);
  176. BUTTERFLY(mm1, mm6, mm0);
  177. in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT);
  178. in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT);
  179. BUTTERFLY(mm2, mm5, mm0);
  180. in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT);
  181. in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT);
  182. }
  183. static void idct_col_4(short *const in)
  184. {
  185. int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill;
  186. // odd
  187. mm0 = (int) in[1 * 8];
  188. mm2 = (int) in[3 * 8];
  189. mm1 = MULT(TAN1, mm0, 16);
  190. mm3 = MULT(TAN3, mm2, 16);
  191. mm7 = mm0 + mm2;
  192. mm4 = mm1 - mm3;
  193. mm0 = mm0 - mm2;
  194. mm1 = mm1 + mm3;
  195. mm6 = mm0 + mm1;
  196. mm5 = mm0 - mm1;
  197. mm6 = 2 * MULT(SQRT2, mm6, 16); // 2*sqrt2
  198. mm5 = 2 * MULT(SQRT2, mm5, 16);
  199. // even
  200. mm0 = mm1 = (int) in[0 * 8];
  201. mm3 = (int) in[2 * 8];
  202. mm2 = MULT(TAN2, mm3, 16);
  203. BUTTERFLY(mm0, mm3, spill);
  204. BUTTERFLY(mm0, mm7, spill);
  205. in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT);
  206. in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT);
  207. BUTTERFLY(mm3, mm4, mm0);
  208. in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT);
  209. in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT);
  210. BUTTERFLY(mm1, mm2, mm0);
  211. BUTTERFLY(mm1, mm6, mm0);
  212. in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT);
  213. in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT);
  214. BUTTERFLY(mm2, mm5, mm0);
  215. in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT);
  216. in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT);
  217. }
  218. static void idct_col_3(short *const in)
  219. {
  220. int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, spill;
  221. // odd
  222. mm7 = (int) in[1 * 8];
  223. mm4 = MULT(TAN1, mm7, 16);
  224. mm6 = mm7 + mm4;
  225. mm5 = mm7 - mm4;
  226. mm6 = 2 * MULT(SQRT2, mm6, 16); // 2*sqrt2
  227. mm5 = 2 * MULT(SQRT2, mm5, 16);
  228. // even
  229. mm0 = mm1 = (int) in[0 * 8];
  230. mm3 = (int) in[2 * 8];
  231. mm2 = MULT(TAN2, mm3, 16);
  232. BUTTERFLY(mm0, mm3, spill);
  233. BUTTERFLY(mm0, mm7, spill);
  234. in[8 * 0] = (int16_t) (mm0 >> COL_SHIFT);
  235. in[8 * 7] = (int16_t) (mm7 >> COL_SHIFT);
  236. BUTTERFLY(mm3, mm4, mm0);
  237. in[8 * 3] = (int16_t) (mm3 >> COL_SHIFT);
  238. in[8 * 4] = (int16_t) (mm4 >> COL_SHIFT);
  239. BUTTERFLY(mm1, mm2, mm0);
  240. BUTTERFLY(mm1, mm6, mm0);
  241. in[8 * 1] = (int16_t) (mm1 >> COL_SHIFT);
  242. in[8 * 6] = (int16_t) (mm6 >> COL_SHIFT);
  243. BUTTERFLY(mm2, mm5, mm0);
  244. in[8 * 2] = (int16_t) (mm2 >> COL_SHIFT);
  245. in[8 * 5] = (int16_t) (mm5 >> COL_SHIFT);
  246. }
  247. void ff_xvid_idct(int16_t *const in)
  248. {
  249. int i, rows = 0x07;
  250. idct_row(in + 0 * 8, TAB04, RND0);
  251. idct_row(in + 1 * 8, TAB17, RND1);
  252. idct_row(in + 2 * 8, TAB26, RND2);
  253. if (idct_row(in + 3 * 8, TAB35, RND3))
  254. rows |= 0x08;
  255. if (idct_row(in + 4 * 8, TAB04, RND4))
  256. rows |= 0x10;
  257. if (idct_row(in + 5 * 8, TAB35, RND5))
  258. rows |= 0x20;
  259. if (idct_row(in + 6 * 8, TAB26, RND6))
  260. rows |= 0x40;
  261. if (idct_row(in + 7 * 8, TAB17, RND7))
  262. rows |= 0x80;
  263. if (rows & 0xF0) {
  264. for (i = 0; i < 8; i++)
  265. idct_col_8(in + i);
  266. } else if (rows & 0x08) {
  267. for (i = 0; i < 8; i++)
  268. idct_col_4(in + i);
  269. } else {
  270. for (i = 0; i < 8; i++)
  271. idct_col_3(in + i);
  272. }
  273. }
/**
 * Run ff_xvid_idct() on block in place, then pass the transformed block to
 * ff_put_pixels_clamped() together with dest and line_size.
 * NOTE(review): presumably this stores the clamped samples into dest with a
 * row stride of line_size -- confirm against ff_put_pixels_clamped().
 */
static void xvid_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct(block);
    ff_put_pixels_clamped(block, dest, line_size);
}
/**
 * Run ff_xvid_idct() on block in place, then pass the transformed block to
 * ff_add_pixels_clamped() together with dest and line_size.
 * NOTE(review): presumably this adds the samples to the pixels already in
 * dest and clamps the sums -- confirm against ff_add_pixels_clamped().
 */
static void xvid_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_xvid_idct(block);
    ff_add_pixels_clamped(block, dest, line_size);
}
  284. av_cold void ff_xvid_idct_init(IDCTDSPContext *c, AVCodecContext *avctx)
  285. {
  286. const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
  287. if (!high_bit_depth &&
  288. (avctx->idct_algo == FF_IDCT_AUTO ||
  289. avctx->idct_algo == FF_IDCT_XVID)) {
  290. c->idct_put = xvid_idct_put;
  291. c->idct_add = xvid_idct_add;
  292. c->idct = ff_xvid_idct;
  293. c->perm_type = FF_IDCT_PERM_NONE;
  294. }
  295. if (ARCH_X86)
  296. ff_xvid_idct_init_x86(c, avctx, high_bit_depth);
  297. ff_init_scantable_permutation(c->idct_permutation, c->perm_type);
  298. }