idctdsp.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. /*
  2. * This file is part of FFmpeg.
  3. *
  4. * FFmpeg is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU Lesser General Public
  6. * License as published by the Free Software Foundation; either
  7. * version 2.1 of the License, or (at your option) any later version.
  8. *
  9. * FFmpeg is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * Lesser General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU Lesser General Public
  15. * License along with FFmpeg; if not, write to the Free Software
  16. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. */
  18. #include "config.h"
  19. #include "config_components.h"
  20. #include "libavutil/attributes.h"
  21. #include "libavutil/common.h"
  22. #include "avcodec.h"
  23. #include "dct.h"
  24. #include "faanidct.h"
  25. #include "idctdsp.h"
  26. #include "simple_idct.h"
  27. #include "xvididct.h"
  28. av_cold void ff_permute_scantable(uint8_t dst[64], const uint8_t src[64],
  29. const uint8_t permutation[64])
  30. {
  31. for (int i = 0; i < 64; i++) {
  32. int j = src[i];
  33. dst[i] = permutation[j];
  34. }
  35. }
  36. av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
  37. enum idct_permutation_type perm_type)
  38. {
  39. int i;
  40. #if ARCH_X86
  41. if (ff_init_scantable_permutation_x86(idct_permutation,
  42. perm_type))
  43. return;
  44. #endif
  45. switch (perm_type) {
  46. case FF_IDCT_PERM_NONE:
  47. for (i = 0; i < 64; i++)
  48. idct_permutation[i] = i;
  49. break;
  50. case FF_IDCT_PERM_LIBMPEG2:
  51. for (i = 0; i < 64; i++)
  52. idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
  53. break;
  54. case FF_IDCT_PERM_TRANSPOSE:
  55. for (i = 0; i < 64; i++)
  56. idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
  57. break;
  58. case FF_IDCT_PERM_PARTTRANS:
  59. for (i = 0; i < 64; i++)
  60. idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
  61. break;
  62. default:
  63. av_log(NULL, AV_LOG_ERROR,
  64. "Internal error, IDCT permutation not set\n");
  65. }
  66. }
  67. void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
  68. ptrdiff_t line_size)
  69. {
  70. int i;
  71. /* read the pixels */
  72. for (i = 0; i < 8; i++) {
  73. pixels[0] = av_clip_uint8(block[0]);
  74. pixels[1] = av_clip_uint8(block[1]);
  75. pixels[2] = av_clip_uint8(block[2]);
  76. pixels[3] = av_clip_uint8(block[3]);
  77. pixels[4] = av_clip_uint8(block[4]);
  78. pixels[5] = av_clip_uint8(block[5]);
  79. pixels[6] = av_clip_uint8(block[6]);
  80. pixels[7] = av_clip_uint8(block[7]);
  81. pixels += line_size;
  82. block += 8;
  83. }
  84. }
  85. static void put_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
  86. int line_size)
  87. {
  88. int i;
  89. /* read the pixels */
  90. for(i=0;i<4;i++) {
  91. pixels[0] = av_clip_uint8(block[0]);
  92. pixels[1] = av_clip_uint8(block[1]);
  93. pixels[2] = av_clip_uint8(block[2]);
  94. pixels[3] = av_clip_uint8(block[3]);
  95. pixels += line_size;
  96. block += 8;
  97. }
  98. }
  99. static void put_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
  100. int line_size)
  101. {
  102. int i;
  103. /* read the pixels */
  104. for(i=0;i<2;i++) {
  105. pixels[0] = av_clip_uint8(block[0]);
  106. pixels[1] = av_clip_uint8(block[1]);
  107. pixels += line_size;
  108. block += 8;
  109. }
  110. }
  111. static void put_signed_pixels_clamped_c(const int16_t *block,
  112. uint8_t *restrict pixels,
  113. ptrdiff_t line_size)
  114. {
  115. int i, j;
  116. for (i = 0; i < 8; i++) {
  117. for (j = 0; j < 8; j++) {
  118. if (*block < -128)
  119. *pixels = 0;
  120. else if (*block > 127)
  121. *pixels = 255;
  122. else
  123. *pixels = (uint8_t) (*block + 128);
  124. block++;
  125. pixels++;
  126. }
  127. pixels += (line_size - 8);
  128. }
  129. }
  130. void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
  131. ptrdiff_t line_size)
  132. {
  133. int i;
  134. /* read the pixels */
  135. for (i = 0; i < 8; i++) {
  136. pixels[0] = av_clip_uint8(pixels[0] + block[0]);
  137. pixels[1] = av_clip_uint8(pixels[1] + block[1]);
  138. pixels[2] = av_clip_uint8(pixels[2] + block[2]);
  139. pixels[3] = av_clip_uint8(pixels[3] + block[3]);
  140. pixels[4] = av_clip_uint8(pixels[4] + block[4]);
  141. pixels[5] = av_clip_uint8(pixels[5] + block[5]);
  142. pixels[6] = av_clip_uint8(pixels[6] + block[6]);
  143. pixels[7] = av_clip_uint8(pixels[7] + block[7]);
  144. pixels += line_size;
  145. block += 8;
  146. }
  147. }
  148. static void add_pixels_clamped4_c(const int16_t *block, uint8_t *restrict pixels,
  149. int line_size)
  150. {
  151. int i;
  152. /* read the pixels */
  153. for(i=0;i<4;i++) {
  154. pixels[0] = av_clip_uint8(pixels[0] + block[0]);
  155. pixels[1] = av_clip_uint8(pixels[1] + block[1]);
  156. pixels[2] = av_clip_uint8(pixels[2] + block[2]);
  157. pixels[3] = av_clip_uint8(pixels[3] + block[3]);
  158. pixels += line_size;
  159. block += 8;
  160. }
  161. }
  162. static void add_pixels_clamped2_c(const int16_t *block, uint8_t *restrict pixels,
  163. int line_size)
  164. {
  165. int i;
  166. /* read the pixels */
  167. for(i=0;i<2;i++) {
  168. pixels[0] = av_clip_uint8(pixels[0] + block[0]);
  169. pixels[1] = av_clip_uint8(pixels[1] + block[1]);
  170. pixels += line_size;
  171. block += 8;
  172. }
  173. }
  174. static void ff_jref_idct4_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  175. {
  176. ff_j_rev_dct4 (block);
  177. put_pixels_clamped4_c(block, dest, line_size);
  178. }
  179. static void ff_jref_idct4_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  180. {
  181. ff_j_rev_dct4 (block);
  182. add_pixels_clamped4_c(block, dest, line_size);
  183. }
  184. static void ff_jref_idct2_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  185. {
  186. ff_j_rev_dct2 (block);
  187. put_pixels_clamped2_c(block, dest, line_size);
  188. }
  189. static void ff_jref_idct2_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  190. {
  191. ff_j_rev_dct2 (block);
  192. add_pixels_clamped2_c(block, dest, line_size);
  193. }
  194. static void ff_jref_idct1_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  195. {
  196. dest[0] = av_clip_uint8((block[0] + 4)>>3);
  197. }
  198. static void ff_jref_idct1_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
  199. {
  200. dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
  201. }
  202. av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
  203. {
  204. av_unused const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;
  205. if (avctx->lowres==1) {
  206. c->idct_put = ff_jref_idct4_put;
  207. c->idct_add = ff_jref_idct4_add;
  208. c->idct = ff_j_rev_dct4;
  209. c->perm_type = FF_IDCT_PERM_NONE;
  210. } else if (avctx->lowres==2) {
  211. c->idct_put = ff_jref_idct2_put;
  212. c->idct_add = ff_jref_idct2_add;
  213. c->idct = ff_j_rev_dct2;
  214. c->perm_type = FF_IDCT_PERM_NONE;
  215. } else if (avctx->lowres==3) {
  216. c->idct_put = ff_jref_idct1_put;
  217. c->idct_add = ff_jref_idct1_add;
  218. c->idct = ff_j_rev_dct1;
  219. c->perm_type = FF_IDCT_PERM_NONE;
  220. } else {
  221. if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
  222. /* 10-bit MPEG-4 Simple Studio Profile requires a higher precision IDCT
  223. However, it only uses idct_put */
  224. if (c->mpeg4_studio_profile) {
  225. c->idct_put = ff_simple_idct_put_int32_10bit;
  226. c->idct_add = NULL;
  227. c->idct = NULL;
  228. } else {
  229. c->idct_put = ff_simple_idct_put_int16_10bit;
  230. c->idct_add = ff_simple_idct_add_int16_10bit;
  231. c->idct = ff_simple_idct_int16_10bit;
  232. }
  233. c->perm_type = FF_IDCT_PERM_NONE;
  234. } else if (avctx->bits_per_raw_sample == 12) {
  235. c->idct_put = ff_simple_idct_put_int16_12bit;
  236. c->idct_add = ff_simple_idct_add_int16_12bit;
  237. c->idct = ff_simple_idct_int16_12bit;
  238. c->perm_type = FF_IDCT_PERM_NONE;
  239. } else {
  240. if (avctx->idct_algo == FF_IDCT_INT) {
  241. c->idct_put = ff_jref_idct_put;
  242. c->idct_add = ff_jref_idct_add;
  243. c->idct = ff_j_rev_dct;
  244. c->perm_type = FF_IDCT_PERM_LIBMPEG2;
  245. #if CONFIG_FAANIDCT
  246. } else if (avctx->idct_algo == FF_IDCT_FAAN) {
  247. c->idct_put = ff_faanidct_put;
  248. c->idct_add = ff_faanidct_add;
  249. c->idct = ff_faanidct;
  250. c->perm_type = FF_IDCT_PERM_NONE;
  251. #endif /* CONFIG_FAANIDCT */
  252. } else { // accurate/default
  253. c->idct_put = ff_simple_idct_put_int16_8bit;
  254. c->idct_add = ff_simple_idct_add_int16_8bit;
  255. c->idct = ff_simple_idct_int16_8bit;
  256. c->perm_type = FF_IDCT_PERM_NONE;
  257. }
  258. }
  259. }
  260. c->put_pixels_clamped = ff_put_pixels_clamped_c;
  261. c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
  262. c->add_pixels_clamped = ff_add_pixels_clamped_c;
  263. if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID)
  264. ff_xvid_idct_init(c, avctx);
  265. #if ARCH_AARCH64
  266. ff_idctdsp_init_aarch64(c, avctx, high_bit_depth);
  267. #elif ARCH_ALPHA
  268. ff_idctdsp_init_alpha(c, avctx, high_bit_depth);
  269. #elif ARCH_ARM
  270. ff_idctdsp_init_arm(c, avctx, high_bit_depth);
  271. #elif ARCH_PPC
  272. ff_idctdsp_init_ppc(c, avctx, high_bit_depth);
  273. #elif ARCH_RISCV
  274. ff_idctdsp_init_riscv(c, avctx, high_bit_depth);
  275. #elif ARCH_X86
  276. ff_idctdsp_init_x86(c, avctx, high_bit_depth);
  277. #elif ARCH_MIPS
  278. ff_idctdsp_init_mips(c, avctx, high_bit_depth);
  279. #elif ARCH_LOONGARCH
  280. ff_idctdsp_init_loongarch(c, avctx, high_bit_depth);
  281. #endif
  282. ff_init_scantable_permutation(c->idct_permutation,
  283. c->perm_type);
  284. }