vf_colorspace.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. /*
  2. * Copyright (c) 2016 Ronald S. Bultje <rsbultje@gmail.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include <string.h>
  21. #include "checkasm.h"
  22. #include "libavfilter/colorspacedsp.h"
  23. #include "libavutil/common.h"
  24. #include "libavutil/internal.h"
  25. #include "libavutil/intreadwrite.h"
  26. #include "libavutil/mem_internal.h"
  27. #define W 64
  28. #define H 64
  29. #define randomize_buffers() \
  30. do { \
  31. unsigned mask = bpp_mask[idepth]; \
  32. int n, m; \
  33. int bpp = 1 + (!!idepth); \
  34. int buf_size = W * H * bpp; \
  35. for (m = 0; m < 3; m++) { \
  36. int ss = m ? ss_w + ss_h : 0; \
  37. int plane_sz = buf_size >> ss; \
  38. for (n = 0; n < plane_sz; n += 4) { \
  39. unsigned r = rnd() & mask; \
  40. AV_WN32A(&src[m][n], r); \
  41. } \
  42. } \
  43. } while (0)
  44. static const char *format_string[] = {
  45. "444", "422", "420"
  46. };
  47. static const unsigned bpp_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
  48. static void check_yuv2yuv(void)
  49. {
  50. declare_func(void, uint8_t *dst[3], const ptrdiff_t dst_stride[3],
  51. uint8_t *src[3], const ptrdiff_t src_stride[3],
  52. int w, int h, const int16_t coeff[3][3][8],
  53. const int16_t off[2][8]);
  54. ColorSpaceDSPContext dsp;
  55. int idepth, odepth, fmt, n;
  56. LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
  57. LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
  58. LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
  59. uint8_t *src[3] = { src_y, src_u, src_v };
  60. LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
  61. LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
  62. LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
  63. LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
  64. LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
  65. LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
  66. uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
  67. LOCAL_ALIGNED_32(int16_t, offset_buf, [16]);
  68. LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
  69. int16_t (*offset)[8] = (int16_t(*)[8]) offset_buf;
  70. int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
  71. ff_colorspacedsp_init(&dsp);
  72. for (n = 0; n < 8; n++) {
  73. offset[0][n] = offset[1][n] = 16;
  74. coeff[0][0][n] = (1 << 14) + (1 << 7) + 1;
  75. coeff[0][1][n] = (1 << 7) - 1;
  76. coeff[0][2][n] = -(1 << 8);
  77. coeff[1][0][n] = coeff[2][0][n] = 0;
  78. coeff[1][1][n] = (1 << 14) + (1 << 7);
  79. coeff[1][2][n] = -(1 << 7);
  80. coeff[2][2][n] = (1 << 14) - (1 << 6);
  81. coeff[2][1][n] = 1 << 6;
  82. }
  83. for (idepth = 0; idepth < 3; idepth++) {
  84. for (odepth = 0; odepth < 3; odepth++) {
  85. for (fmt = 0; fmt < 3; fmt++) {
  86. if (check_func(dsp.yuv2yuv[idepth][odepth][fmt],
  87. "ff_colorspacedsp_yuv2yuv_%sp%dto%d",
  88. format_string[fmt],
  89. idepth * 2 + 8, odepth * 2 + 8)) {
  90. int ss_w = !!fmt, ss_h = fmt == 2;
  91. int y_src_stride = W << !!idepth, y_dst_stride = W << !!odepth;
  92. int uv_src_stride = y_src_stride >> ss_w, uv_dst_stride = y_dst_stride >> ss_w;
  93. randomize_buffers();
  94. call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
  95. src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
  96. W, H, coeff, offset);
  97. call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
  98. src, (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
  99. W, H, coeff, offset);
  100. if (memcmp(dst0[0], dst1[0], y_dst_stride * H) ||
  101. memcmp(dst0[1], dst1[1], uv_dst_stride * H >> ss_h) ||
  102. memcmp(dst0[2], dst1[2], uv_dst_stride * H >> ss_h)) {
  103. fail();
  104. }
  105. }
  106. }
  107. }
  108. }
  109. report("yuv2yuv");
  110. }
  111. static void check_yuv2rgb(void)
  112. {
  113. declare_func(void, int16_t *dst[3], ptrdiff_t dst_stride,
  114. uint8_t *src[3], const ptrdiff_t src_stride[3],
  115. int w, int h, const int16_t coeff[3][3][8],
  116. const int16_t off[8]);
  117. ColorSpaceDSPContext dsp;
  118. int idepth, fmt, n;
  119. LOCAL_ALIGNED_32(uint8_t, src_y, [W * H * 2]);
  120. LOCAL_ALIGNED_32(uint8_t, src_u, [W * H * 2]);
  121. LOCAL_ALIGNED_32(uint8_t, src_v, [W * H * 2]);
  122. uint8_t *src[3] = { src_y, src_u, src_v };
  123. LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
  124. LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
  125. LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
  126. LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
  127. LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
  128. LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
  129. int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
  130. LOCAL_ALIGNED_32(int16_t, offset, [8]);
  131. LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
  132. int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
  133. ff_colorspacedsp_init(&dsp);
  134. for (n = 0; n < 8; n++) {
  135. offset[n] = 16;
  136. coeff[0][0][n] = coeff[1][0][n] = coeff[2][0][n] = (1 << 14) | 1;
  137. coeff[0][1][n] = coeff[2][2][n] = 0;
  138. coeff[0][2][n] = 1 << 13;
  139. coeff[1][1][n] = -(1 << 12);
  140. coeff[1][2][n] = 1 << 12;
  141. coeff[2][1][n] = 1 << 11;
  142. }
  143. for (idepth = 0; idepth < 3; idepth++) {
  144. for (fmt = 0; fmt < 3; fmt++) {
  145. if (check_func(dsp.yuv2rgb[idepth][fmt],
  146. "ff_colorspacedsp_yuv2rgb_%sp%d",
  147. format_string[fmt], idepth * 2 + 8)) {
  148. int ss_w = !!fmt, ss_h = fmt == 2;
  149. int y_src_stride = W << !!idepth;
  150. int uv_src_stride = y_src_stride >> ss_w;
  151. randomize_buffers();
  152. call_ref(dst0, W, src,
  153. (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
  154. W, H, coeff, offset);
  155. call_new(dst1, W, src,
  156. (ptrdiff_t[3]) { y_src_stride, uv_src_stride, uv_src_stride },
  157. W, H, coeff, offset);
  158. if (memcmp(dst0[0], dst1[0], W * H * sizeof(int16_t)) ||
  159. memcmp(dst0[1], dst1[1], W * H * sizeof(int16_t)) ||
  160. memcmp(dst0[2], dst1[2], W * H * sizeof(int16_t))) {
  161. fail();
  162. }
  163. }
  164. }
  165. }
  166. report("yuv2rgb");
  167. }
  168. #undef randomize_buffers
  169. #define randomize_buffers() \
  170. do { \
  171. int y, x, p; \
  172. for (p = 0; p < 3; p++) { \
  173. for (y = 0; y < H; y++) { \
  174. for (x = 0; x < W; x++) { \
  175. int r = rnd() & 0x7fff; \
  176. r -= (32768 - 28672) >> 1; \
  177. src[p][y * W + x] = r; \
  178. } \
  179. } \
  180. } \
  181. } while (0)
  182. static void check_rgb2yuv(void)
  183. {
  184. declare_func(void, uint8_t *dst[3], const ptrdiff_t dst_stride[3],
  185. int16_t *src[3], ptrdiff_t src_stride,
  186. int w, int h, const int16_t coeff[3][3][8],
  187. const int16_t off[8]);
  188. ColorSpaceDSPContext dsp;
  189. int odepth, fmt, n;
  190. LOCAL_ALIGNED_32(int16_t, src_y, [W * H * 2]);
  191. LOCAL_ALIGNED_32(int16_t, src_u, [W * H * 2]);
  192. LOCAL_ALIGNED_32(int16_t, src_v, [W * H * 2]);
  193. int16_t *src[3] = { src_y, src_u, src_v };
  194. LOCAL_ALIGNED_32(uint8_t, dst0_y, [W * H * 2]);
  195. LOCAL_ALIGNED_32(uint8_t, dst0_u, [W * H * 2]);
  196. LOCAL_ALIGNED_32(uint8_t, dst0_v, [W * H * 2]);
  197. LOCAL_ALIGNED_32(uint8_t, dst1_y, [W * H * 2]);
  198. LOCAL_ALIGNED_32(uint8_t, dst1_u, [W * H * 2]);
  199. LOCAL_ALIGNED_32(uint8_t, dst1_v, [W * H * 2]);
  200. uint8_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
  201. LOCAL_ALIGNED_32(int16_t, offset, [8]);
  202. LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
  203. int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
  204. ff_colorspacedsp_init(&dsp);
  205. for (n = 0; n < 8; n++) {
  206. offset[n] = 16;
  207. // these somewhat resemble bt601/smpte170m coefficients
  208. coeff[0][0][n] = lrint(0.3 * (1 << 14));
  209. coeff[0][1][n] = lrint(0.6 * (1 << 14));
  210. coeff[0][2][n] = lrint(0.1 * (1 << 14));
  211. coeff[1][0][n] = lrint(-0.15 * (1 << 14));
  212. coeff[1][1][n] = lrint(-0.35 * (1 << 14));
  213. coeff[1][2][n] = lrint(0.5 * (1 << 14));
  214. coeff[2][0][n] = lrint(0.5 * (1 << 14));
  215. coeff[2][1][n] = lrint(-0.42 * (1 << 14));
  216. coeff[2][2][n] = lrint(-0.08 * (1 << 14));
  217. }
  218. for (odepth = 0; odepth < 3; odepth++) {
  219. for (fmt = 0; fmt < 3; fmt++) {
  220. if (check_func(dsp.rgb2yuv[odepth][fmt],
  221. "ff_colorspacedsp_rgb2yuv_%sp%d",
  222. format_string[fmt], odepth * 2 + 8)) {
  223. int ss_w = !!fmt, ss_h = fmt == 2;
  224. int y_dst_stride = W << !!odepth;
  225. int uv_dst_stride = y_dst_stride >> ss_w;
  226. randomize_buffers();
  227. call_ref(dst0, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
  228. src, W, W, H, coeff, offset);
  229. call_new(dst1, (ptrdiff_t[3]) { y_dst_stride, uv_dst_stride, uv_dst_stride },
  230. src, W, W, H, coeff, offset);
  231. if (memcmp(dst0[0], dst1[0], H * y_dst_stride) ||
  232. memcmp(dst0[1], dst1[1], H * uv_dst_stride >> ss_h) ||
  233. memcmp(dst0[2], dst1[2], H * uv_dst_stride >> ss_h)) {
  234. fail();
  235. }
  236. }
  237. }
  238. }
  239. report("rgb2yuv");
  240. }
  241. static void check_multiply3x3(void)
  242. {
  243. declare_func(void, int16_t *data[3], ptrdiff_t stride,
  244. int w, int h, const int16_t coeff[3][3][8]);
  245. ColorSpaceDSPContext dsp;
  246. LOCAL_ALIGNED_32(int16_t, dst0_y, [W * H]);
  247. LOCAL_ALIGNED_32(int16_t, dst0_u, [W * H]);
  248. LOCAL_ALIGNED_32(int16_t, dst0_v, [W * H]);
  249. LOCAL_ALIGNED_32(int16_t, dst1_y, [W * H]);
  250. LOCAL_ALIGNED_32(int16_t, dst1_u, [W * H]);
  251. LOCAL_ALIGNED_32(int16_t, dst1_v, [W * H]);
  252. int16_t *dst0[3] = { dst0_y, dst0_u, dst0_v }, *dst1[3] = { dst1_y, dst1_u, dst1_v };
  253. int16_t **src = dst0;
  254. LOCAL_ALIGNED_32(int16_t, coeff_buf, [3 * 3 * 8]);
  255. int16_t (*coeff)[3][8] = (int16_t(*)[3][8]) coeff_buf;
  256. int n;
  257. ff_colorspacedsp_init(&dsp);
  258. for (n = 0; n < 8; n++) {
  259. coeff[0][0][n] = lrint(0.85 * (1 << 14));
  260. coeff[0][1][n] = lrint(0.10 * (1 << 14));
  261. coeff[0][2][n] = lrint(0.05 * (1 << 14));
  262. coeff[1][0][n] = lrint(-0.1 * (1 << 14));
  263. coeff[1][1][n] = lrint(0.95 * (1 << 14));
  264. coeff[1][2][n] = lrint(0.15 * (1 << 14));
  265. coeff[2][0][n] = lrint(-0.2 * (1 << 14));
  266. coeff[2][1][n] = lrint(0.30 * (1 << 14));
  267. coeff[2][2][n] = lrint(0.90 * (1 << 14));
  268. }
  269. if (check_func(dsp.multiply3x3, "ff_colorspacedsp_multiply3x3")) {
  270. randomize_buffers();
  271. memcpy(dst1_y, dst0_y, W * H * sizeof(*dst1_y));
  272. memcpy(dst1_u, dst0_u, W * H * sizeof(*dst1_u));
  273. memcpy(dst1_v, dst0_v, W * H * sizeof(*dst1_v));
  274. call_ref(dst0, W, W, H, coeff);
  275. call_new(dst1, W, W, H, coeff);
  276. if (memcmp(dst0[0], dst1[0], H * W * sizeof(*dst0_y)) ||
  277. memcmp(dst0[1], dst1[1], H * W * sizeof(*dst0_u)) ||
  278. memcmp(dst0[2], dst1[2], H * W * sizeof(*dst0_v))) {
  279. fail();
  280. }
  281. }
  282. report("multiply3x3");
  283. }
  284. void checkasm_check_colorspace(void)
  285. {
  286. check_yuv2yuv();
  287. check_yuv2rgb();
  288. check_rgb2yuv();
  289. check_multiply3x3();
  290. }