sw_gbrp.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /*
  2. *
  3. * This file is part of FFmpeg.
  4. *
  5. * FFmpeg is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * FFmpeg is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18. */
  19. #include <string.h>
  20. #include "libavutil/common.h"
  21. #include "libavutil/intreadwrite.h"
  22. #include "libavutil/mem_internal.h"
  23. #include "libavutil/pixdesc.h"
  24. #include "libswscale/swscale.h"
  25. #include "libswscale/swscale_internal.h"
  26. #include "checkasm.h"
  /**
   * Fill the first size bytes at buf with pseudo-random data, storing one
   * 32-bit word obtained from rnd() every four bytes.
   *
   * A full 32-bit word is written on every step, so size is expected to be a
   * multiple of 4. Both arguments are parenthesized in the expansion so that
   * expressions with low-precedence operators (e.g. a conditional) can be
   * passed safely; note that size is still re-evaluated on every iteration.
   */
  #define randomize_buffers(buf, size)          \
      do {                                      \
          int j;                                \
          for (j = 0; j < (size); j += 4)       \
              AV_WN32((buf) + j, rnd());        \
      } while (0)
  33. static const int planar_fmts[] = {
  34. AV_PIX_FMT_GBRP,
  35. AV_PIX_FMT_GBRP9BE,
  36. AV_PIX_FMT_GBRP9LE,
  37. AV_PIX_FMT_GBRP10BE,
  38. AV_PIX_FMT_GBRP10LE,
  39. AV_PIX_FMT_GBRP12BE,
  40. AV_PIX_FMT_GBRP12LE,
  41. AV_PIX_FMT_GBRP14BE,
  42. AV_PIX_FMT_GBRP14LE,
  43. AV_PIX_FMT_GBRAP,
  44. AV_PIX_FMT_GBRAP10BE,
  45. AV_PIX_FMT_GBRAP10LE,
  46. AV_PIX_FMT_GBRAP12BE,
  47. AV_PIX_FMT_GBRAP12LE,
  48. AV_PIX_FMT_GBRP16BE,
  49. AV_PIX_FMT_GBRP16LE,
  50. AV_PIX_FMT_GBRAP16BE,
  51. AV_PIX_FMT_GBRAP16LE,
  52. AV_PIX_FMT_GBRPF32BE,
  53. AV_PIX_FMT_GBRPF32LE,
  54. AV_PIX_FMT_GBRAPF32BE,
  55. AV_PIX_FMT_GBRAPF32LE
  56. };
  57. static void check_output_yuv2gbrp(void)
  58. {
  59. SwsContext *sws;
  60. SwsInternal *c;
  61. const AVPixFmtDescriptor *desc;
  62. int fmi, fsi, isi, i;
  63. int dstW, byte_size, luma_filter_size, chr_filter_size;
  64. #define LARGEST_FILTER 16
  65. #define FILTER_SIZES 4
  66. static const int filter_sizes[] = {1, 4, 8, 16};
  67. #define LARGEST_INPUT_SIZE 512
  68. static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
  69. uint8_t *dst0[4];
  70. uint8_t *dst1[4];
  71. declare_func(void, SwsInternal *c, const int16_t *lumFilter,
  72. const int16_t **lumSrcx, int lumFilterSize,
  73. const int16_t *chrFilter, const int16_t **chrUSrcx,
  74. const int16_t **chrVSrcx, int chrFilterSize,
  75. const int16_t **alpSrcx, uint8_t **dest,
  76. int dstW, int y);
  77. const int16_t *luma[LARGEST_FILTER];
  78. const int16_t *chru[LARGEST_FILTER];
  79. const int16_t *chrv[LARGEST_FILTER];
  80. const int16_t *alpha[LARGEST_FILTER];
  81. LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]);
  82. LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]);
  83. LOCAL_ALIGNED_8(int32_t, src_y, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
  84. LOCAL_ALIGNED_8(int32_t, src_u, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
  85. LOCAL_ALIGNED_8(int32_t, src_v, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
  86. LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_FILTER * LARGEST_INPUT_SIZE]);
  87. LOCAL_ALIGNED_8(uint8_t, dst0_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  88. LOCAL_ALIGNED_8(uint8_t, dst0_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  89. LOCAL_ALIGNED_8(uint8_t, dst0_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  90. LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  91. LOCAL_ALIGNED_8(uint8_t, dst1_r, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  92. LOCAL_ALIGNED_8(uint8_t, dst1_g, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  93. LOCAL_ALIGNED_8(uint8_t, dst1_b, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  94. LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  95. randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
  96. randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
  97. randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
  98. randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * LARGEST_INPUT_SIZE * sizeof(int32_t));
  99. randomize_buffers((uint8_t*)luma_filter, LARGEST_FILTER * sizeof(int16_t));
  100. randomize_buffers((uint8_t*)chr_filter, LARGEST_FILTER * sizeof(int16_t));
  101. dst0[0] = (uint8_t*)dst0_g;
  102. dst0[1] = (uint8_t*)dst0_b;
  103. dst0[2] = (uint8_t*)dst0_r;
  104. dst0[3] = (uint8_t*)dst0_a;
  105. dst1[0] = (uint8_t*)dst1_g;
  106. dst1[1] = (uint8_t*)dst1_b;
  107. dst1[2] = (uint8_t*)dst1_r;
  108. dst1[3] = (uint8_t*)dst1_a;
  109. for (i = 0; i < LARGEST_FILTER; i++) {
  110. luma[i] = (int16_t *)(src_y + i*LARGEST_INPUT_SIZE);
  111. chru[i] = (int16_t *)(src_u + i*LARGEST_INPUT_SIZE);
  112. chrv[i] = (int16_t *)(src_v + i*LARGEST_INPUT_SIZE);
  113. alpha[i] = (int16_t *)(src_a + i*LARGEST_INPUT_SIZE);
  114. }
  115. sws = sws_alloc_context();
  116. if (sws_init_context(sws, NULL, NULL) < 0)
  117. fail();
  118. c = sws_internal(sws);
  119. sws->flags |= SWS_FULL_CHR_H_INT;
  120. for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
  121. for (fsi = 0; fsi < FILTER_SIZES; fsi++) {
  122. for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++ ) {
  123. desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
  124. sws->dst_format = planar_fmts[fmi];
  125. dstW = input_sizes[isi];
  126. luma_filter_size = filter_sizes[fsi];
  127. chr_filter_size = filter_sizes[fsi];
  128. if (desc->comp[0].depth > 16) {
  129. byte_size = 4;
  130. } else if (desc->comp[0].depth > 8) {
  131. byte_size = 2;
  132. } else {
  133. byte_size = 1;
  134. }
  135. ff_sws_init_scale(c);
  136. if (check_func(c->yuv2anyX, "yuv2%s_full_X_%d_%d", desc->name, luma_filter_size, dstW)) {
  137. for (i = 0; i < 4; i ++) {
  138. memset(dst0[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  139. memset(dst1[i], 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  140. }
  141. call_ref(c, luma_filter, luma, luma_filter_size,
  142. chr_filter, chru, chrv, chr_filter_size,
  143. alpha, dst0, dstW, 0);
  144. call_new(c, luma_filter, luma, luma_filter_size,
  145. chr_filter, chru, chrv, chr_filter_size,
  146. alpha, dst1, dstW, 0);
  147. if (memcmp(dst0[0], dst1[0], dstW * byte_size) ||
  148. memcmp(dst0[1], dst1[1], dstW * byte_size) ||
  149. memcmp(dst0[2], dst1[2], dstW * byte_size) ||
  150. memcmp(dst0[3], dst1[3], dstW * byte_size) )
  151. fail();
  152. bench_new(c, luma_filter, luma, luma_filter_size,
  153. chr_filter, chru, chrv, chr_filter_size,
  154. alpha, dst1, dstW, 0);
  155. }
  156. }
  157. }
  158. }
  159. sws_freeContext(sws);
  160. }
  161. #undef LARGEST_INPUT_SIZE
  162. static void check_input_planar_rgb_to_y(void)
  163. {
  164. SwsContext *sws;
  165. SwsInternal *c;
  166. const AVPixFmtDescriptor *desc;
  167. int fmi, isi;
  168. int dstW, byte_size;
  169. #define LARGEST_INPUT_SIZE 512
  170. static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
  171. const uint8_t *src[4];
  172. int32_t rgb2yuv[9] = {0};
  173. declare_func(void, uint8_t *dst, const uint8_t *src[4],
  174. int w, int32_t *rgb2yuv, void *opaque);
  175. LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
  176. LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
  177. LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
  178. LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
  179. LOCAL_ALIGNED_8(uint8_t, dst0_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  180. LOCAL_ALIGNED_8(uint8_t, dst1_y, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  181. randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
  182. randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
  183. randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
  184. randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
  185. randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
  186. src[0] = (uint8_t*)src_g;
  187. src[1] = (uint8_t*)src_b;
  188. src[2] = (uint8_t*)src_r;
  189. src[3] = (uint8_t*)src_a;
  190. sws = sws_alloc_context();
  191. if (sws_init_context(sws, NULL, NULL) < 0)
  192. fail();
  193. c = sws_internal(sws);
  194. for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
  195. for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++ ) {
  196. desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
  197. sws->src_format = planar_fmts[fmi];
  198. sws->dst_format = AV_PIX_FMT_YUVA444P16;
  199. byte_size = 2;
  200. dstW = input_sizes[isi];
  201. ff_sws_init_scale(c);
  202. if(check_func(c->readLumPlanar, "planar_%s_to_y_%d", desc->name, dstW)) {
  203. memset(dst0_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  204. memset(dst1_y, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  205. call_ref(dst0_y, src, dstW, rgb2yuv, NULL);
  206. call_new(dst1_y, src, dstW, rgb2yuv, NULL);
  207. if (memcmp(dst0_y, dst1_y, dstW * byte_size))
  208. fail();
  209. bench_new(dst1_y, src, dstW, rgb2yuv, NULL);
  210. }
  211. }
  212. }
  213. sws_freeContext(sws);
  214. }
  215. #undef LARGEST_INPUT_SIZE
  216. static void check_input_planar_rgb_to_uv(void)
  217. {
  218. SwsContext *sws;
  219. SwsInternal *c;
  220. const AVPixFmtDescriptor *desc;
  221. int fmi, isi;
  222. int dstW, byte_size;
  223. #define LARGEST_INPUT_SIZE 512
  224. static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
  225. const uint8_t *src[4];
  226. int32_t rgb2yuv[9] = {0};
  227. declare_func(void, uint8_t *dstU, uint8_t *dstV,
  228. const uint8_t *src[4], int w, int32_t *rgb2yuv, void *opaque);
  229. LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
  230. LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
  231. LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
  232. LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
  233. LOCAL_ALIGNED_8(uint8_t, dst0_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  234. LOCAL_ALIGNED_8(uint8_t, dst0_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  235. LOCAL_ALIGNED_8(uint8_t, dst1_u, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  236. LOCAL_ALIGNED_8(uint8_t, dst1_v, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  237. randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
  238. randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
  239. randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
  240. randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
  241. randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
  242. src[0] = (uint8_t*)src_g;
  243. src[1] = (uint8_t*)src_b;
  244. src[2] = (uint8_t*)src_r;
  245. src[3] = (uint8_t*)src_a;
  246. sws = sws_alloc_context();
  247. if (sws_init_context(sws, NULL, NULL) < 0)
  248. fail();
  249. c = sws_internal(sws);
  250. for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
  251. for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++ ) {
  252. desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
  253. sws->src_format = planar_fmts[fmi];
  254. sws->dst_format = AV_PIX_FMT_YUVA444P16;
  255. byte_size = 2;
  256. dstW = input_sizes[isi];
  257. ff_sws_init_scale(c);
  258. if(check_func(c->readChrPlanar, "planar_%s_to_uv_%d", desc->name, dstW)) {
  259. memset(dst0_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  260. memset(dst0_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  261. memset(dst1_u, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  262. memset(dst1_v, 0xFF, LARGEST_INPUT_SIZE * sizeof(int32_t));
  263. call_ref(dst0_u, dst0_v, src, dstW, rgb2yuv, NULL);
  264. call_new(dst1_u, dst1_v, src, dstW, rgb2yuv, NULL);
  265. if (memcmp(dst0_u, dst1_u, dstW * byte_size) ||
  266. memcmp(dst0_v, dst1_v, dstW * byte_size))
  267. fail();
  268. bench_new(dst1_u, dst1_v, src, dstW, rgb2yuv, NULL);
  269. }
  270. }
  271. }
  272. sws_freeContext(sws);
  273. }
  274. #undef LARGEST_INPUT_SIZE
  275. static void check_input_planar_rgb_to_a(void)
  276. {
  277. SwsContext *sws;
  278. SwsInternal *c;
  279. const AVPixFmtDescriptor *desc;
  280. int fmi, isi;
  281. int dstW, byte_size;
  282. #define LARGEST_INPUT_SIZE 512
  283. static const int input_sizes[] = {8, 24, 128, 144, 256, 512};
  284. const uint8_t *src[4];
  285. int32_t rgb2yuv[9] = {0};
  286. declare_func(void, uint8_t *dst, const uint8_t *src[4],
  287. int w, int32_t *rgb2yuv, void *opaque);
  288. LOCAL_ALIGNED_8(int32_t, src_r, [LARGEST_INPUT_SIZE]);
  289. LOCAL_ALIGNED_8(int32_t, src_g, [LARGEST_INPUT_SIZE]);
  290. LOCAL_ALIGNED_8(int32_t, src_b, [LARGEST_INPUT_SIZE]);
  291. LOCAL_ALIGNED_8(int32_t, src_a, [LARGEST_INPUT_SIZE]);
  292. LOCAL_ALIGNED_8(uint8_t, dst0_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  293. LOCAL_ALIGNED_8(uint8_t, dst1_a, [LARGEST_INPUT_SIZE * sizeof(int32_t)]);
  294. randomize_buffers((uint8_t*)src_r, LARGEST_INPUT_SIZE * sizeof(int32_t));
  295. randomize_buffers((uint8_t*)src_g, LARGEST_INPUT_SIZE * sizeof(int32_t));
  296. randomize_buffers((uint8_t*)src_b, LARGEST_INPUT_SIZE * sizeof(int32_t));
  297. randomize_buffers((uint8_t*)src_a, LARGEST_INPUT_SIZE * sizeof(int32_t));
  298. randomize_buffers((uint8_t*)rgb2yuv, 9 * sizeof(int32_t));
  299. src[0] = (uint8_t*)src_g;
  300. src[1] = (uint8_t*)src_b;
  301. src[2] = (uint8_t*)src_r;
  302. src[3] = (uint8_t*)src_a;
  303. sws = sws_alloc_context();
  304. if (sws_init_context(sws, NULL, NULL) < 0)
  305. fail();
  306. c = sws_internal(sws);
  307. for (fmi = 0; fmi < FF_ARRAY_ELEMS(planar_fmts); fmi++) {
  308. for (isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++ ) {
  309. desc = av_pix_fmt_desc_get(planar_fmts[fmi]);
  310. if (!(desc->flags & AV_PIX_FMT_FLAG_ALPHA))
  311. continue;
  312. sws->src_format = planar_fmts[fmi];
  313. sws->dst_format = AV_PIX_FMT_YUVA444P16;
  314. byte_size = 2;
  315. dstW = input_sizes[isi];
  316. ff_sws_init_scale(c);
  317. if(check_func(c->readAlpPlanar, "planar_%s_to_a_%d", desc->name, dstW)) {
  318. memset(dst0_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
  319. memset(dst1_a, 0x00, LARGEST_INPUT_SIZE * sizeof(int32_t));
  320. call_ref(dst0_a, src, dstW, rgb2yuv, NULL);
  321. call_new(dst1_a, src, dstW, rgb2yuv, NULL);
  322. if (memcmp(dst0_a, dst1_a, dstW * byte_size))
  323. fail();
  324. bench_new(dst1_a, src, dstW, rgb2yuv, NULL);
  325. }
  326. }
  327. }
  328. sws_freeContext(sws);
  329. }
  /**
   * Entry point of the planar GBR test group.
   *
   * Runs the output test followed by the three input tests, and emits one
   * report line after each of them.
   */
  void checkasm_check_sw_gbrp(void)
  {
      /* Output path: yuv2anyX writing planar GBR. */
      check_output_yuv2gbrp();
      report("output_yuv2gbrp");

      /* Input path: luma reader. */
      check_input_planar_rgb_to_y();
      report("input_planar_rgb_y");

      /* Input path: chroma reader. */
      check_input_planar_rgb_to_uv();
      report("input_planar_rgb_uv");

      /* Input path: alpha reader. */
      check_input_planar_rgb_to_a();
      report("input_planar_rgb_a");
  }