hevc_pel.c

/*
 * Copyright (c) 2015 Henrik Gramner
 * Copyright (c) 2021 Josh Dekker
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>

#include "checkasm.h"

#include "libavcodec/hevcdsp.h"

#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
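
/* Sentinel-terminated parameter tables: pixel_mask[] masks 32-bit random words
 * so that packed pixels stay within the tested bit depth, pixel_mask16[] does
 * the same for the 16-bit reference planes, sizes[] lists the tested block
 * widths (index 0 unused), and weights/denoms/offsets end in a negative
 * sentinel that terminates the weighted-prediction loops below. */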
static const uint32_t pixel_mask[]   = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
static const int sizes[]   = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
static const int weights[] = { 0, 128, 255, -1 };
static const int denoms[]  = { 0, 7, 12, -1 };
static const int offsets[] = { 0, 255, -1 };
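
/* SIZEOF_PIXEL is the byte size of one pixel at the current bit depth.
 * BUF_SIZE covers a MAX_PB_SIZE x MAX_PB_SIZE plane of 16-bit samples plus
 * 2 * 4 extra rows of margin for negative source offsets. */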
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))
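
/* Fill both source buffers and both destination buffers with identical random
 * contents (sources masked to the current bit depth), so that the reference
 * and the new implementation start from the same state. */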
#define randomize_buffers()                             \
    do {                                                \
        uint32_t mask = pixel_mask[bit_depth - 8];      \
        int k;                                          \
        for (k = 0; k < BUF_SIZE + SRC_EXTRA; k += 4) { \
            uint32_t r = rnd() & mask;                  \
            AV_WN32A(buf0 + k, r);                      \
            AV_WN32A(buf1 + k, r);                      \
            if (k >= BUF_SIZE)                          \
                continue;                               \
            r = rnd();                                  \
            AV_WN32A(dst0 + k, r);                      \
            AV_WN32A(dst1 + k, r);                      \
        }                                               \
    } while (0)
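
/* As above, but also fill the 16-bit ref0/ref1 planes that the *_bi functions
 * consume as their second (src2) input. */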
#define randomize_buffers_ref()                         \
    randomize_buffers();                                \
    do {                                                \
        uint32_t mask = pixel_mask16[bit_depth - 8];    \
        int k;                                          \
        for (k = 0; k < BUF_SIZE; k += 2) {             \
            uint32_t r = rnd() & mask;                  \
            AV_WN32A(ref0 + k, r);                      \
            AV_WN32A(ref1 + k, r);                      \
        }                                               \
    } while (0)

#define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)

/* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */
#define SRC_EXTRA 8
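
/* Each test iterates i (mx != 0: horizontal filtering) and j (my != 0:
 * vertical filtering), selecting the pixels/h/v/hv variant of each DSP
 * function, over every block size and every supported bit depth. */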

static void checkasm_check_hevc_qpel(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j, row;
    declare_func(void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_pixels"; break; // 0 0
                    case 1: type = "qpel_h";     break; // 0 1
                    case 2: type = "qpel_v";     break; // 1 0
                    case 3: type = "qpel_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
                        randomize_buffers();
                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
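                        /* The 16-bit intermediate output uses a fixed MAX_PB_SIZE stride,
                         * so compare the written part of each row separately. */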
                        for (row = 0; row < sizes[size]; row++) {
                            if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
                                fail();
                        }
                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel");
}

static void checkasm_check_hevc_qpel_uni(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_pixels"; break; // 0 0
                    case 1: type = "qpel_uni_h";     break; // 0 1
                    case 2: type = "qpel_uni_v";     break; // 1 0
                    case 3: type = "qpel_uni_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                            fail();
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel_uni");
}
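
/* The *_uni_w (weighted unidirectional) functions are additionally exercised
 * with every combination from the denoms/weights/offsets tables. */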
static void checkasm_check_hevc_qpel_uni_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
                    case 1: type = "qpel_uni_w_h";     break; // 0 1
                    case 2: type = "qpel_uni_w_v";     break; // 1 0
                    case 3: type = "qpel_uni_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                                        fail();
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("qpel_uni_w");
}
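
/* The *_bi (bidirectional) functions take an extra int16_t plane (src2),
 * provided here by ref0/ref1 via randomize_buffers_ref(). */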
static void checkasm_check_hevc_qpel_bi(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int16_t *src2,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_pixels"; break; // 0 0
                    case 1: type = "qpel_bi_h";     break; // 0 1
                    case 2: type = "qpel_bi_v";     break; // 1 0
                    case 3: type = "qpel_bi_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers_ref();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                            fail();
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel_bi");
}

static void checkasm_check_hevc_qpel_bi_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int16_t *src2,
                 int height, int denom, int wx0, int wx1,
                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
                    case 1: type = "qpel_bi_w_h";     break; // 0 1
                    case 2: type = "qpel_bi_w_v";     break; // 1 0
                    case 3: type = "qpel_bi_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers_ref();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                                        fail();
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("qpel_bi_w");
}
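
/* The epel tests mirror the qpel ones above; the epel filters are shorter
 * (4 taps instead of 8), and these tests run without the extra SRC_EXTRA
 * source padding. */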
#undef SRC_EXTRA
#define SRC_EXTRA 0

static void checkasm_check_hevc_epel(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j, row;
    declare_func(void, int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_pixels"; break; // 0 0
                    case 1: type = "epel_h";     break; // 0 1
                    case 2: type = "epel_v";     break; // 1 0
                    case 3: type = "epel_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
                        randomize_buffers();
                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        for (row = 0; row < sizes[size]; row++) {
                            if (memcmp(dstw0 + row * MAX_PB_SIZE, dstw1 + row * MAX_PB_SIZE, sizes[size] * SIZEOF_PIXEL))
                                fail();
                        }
                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel");
}

static void checkasm_check_hevc_epel_uni(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_pixels"; break; // 0 0
                    case 1: type = "epel_uni_h";     break; // 0 1
                    case 2: type = "epel_uni_v";     break; // 1 0
                    case 3: type = "epel_uni_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_uni[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                            fail();
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel_uni");
}

static void checkasm_check_hevc_epel_uni_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
                    case 1: type = "epel_uni_w_h";     break; // 0 1
                    case 2: type = "epel_uni_w_v";     break; // 1 0
                    case 3: type = "epel_uni_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_uni_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                                        fail();
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("epel_uni_w");
}

static void checkasm_check_hevc_epel_bi(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int16_t *src2,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_pixels"; break; // 0 0
                    case 1: type = "epel_bi_h";     break; // 0 1
                    case 2: type = "epel_bi_v";     break; // 1 0
                    case 3: type = "epel_bi_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_bi[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers_ref();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
                        if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                            fail();
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel_bi");
}

static void checkasm_check_hevc_epel_bi_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;
    declare_func(void, uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
                 int16_t *src2,
                 int height, int denom, int wx0, int wx1,
                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
                    case 1: type = "epel_bi_w_h";     break; // 0 1
                    case 2: type = "epel_bi_w_v";     break; // 1 0
                    case 3: type = "epel_bi_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_bi_w[size][j][i], "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers_ref();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL, src0, sizes[size] * SIZEOF_PIXEL, ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    if (memcmp(dst0, dst1, sizes[size] * sizes[size] * SIZEOF_PIXEL))
                                        fail();
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL, src1, sizes[size] * SIZEOF_PIXEL, ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("epel_bi_w");
}
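
/* Entry point called by the checkasm framework; runs each test group in turn. */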
void checkasm_check_hevc_pel(void)
{
    checkasm_check_hevc_qpel();
    checkasm_check_hevc_qpel_uni();
    checkasm_check_hevc_qpel_uni_w();
    checkasm_check_hevc_qpel_bi();
    checkasm_check_hevc_qpel_bi_w();

    checkasm_check_hevc_epel();
    checkasm_check_hevc_epel_uni();
    checkasm_check_hevc_epel_uni_w();
    checkasm_check_hevc_epel_bi();
    checkasm_check_hevc_epel_bi_w();
}