hevc_pel.c

/*
 * Copyright (c) 2015 Henrik Gramner
 * Copyright (c) 2021 Josh Dekker
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <string.h>

#include "checkasm.h"

#include "libavcodec/hevc/dsp.h"

#include "libavutil/common.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
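
/*
 * pixel_mask[] clamps random source samples to the valid range for each
 * tested bit depth (8..12); pixel_mask16[] does the same for the int16_t
 * second-source planes used by the *_bi tests. sizes[] lists the block
 * widths tested per index of the DSP function tables (index 0 is unused
 * here, hence the -1 placeholder), and denoms[], weights[] and offsets[]
 * are -1-terminated lists of weighted-prediction parameters.
 */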
static const uint32_t pixel_mask[]   = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
static const int sizes[]   = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
static const int weights[] = { 0, 128, 255, -1 };
static const int denoms[]  = { 0, 7, 12, -1 };
static const int offsets[] = { 0, 255, -1 };

#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define BUF_SIZE (2 * MAX_PB_SIZE * (2 * 4 + MAX_PB_SIZE))

#define randomize_buffers()                             \
    do {                                                \
        uint32_t mask = pixel_mask[bit_depth - 8];      \
        int k;                                          \
        for (k = 0; k < BUF_SIZE + SRC_EXTRA; k += 4) { \
            uint32_t r = rnd() & mask;                  \
            AV_WN32A(buf0 + k, r);                      \
            AV_WN32A(buf1 + k, r);                      \
            if (k >= BUF_SIZE)                          \
                continue;                               \
            r = rnd();                                  \
            AV_WN32A(dst0 + k, r);                      \
            AV_WN32A(dst1 + k, r);                      \
        }                                               \
    } while (0)
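
/*
 * The *_bi tests also need the int16_t second-source planes filled:
 * randomize_buffers_ref() reuses randomize_buffers() and then fills
 * ref0/ref1 with values masked to the current bit depth.
 */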
#define randomize_buffers_ref()                         \
    randomize_buffers();                                \
    do {                                                \
        uint32_t mask = pixel_mask16[bit_depth - 8];    \
        int k;                                          \
        for (k = 0; k < BUF_SIZE; k += 2) {             \
            uint32_t r = rnd() & mask;                  \
            AV_WN32A(ref0 + k, r);                      \
            AV_WN32A(ref1 + k, r);                      \
        }                                               \
    } while (0)

#define src0 (buf0 + 2 * 4 * MAX_PB_SIZE) /* hevc qpel functions read data from negative src pointer offsets */
#define src1 (buf1 + 2 * 4 * MAX_PB_SIZE)

/* FIXME: Does the need for SRC_EXTRA for these tests indicate a bug? */
#define SRC_EXTRA 8
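
/*
 * Every test below iterates the same space: bit depths 8..12, block sizes
 * from sizes[], and the four filter combinations, where i selects horizontal
 * filtering (mx) and j vertical filtering (my). The block width from sizes[]
 * is also used as the height and as the stride in pixels.
 */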
static void checkasm_check_hevc_qpel(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_pixels"; break; // 0 0
                    case 1: type = "qpel_h";     break; // 0 1
                    case 2: type = "qpel_v";     break; // 1 0
                    case 3: type = "qpel_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
                        randomize_buffers();
                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        checkasm_check(int16_t, dstw0, MAX_PB_SIZE * sizeof(int16_t),
                                                dstw1, MAX_PB_SIZE * sizeof(int16_t),
                                                sizes[size], sizes[size], "dst");
                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel");
}
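
/*
 * The *_uni variants write final pixels to dst instead of the int16_t
 * intermediate checked above, so the outputs are compared with
 * checkasm_check_pixel() using the pixel stride.
 */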
static void checkasm_check_hevc_qpel_uni(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_pixels"; break; // 0 0
                    case 1: type = "qpel_uni_h";     break; // 0 1
                    case 2: type = "qpel_uni_v";     break; // 1 0
                    case 3: type = "qpel_uni_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_uni[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                 src0, sizes[size] * SIZEOF_PIXEL,
                                 sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                 src1, sizes[size] * SIZEOF_PIXEL,
                                 sizes[size], i, j, sizes[size]);
                        checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                             dst1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], sizes[size], "dst");
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                  src1, sizes[size] * SIZEOF_PIXEL,
                                  sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel_uni");
}
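
/*
 * The weighted unidirectional functions additionally take (denom, wx, ox)
 * weighted-prediction parameters; every combination from the -1-terminated
 * tables above is exercised with freshly randomized buffers.
 */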
static void checkasm_check_hevc_qpel_uni_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
                    case 1: type = "qpel_uni_w_h";     break; // 0 1
                    case 2: type = "qpel_uni_w_v";     break; // 1 0
                    case 3: type = "qpel_uni_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_uni_w[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                             src0, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                             src1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                                         dst1, sizes[size] * SIZEOF_PIXEL,
                                                         sizes[size], sizes[size], "dst");
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                              src1, sizes[size] * SIZEOF_PIXEL,
                                              sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("qpel_uni_w");
}
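
/*
 * The *_bi variants combine the filtered source with a second int16_t plane
 * (ref0 for the reference call, ref1 for the new one), which is why
 * randomize_buffers_ref() is used here.
 */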
static void checkasm_check_hevc_qpel_bi(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 const int16_t *src2,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_pixels"; break; // 0 0
                    case 1: type = "qpel_bi_h";     break; // 0 1
                    case 2: type = "qpel_bi_v";     break; // 1 0
                    case 3: type = "qpel_bi_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_bi[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers_ref();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                 src0, sizes[size] * SIZEOF_PIXEL,
                                 ref0, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                 src1, sizes[size] * SIZEOF_PIXEL,
                                 ref1, sizes[size], i, j, sizes[size]);
                        checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                             dst1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], sizes[size], "dst");
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                  src1, sizes[size] * SIZEOF_PIXEL,
                                  ref1, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("qpel_bi");
}
static void checkasm_check_hevc_qpel_bi_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE + SRC_EXTRA]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 const int16_t *src2,
                 int height, int denom, int wx0, int wx1,
                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
                    case 1: type = "qpel_bi_w_h";     break; // 0 1
                    case 2: type = "qpel_bi_w_v";     break; // 1 0
                    case 3: type = "qpel_bi_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_qpel_bi_w[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers_ref();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                             src0, sizes[size] * SIZEOF_PIXEL,
                                             ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                             src1, sizes[size] * SIZEOF_PIXEL,
                                             ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                                         dst1, sizes[size] * SIZEOF_PIXEL,
                                                         sizes[size], sizes[size], "dst");
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                              src1, sizes[size] * SIZEOF_PIXEL,
                                              ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("qpel_bi_w");
}

#undef SRC_EXTRA
#define SRC_EXTRA 0
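
/*
 * The epel tests mirror the qpel tests above, but exercise the
 * put_hevc_epel* (chroma interpolation) tables, which do not need the
 * extra source padding.
 */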
static void checkasm_check_hevc_epel(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_pixels"; break; // 0 0
                    case 1: type = "epel_h";     break; // 0 1
                    case 2: type = "epel_v";     break; // 1 0
                    case 3: type = "epel_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        int16_t *dstw0 = (int16_t *) dst0, *dstw1 = (int16_t *) dst1;
                        randomize_buffers();
                        call_ref(dstw0, src0, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        call_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                        checkasm_check(int16_t, dstw0, MAX_PB_SIZE * sizeof(int16_t),
                                                dstw1, MAX_PB_SIZE * sizeof(int16_t),
                                                sizes[size], sizes[size], "dst");
                        bench_new(dstw1, src1, sizes[size] * SIZEOF_PIXEL, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel");
}
static void checkasm_check_hevc_epel_uni(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_pixels"; break; // 0 0
                    case 1: type = "epel_uni_h";     break; // 0 1
                    case 2: type = "epel_uni_v";     break; // 1 0
                    case 3: type = "epel_uni_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_uni[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                 src0, sizes[size] * SIZEOF_PIXEL,
                                 sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                 src1, sizes[size] * SIZEOF_PIXEL,
                                 sizes[size], i, j, sizes[size]);
                        checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                             dst1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], sizes[size], "dst");
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                  src1, sizes[size] * SIZEOF_PIXEL,
                                  sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel_uni");
}
static void checkasm_check_hevc_epel_uni_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_uni_w_pixels"; break; // 0 0
                    case 1: type = "epel_uni_w_h";     break; // 0 1
                    case 2: type = "epel_uni_w_v";     break; // 1 0
                    case 3: type = "epel_uni_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_uni_w[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                             src0, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                             src1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                    checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                                         dst1, sizes[size] * SIZEOF_PIXEL,
                                                         sizes[size], sizes[size], "dst");
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                              src1, sizes[size] * SIZEOF_PIXEL,
                                              sizes[size], *denom, *wx, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("epel_uni_w");
}
static void checkasm_check_hevc_epel_bi(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 const int16_t *src2,
                 int height, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_pixels"; break; // 0 0
                    case 1: type = "epel_bi_h";     break; // 0 1
                    case 2: type = "epel_bi_v";     break; // 1 0
                    case 3: type = "epel_bi_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_bi[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        randomize_buffers_ref();
                        call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                 src0, sizes[size] * SIZEOF_PIXEL,
                                 ref0, sizes[size], i, j, sizes[size]);
                        call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                 src1, sizes[size] * SIZEOF_PIXEL,
                                 ref1, sizes[size], i, j, sizes[size]);
                        checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                             dst1, sizes[size] * SIZEOF_PIXEL,
                                             sizes[size], sizes[size], "dst");
                        bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                  src1, sizes[size] * SIZEOF_PIXEL,
                                  ref1, sizes[size], i, j, sizes[size]);
                    }
                }
            }
        }
    }
    report("epel_bi");
}
static void checkasm_check_hevc_epel_bi_w(void)
{
    LOCAL_ALIGNED_32(uint8_t, buf0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, buf1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(uint8_t, dst1, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref0, [BUF_SIZE]);
    LOCAL_ALIGNED_32(int16_t, ref1, [BUF_SIZE]);

    HEVCDSPContext h;
    int size, bit_depth, i, j;
    const int *denom, *wx, *ox;

    declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                 const int16_t *src2,
                 int height, int denom, int wx0, int wx1,
                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);

    for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
        ff_hevc_dsp_init(&h, bit_depth);

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++) {
                for (size = 1; size < 10; size++) {
                    const char *type;
                    switch ((j << 1) | i) {
                    case 0: type = "pel_bi_w_pixels"; break; // 0 0
                    case 1: type = "epel_bi_w_h";     break; // 0 1
                    case 2: type = "epel_bi_w_v";     break; // 1 0
                    case 3: type = "epel_bi_w_hv";    break; // 1 1
                    }

                    if (check_func(h.put_hevc_epel_bi_w[size][j][i],
                                   "put_hevc_%s%d_%d", type, sizes[size], bit_depth)) {
                        for (denom = denoms; *denom >= 0; denom++) {
                            for (wx = weights; *wx >= 0; wx++) {
                                for (ox = offsets; *ox >= 0; ox++) {
                                    randomize_buffers_ref();
                                    call_ref(dst0, sizes[size] * SIZEOF_PIXEL,
                                             src0, sizes[size] * SIZEOF_PIXEL,
                                             ref0, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    call_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                             src1, sizes[size] * SIZEOF_PIXEL,
                                             ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                    checkasm_check_pixel(dst0, sizes[size] * SIZEOF_PIXEL,
                                                         dst1, sizes[size] * SIZEOF_PIXEL,
                                                         sizes[size], sizes[size], "dst");
                                    bench_new(dst1, sizes[size] * SIZEOF_PIXEL,
                                              src1, sizes[size] * SIZEOF_PIXEL,
                                              ref1, sizes[size], *denom, *wx, *wx, *ox, *ox, i, j, sizes[size]);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    report("epel_bi_w");
}
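
/* Entry point invoked by checkasm; runs each qpel/epel test group in turn. */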
void checkasm_check_hevc_pel(void)
{
    checkasm_check_hevc_qpel();
    checkasm_check_hevc_qpel_uni();
    checkasm_check_hevc_qpel_uni_w();
    checkasm_check_hevc_qpel_bi();
    checkasm_check_hevc_qpel_bi_w();

    checkasm_check_hevc_epel();
    checkasm_check_hevc_epel_uni();
    checkasm_check_hevc_epel_uni_w();
    checkasm_check_hevc_epel_bi();
    checkasm_check_hevc_epel_bi_w();
}