vvc_mc.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. /*
  2. * Copyright (c) 2023-2024 Nuo Mi
  3. * Copyright (c) 2023-2024 Wu Jianhua
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. */
  21. #include <string.h>
  22. #include "checkasm.h"
  23. #include "libavcodec/vvc/ctu.h"
  24. #include "libavcodec/vvc/data.h"
  25. #include "libavcodec/vvc/dsp.h"
  26. #include "libavutil/common.h"
  27. #include "libavutil/intreadwrite.h"
  28. #include "libavutil/mem_internal.h"
  29. static const uint32_t pixel_mask[] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff, 0x3fff3fff, 0xffffffff };
  30. static const int sizes[] = { 2, 4, 8, 16, 32, 64, 128 };
  31. #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
  32. #define PIXEL_STRIDE (MAX_CTU_SIZE * 2)
  33. #define EXTRA_BEFORE 3
  34. #define EXTRA_AFTER 4
  35. #define SRC_EXTRA (EXTRA_BEFORE + EXTRA_AFTER) * 2
  36. #define SRC_BUF_SIZE (PIXEL_STRIDE + SRC_EXTRA) * (PIXEL_STRIDE + SRC_EXTRA)
  37. #define DST_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE * 2)
  38. #define SRC_OFFSET ((PIXEL_STRIDE + EXTRA_BEFORE * 2) * EXTRA_BEFORE)
  39. #define randomize_buffers(buf0, buf1, size, mask) \
  40. do { \
  41. int k; \
  42. for (k = 0; k < size; k += 4 / sizeof(*buf0)) { \
  43. uint32_t r = rnd() & mask; \
  44. AV_WN32A(buf0 + k, r); \
  45. AV_WN32A(buf1 + k, r); \
  46. } \
  47. } while (0)
  48. #define randomize_pixels(buf0, buf1, size) \
  49. do { \
  50. uint32_t mask = pixel_mask[(bit_depth - 8) >> 1]; \
  51. randomize_buffers(buf0, buf1, size, mask); \
  52. } while (0)
  53. #define randomize_avg_src(buf0, buf1, size) \
  54. do { \
  55. uint32_t mask = 0x3fff3fff; \
  56. randomize_buffers(buf0, buf1, size, mask); \
  57. } while (0)
  58. #define randomize_prof_src(buf0, buf1, size) \
  59. do { \
  60. const int shift = 14 - bit_depth; \
  61. const int mask16 = 0x3fff >> shift << shift; \
  62. uint32_t mask = (mask16 << 16) | mask16; \
  63. randomize_buffers(buf0, buf1, size, mask); \
  64. } while (0)
  65. static void check_put_vvc_luma(void)
  66. {
  67. LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
  68. LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
  69. LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
  70. LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
  71. VVCDSPContext c;
  72. declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
  73. const int height, const int8_t *hf, const int8_t *vf, const int width);
  74. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  75. randomize_pixels(src0, src1, SRC_BUF_SIZE);
  76. ff_vvc_dsp_init(&c, bit_depth);
  77. for (int i = 0; i < 2; i++) {
  78. for (int j = 0; j < 2; j++) {
  79. for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
  80. for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
  81. const int idx = av_log2(w) - 1;
  82. const int mx = rnd() % 16;
  83. const int my = rnd() % 16;
  84. const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % 3][mx];
  85. const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % 3][my];
  86. const char *type;
  87. switch ((j << 1) | i) {
  88. case 0: type = "put_luma_pixels"; break; // 0 0
  89. case 1: type = "put_luma_h"; break; // 0 1
  90. case 2: type = "put_luma_v"; break; // 1 0
  91. case 3: type = "put_luma_hv"; break; // 1 1
  92. }
  93. if (check_func(c.inter.put[LUMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
  94. memset(dst0, 0, DST_BUF_SIZE);
  95. memset(dst1, 0, DST_BUF_SIZE);
  96. call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  97. call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  98. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  99. fail();
  100. if (w == h)
  101. bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  102. }
  103. }
  104. }
  105. }
  106. }
  107. }
  108. report("put_luma");
  109. }
  110. static void check_put_vvc_luma_uni(void)
  111. {
  112. LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
  113. LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
  114. LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
  115. LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
  116. VVCDSPContext c;
  117. declare_func(void, uint8_t *dst, ptrdiff_t dststride,
  118. const uint8_t *src, ptrdiff_t srcstride, int height,
  119. const int8_t *hf, const int8_t *vf, int width);
  120. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  121. ff_vvc_dsp_init(&c, bit_depth);
  122. randomize_pixels(src0, src1, SRC_BUF_SIZE);
  123. for (int i = 0; i < 2; i++) {
  124. for (int j = 0; j < 2; j++) {
  125. for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
  126. for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
  127. const int idx = av_log2(w) - 1;
  128. const int mx = rnd() % VVC_INTER_LUMA_FACTS;
  129. const int my = rnd() % VVC_INTER_LUMA_FACTS;
  130. const int8_t *hf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][mx];
  131. const int8_t *vf = ff_vvc_inter_luma_filters[rnd() % VVC_INTER_LUMA_FILTER_TYPES][my];
  132. const char *type;
  133. switch ((j << 1) | i) {
  134. case 0: type = "put_uni_pixels"; break; // 0 0
  135. case 1: type = "put_uni_h"; break; // 0 1
  136. case 2: type = "put_uni_v"; break; // 1 0
  137. case 3: type = "put_uni_hv"; break; // 1 1
  138. }
  139. if (check_func(c.inter.put_uni[LUMA][idx][j][i], "%s_luma_%d_%dx%d", type, bit_depth, w, h)) {
  140. memset(dst0, 0, DST_BUF_SIZE);
  141. memset(dst1, 0, DST_BUF_SIZE);
  142. call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  143. call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  144. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  145. fail();
  146. if (w == h)
  147. bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  148. }
  149. }
  150. }
  151. }
  152. }
  153. }
  154. report("put_uni_luma");
  155. }
  156. static void check_put_vvc_chroma(void)
  157. {
  158. LOCAL_ALIGNED_32(int16_t, dst0, [DST_BUF_SIZE / 2]);
  159. LOCAL_ALIGNED_32(int16_t, dst1, [DST_BUF_SIZE / 2]);
  160. LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
  161. LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
  162. VVCDSPContext c;
  163. declare_func(void, int16_t *dst, const uint8_t *src, const ptrdiff_t src_stride,
  164. const int height, const int8_t *hf, const int8_t *vf, const int width);
  165. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  166. randomize_pixels(src0, src1, SRC_BUF_SIZE);
  167. ff_vvc_dsp_init(&c, bit_depth);
  168. for (int i = 0; i < 2; i++) {
  169. for (int j = 0; j < 2; j++) {
  170. for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
  171. for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
  172. const int idx = av_log2(w) - 1;
  173. const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
  174. const int my = rnd() % VVC_INTER_CHROMA_FACTS;
  175. const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx];
  176. const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my];
  177. const char *type;
  178. switch ((j << 1) | i) {
  179. case 0: type = "put_chroma_pixels"; break; // 0 0
  180. case 1: type = "put_chroma_h"; break; // 0 1
  181. case 2: type = "put_chroma_v"; break; // 1 0
  182. case 3: type = "put_chroma_hv"; break; // 1 1
  183. }
  184. if (check_func(c.inter.put[CHROMA][idx][j][i], "%s_%d_%dx%d", type, bit_depth, w, h)) {
  185. memset(dst0, 0, DST_BUF_SIZE);
  186. memset(dst1, 0, DST_BUF_SIZE);
  187. call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  188. call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  189. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  190. fail();
  191. if (w == h)
  192. bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  193. }
  194. }
  195. }
  196. }
  197. }
  198. }
  199. report("put_chroma");
  200. }
  201. static void check_put_vvc_chroma_uni(void)
  202. {
  203. LOCAL_ALIGNED_32(uint8_t, dst0, [DST_BUF_SIZE]);
  204. LOCAL_ALIGNED_32(uint8_t, dst1, [DST_BUF_SIZE]);
  205. LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
  206. LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
  207. VVCDSPContext c;
  208. declare_func(void, uint8_t *dst, ptrdiff_t dststride,
  209. const uint8_t *src, ptrdiff_t srcstride, int height,
  210. const int8_t *hf, const int8_t *vf, int width);
  211. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  212. ff_vvc_dsp_init(&c, bit_depth);
  213. randomize_pixels(src0, src1, SRC_BUF_SIZE);
  214. for (int i = 0; i < 2; i++) {
  215. for (int j = 0; j < 2; j++) {
  216. for (int h = 4; h <= MAX_CTU_SIZE; h *= 2) {
  217. for (int w = 4; w <= MAX_CTU_SIZE; w *= 2) {
  218. const int idx = av_log2(w) - 1;
  219. const int mx = rnd() % VVC_INTER_CHROMA_FACTS;
  220. const int my = rnd() % VVC_INTER_CHROMA_FACTS;
  221. const int8_t *hf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][mx];
  222. const int8_t *vf = ff_vvc_inter_chroma_filters[rnd() % VVC_INTER_CHROMA_FILTER_TYPES][my];
  223. const char *type;
  224. switch ((j << 1) | i) {
  225. case 0: type = "put_uni_pixels"; break; // 0 0
  226. case 1: type = "put_uni_h"; break; // 0 1
  227. case 2: type = "put_uni_v"; break; // 1 0
  228. case 3: type = "put_uni_hv"; break; // 1 1
  229. }
  230. if (check_func(c.inter.put_uni[CHROMA][idx][j][i], "%s_chroma_%d_%dx%d", type, bit_depth, w, h)) {
  231. memset(dst0, 0, DST_BUF_SIZE);
  232. memset(dst1, 0, DST_BUF_SIZE);
  233. call_ref(dst0, PIXEL_STRIDE, src0 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  234. call_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  235. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  236. fail();
  237. if (w == h)
  238. bench_new(dst1, PIXEL_STRIDE, src1 + SRC_OFFSET, PIXEL_STRIDE, h, hf, vf, w);
  239. }
  240. }
  241. }
  242. }
  243. }
  244. }
  245. report("put_uni_chroma");
  246. }
  247. #define AVG_SRC_BUF_SIZE (MAX_CTU_SIZE * MAX_CTU_SIZE)
  248. #define AVG_DST_BUF_SIZE (MAX_PB_SIZE * MAX_PB_SIZE * 2)
  249. static void check_avg(void)
  250. {
  251. LOCAL_ALIGNED_32(int16_t, src00, [AVG_SRC_BUF_SIZE]);
  252. LOCAL_ALIGNED_32(int16_t, src01, [AVG_SRC_BUF_SIZE]);
  253. LOCAL_ALIGNED_32(int16_t, src10, [AVG_SRC_BUF_SIZE]);
  254. LOCAL_ALIGNED_32(int16_t, src11, [AVG_SRC_BUF_SIZE]);
  255. LOCAL_ALIGNED_32(uint8_t, dst0, [AVG_DST_BUF_SIZE]);
  256. LOCAL_ALIGNED_32(uint8_t, dst1, [AVG_DST_BUF_SIZE]);
  257. VVCDSPContext c;
  258. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  259. randomize_avg_src((uint8_t*)src00, (uint8_t*)src10, AVG_SRC_BUF_SIZE * sizeof(int16_t));
  260. randomize_avg_src((uint8_t*)src01, (uint8_t*)src11, AVG_SRC_BUF_SIZE * sizeof(int16_t));
  261. ff_vvc_dsp_init(&c, bit_depth);
  262. for (int h = 2; h <= MAX_CTU_SIZE; h *= 2) {
  263. for (int w = 2; w <= MAX_CTU_SIZE; w *= 2) {
  264. {
  265. declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
  266. const int16_t *src0, const int16_t *src1, int width, int height);
  267. if (check_func(c.inter.avg, "avg_%d_%dx%d", bit_depth, w, h)) {
  268. memset(dst0, 0, AVG_DST_BUF_SIZE);
  269. memset(dst1, 0, AVG_DST_BUF_SIZE);
  270. call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
  271. call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h);
  272. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  273. fail();
  274. if (w == h)
  275. bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h);
  276. }
  277. }
  278. {
  279. declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
  280. const int16_t *src0, const int16_t *src1, int width, int height,
  281. int denom, int w0, int w1, int o0, int o1);
  282. {
  283. const int denom = rnd() % 8;
  284. const int w0 = rnd() % 256 - 128;
  285. const int w1 = rnd() % 256 - 128;
  286. const int o0 = rnd() % 256 - 128;
  287. const int o1 = rnd() % 256 - 128;
  288. if (check_func(c.inter.w_avg, "w_avg_%d_%dx%d", bit_depth, w, h)) {
  289. memset(dst0, 0, AVG_DST_BUF_SIZE);
  290. memset(dst1, 0, AVG_DST_BUF_SIZE);
  291. call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
  292. call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
  293. if (memcmp(dst0, dst1, DST_BUF_SIZE))
  294. fail();
  295. if (w == h)
  296. bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
  297. }
  298. }
  299. }
  300. }
  301. }
  302. }
  303. report("avg");
  304. }
  305. #define SR_RANGE 2
  306. static void check_dmvr(void)
  307. {
  308. LOCAL_ALIGNED_32(uint16_t, dst0, [DST_BUF_SIZE]);
  309. LOCAL_ALIGNED_32(uint16_t, dst1, [DST_BUF_SIZE]);
  310. LOCAL_ALIGNED_32(uint8_t, src0, [SRC_BUF_SIZE]);
  311. LOCAL_ALIGNED_32(uint8_t, src1, [SRC_BUF_SIZE]);
  312. const int dst_stride = MAX_PB_SIZE * sizeof(int16_t);
  313. VVCDSPContext c;
  314. declare_func(void, int16_t *dst, const uint8_t *src, ptrdiff_t src_stride, int height,
  315. intptr_t mx, intptr_t my, int width);
  316. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  317. ff_vvc_dsp_init(&c, bit_depth);
  318. randomize_pixels(src0, src1, SRC_BUF_SIZE);
  319. for (int i = 0; i < 2; i++) {
  320. for (int j = 0; j < 2; j++) {
  321. for (int h = 8; h <= 16; h *= 2) {
  322. for (int w = 8; w <= 16; w *= 2) {
  323. const int pred_w = w + 2 * SR_RANGE;
  324. const int pred_h = h + 2 * SR_RANGE;
  325. const int mx = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
  326. const int my = rnd() % VVC_INTER_LUMA_DMVR_FACTS;
  327. const char *type;
  328. if (w * h < 128)
  329. continue;
  330. switch ((j << 1) | i) {
  331. case 0: type = "dmvr"; break; // 0 0
  332. case 1: type = "dmvr_h"; break; // 0 1
  333. case 2: type = "dmvr_v"; break; // 1 0
  334. case 3: type = "dmvr_hv"; break; // 1 1
  335. }
  336. if (check_func(c.inter.dmvr[j][i], "%s_%d_%dx%d", type, bit_depth, pred_w, pred_h)) {
  337. memset(dst0, 0, DST_BUF_SIZE);
  338. memset(dst1, 0, DST_BUF_SIZE);
  339. call_ref(dst0, src0 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
  340. call_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
  341. for (int k = 0; k < pred_h; k++) {
  342. if (memcmp(dst0 + k * dst_stride, dst1 + k * dst_stride, pred_w * sizeof(int16_t))) {
  343. fail();
  344. break;
  345. }
  346. }
  347. bench_new(dst1, src1 + SRC_OFFSET, PIXEL_STRIDE, pred_h, mx, my, pred_w);
  348. }
  349. }
  350. }
  351. }
  352. }
  353. }
  354. report("dmvr");
  355. }
  356. #define BDOF_BLOCK_SIZE 16
  357. #define BDOF_SRC_SIZE (MAX_PB_SIZE* (BDOF_BLOCK_SIZE + 2))
  358. #define BDOF_SRC_OFFSET (MAX_PB_SIZE + 1)
  359. #define BDOF_DST_SIZE (BDOF_BLOCK_SIZE * BDOF_BLOCK_SIZE * 2)
  360. static void check_bdof(void)
  361. {
  362. LOCAL_ALIGNED_32(uint8_t, dst0, [BDOF_DST_SIZE]);
  363. LOCAL_ALIGNED_32(uint8_t, dst1, [BDOF_DST_SIZE]);
  364. LOCAL_ALIGNED_32(uint16_t, src00, [BDOF_SRC_SIZE]);
  365. LOCAL_ALIGNED_32(uint16_t, src01, [BDOF_SRC_SIZE]);
  366. LOCAL_ALIGNED_32(uint16_t, src10, [BDOF_SRC_SIZE]);
  367. LOCAL_ALIGNED_32(uint16_t, src11, [BDOF_SRC_SIZE]);
  368. VVCDSPContext c;
  369. declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int block_w, int block_h);
  370. for (int bit_depth = 8; bit_depth <= 12; bit_depth += 2) {
  371. const int dst_stride = BDOF_BLOCK_SIZE * SIZEOF_PIXEL;
  372. ff_vvc_dsp_init(&c, bit_depth);
  373. randomize_prof_src(src00, src10, BDOF_SRC_SIZE);
  374. randomize_prof_src(src01, src11, BDOF_SRC_SIZE);
  375. for (int h = 8; h <= 16; h *= 2) {
  376. for (int w = 8; w <= 16; w *= 2) {
  377. if (w * h < 128)
  378. continue;
  379. if (check_func(c.inter.apply_bdof, "apply_bdof_%d_%dx%d", bit_depth, w, h)) {
  380. memset(dst0, 0, BDOF_DST_SIZE);
  381. memset(dst1, 0, BDOF_DST_SIZE);
  382. call_ref(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
  383. call_new(dst1, dst_stride, src10 + BDOF_SRC_OFFSET, src11 + BDOF_SRC_OFFSET, w, h);
  384. if (memcmp(dst0, dst1, BDOF_DST_SIZE))
  385. fail();
  386. bench_new(dst0, dst_stride, src00 + BDOF_SRC_OFFSET, src01 + BDOF_SRC_OFFSET, w, h);
  387. }
  388. }
  389. }
  390. }
  391. report("apply_bdof");
  392. }
  393. static void check_vvc_sad(void)
  394. {
  395. const int bit_depth = 10;
  396. VVCDSPContext c;
  397. LOCAL_ALIGNED_32(uint16_t, src0, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
  398. LOCAL_ALIGNED_32(uint16_t, src1, [MAX_CTU_SIZE * MAX_CTU_SIZE * 4]);
  399. declare_func(int, const int16_t *src0, const int16_t *src1, int dx, int dy, int block_w, int block_h);
  400. ff_vvc_dsp_init(&c, bit_depth);
  401. randomize_pixels(src0, src1, MAX_CTU_SIZE * MAX_CTU_SIZE * 4);
  402. for (int h = 8; h <= 16; h *= 2) {
  403. for (int w = 8; w <= 16; w *= 2) {
  404. for(int offy = 0; offy <= 4; offy++) {
  405. for(int offx = 0; offx <= 4; offx++) {
  406. if (w * h < 128)
  407. continue;
  408. if (check_func(c.inter.sad, "sad_%dx%d", w, h)) {
  409. int result0;
  410. int result1;
  411. result0 = call_ref(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
  412. result1 = call_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
  413. if (result1 != result0)
  414. fail();
  415. if(offx == 0 && offy == 0)
  416. bench_new(src0 + PIXEL_STRIDE * 2 + 2, src1 + PIXEL_STRIDE * 2 + 2, offx, offy, w, h);
  417. }
  418. }
  419. }
  420. }
  421. }
  422. report("sad");
  423. }
// Entry point for the VVC motion-compensation checkasm tests. The sub-tests
// share the checkasm rnd() stream, so keep this call order fixed to preserve
// reproducible test vectors across runs.
void checkasm_check_vvc_mc(void)
{
    check_dmvr();
    check_bdof();
    check_vvc_sad();
    check_put_vvc_luma();
    check_put_vvc_luma_uni();
    check_put_vvc_chroma();
    check_put_vvc_chroma_uni();
    check_avg();
}