ac3dsp.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. /*
  2. * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
  3. * Copyright (c) 2024 Geoff Hill <geoff@geoffhill.org>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. */
  21. #include <stdint.h>
  22. #include <string.h>
  23. #include "libavutil/mem.h"
  24. #include "libavutil/mem_internal.h"
  25. #include "libavcodec/ac3dsp.h"
  26. #include "checkasm.h"
  /*
   * Fill the first `len` elements of `buf` with pseudo-random bytes.
   *
   * Each element receives one rnd() result truncated to uint8_t. rnd() is
   * not defined in this file (presumably provided by checkasm.h).
   *
   * Both arguments are parenthesized in the expansion so that expressions
   * such as `cond ? a : b` or `base + off` behave as a single operand, and
   * the loop counter has a macro-specific name so it cannot capture an `i`
   * used inside the caller's arguments. `len` is re-evaluated on every
   * iteration, so it should be free of side effects.
   */
  #define randomize_exp(buf, len)                                       \
      do {                                                              \
          int exp_idx_;                                                 \
          for (exp_idx_ = 0; exp_idx_ < (len); exp_idx_++) {            \
              (buf)[exp_idx_] = (uint8_t)rnd();                         \
          }                                                             \
      } while (0)
  /*
   * Fill the first `len` elements of `buf` with pseudo-random signed values
   * whose magnitude fits in 24 bits.
   *
   * One rnd() result is reinterpreted as int32_t; its low 24 bits become the
   * magnitude and its sign decides whether the magnitude is negated, so every
   * stored value lies in [-0xFFFFFF, 0xFFFFFF]. rnd() is not defined in this
   * file (presumably provided by checkasm.h).
   *
   * Both arguments are parenthesized in the expansion and the temporaries use
   * macro-specific names, so arbitrary caller expressions expand safely.
   * `len` is re-evaluated on every iteration.
   */
  #define randomize_i24(buf, len)                                       \
      do {                                                              \
          int i24_idx_;                                                 \
          for (i24_idx_ = 0; i24_idx_ < (len); i24_idx_++) {            \
              int32_t i24_raw_ = (int32_t)rnd();                        \
              int32_t i24_mag_ = i24_raw_ & 0xFFFFFF;                   \
              (buf)[i24_idx_] = (i24_raw_ < 0) ? -i24_mag_ : i24_mag_;  \
          }                                                             \
      } while (0)
  /*
   * Fill the first `len` elements of `buf` with pseudo-random floats.
   *
   * Each value is computed as (float)rnd() / (UINT_MAX >> 5) - 16.0f, i.e.
   * the rnd() result is scaled down and then shifted by -16. rnd() is not
   * defined in this file (presumably provided by checkasm.h); assuming it
   * yields a full-range unsigned int, results span roughly [-16, 16].
   *
   * Both arguments are parenthesized in the expansion and the temporaries use
   * macro-specific names, so arbitrary caller expressions expand safely.
   * `len` is re-evaluated on every iteration.
   */
  #define randomize_float(buf, len)                                             \
      do {                                                                      \
          int flt_idx_;                                                         \
          for (flt_idx_ = 0; flt_idx_ < (len); flt_idx_++) {                    \
              float flt_val_ = (float)rnd() / (UINT_MAX >> 5) - 16.0f;          \
              (buf)[flt_idx_] = flt_val_;                                       \
          }                                                                     \
      } while (0)
  51. static void check_ac3_exponent_min(AC3DSPContext *c) {
  52. #define MAX_COEFS 256
  53. #define MAX_CTXT 6
  54. #define EXP_SIZE (MAX_CTXT * MAX_COEFS)
  55. LOCAL_ALIGNED_16(uint8_t, src, [EXP_SIZE]);
  56. LOCAL_ALIGNED_16(uint8_t, v1, [EXP_SIZE]);
  57. LOCAL_ALIGNED_16(uint8_t, v2, [EXP_SIZE]);
  58. int n;
  59. declare_func(void, uint8_t *, int, int);
  60. for (n = 0; n < MAX_CTXT; ++n) {
  61. if (check_func(c->ac3_exponent_min, "ac3_exponent_min_reuse%d", n)) {
  62. randomize_exp(src, EXP_SIZE);
  63. memcpy(v1, src, EXP_SIZE);
  64. memcpy(v2, src, EXP_SIZE);
  65. call_ref(v1, n, MAX_COEFS);
  66. call_new(v2, n, MAX_COEFS);
  67. if (memcmp(v1, v2, EXP_SIZE) != 0)
  68. fail();
  69. bench_new(v2, n, MAX_COEFS);
  70. }
  71. }
  72. report("ac3_exponent_min");
  73. }
  74. static void check_ac3_extract_exponents(AC3DSPContext *c) {
  75. #define MAX_EXPS 3072
  76. LOCAL_ALIGNED_16(int32_t, src, [MAX_EXPS]);
  77. LOCAL_ALIGNED_16(uint8_t, v1, [MAX_EXPS]);
  78. LOCAL_ALIGNED_16(uint8_t, v2, [MAX_EXPS]);
  79. int n;
  80. declare_func(void, uint8_t *, int32_t *, int);
  81. for (n = 512; n <= MAX_EXPS; n += 256) {
  82. if (check_func(c->extract_exponents, "ac3_extract_exponents_n%d", n)) {
  83. randomize_i24(src, n);
  84. call_ref(v1, src, n);
  85. call_new(v2, src, n);
  86. if (memcmp(v1, v2, n) != 0)
  87. fail();
  88. bench_new(v1, src, n);
  89. }
  90. }
  91. report("ac3_extract_exponents");
  92. }
  93. static void check_float_to_fixed24(AC3DSPContext *c) {
  94. #define BUF_SIZE 1024
  95. LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
  96. declare_func(void, int32_t *, const float *, size_t);
  97. randomize_float(src, BUF_SIZE);
  98. if (check_func(c->float_to_fixed24, "float_to_fixed24")) {
  99. LOCAL_ALIGNED_32(int32_t, dst, [BUF_SIZE]);
  100. LOCAL_ALIGNED_32(int32_t, dst2, [BUF_SIZE]);
  101. call_ref(dst, src, BUF_SIZE);
  102. call_new(dst2, src, BUF_SIZE);
  103. if (memcmp(dst, dst2, BUF_SIZE) != 0)
  104. fail();
  105. bench_new(dst, src, BUF_SIZE);
  106. }
  107. report("float_to_fixed24");
  108. }
  109. static void check_ac3_sum_square_butterfly_int32(AC3DSPContext *c) {
  110. #define ELEMS 240
  111. LOCAL_ALIGNED_16(int32_t, lt, [ELEMS]);
  112. LOCAL_ALIGNED_16(int32_t, rt, [ELEMS]);
  113. LOCAL_ALIGNED_16(uint64_t, v1, [4]);
  114. LOCAL_ALIGNED_16(uint64_t, v2, [4]);
  115. declare_func(void, int64_t[4], const int32_t *, const int32_t *, int);
  116. randomize_i24(lt, ELEMS);
  117. randomize_i24(rt, ELEMS);
  118. if (check_func(c->sum_square_butterfly_int32,
  119. "ac3_sum_square_bufferfly_int32")) {
  120. call_ref(v1, lt, rt, ELEMS);
  121. call_new(v2, lt, rt, ELEMS);
  122. if (memcmp(v1, v2, sizeof(int64_t[4])) != 0)
  123. fail();
  124. bench_new(v2, lt, rt, ELEMS);
  125. }
  126. report("ac3_sum_square_butterfly_int32");
  127. }
  128. static void check_ac3_sum_square_butterfly_float(AC3DSPContext *c) {
  129. LOCAL_ALIGNED_32(float, lt, [ELEMS]);
  130. LOCAL_ALIGNED_32(float, rt, [ELEMS]);
  131. LOCAL_ALIGNED_16(float, v1, [4]);
  132. LOCAL_ALIGNED_16(float, v2, [4]);
  133. declare_func(void, float[4], const float *, const float *, int);
  134. randomize_float(lt, ELEMS);
  135. randomize_float(rt, ELEMS);
  136. if (check_func(c->sum_square_butterfly_float,
  137. "ac3_sum_square_bufferfly_float")) {
  138. call_ref(v1, lt, rt, ELEMS);
  139. call_new(v2, lt, rt, ELEMS);
  140. if (!float_near_ulp_array(v1, v2, 10, 4))
  141. fail();
  142. bench_new(v2, lt, rt, ELEMS);
  143. }
  144. report("ac3_sum_square_butterfly_float");
  145. }
  146. void checkasm_check_ac3dsp(void)
  147. {
  148. AC3DSPContext c;
  149. ff_ac3dsp_init(&c);
  150. check_ac3_exponent_min(&c);
  151. check_ac3_extract_exponents(&c);
  152. check_float_to_fixed24(&c);
  153. check_ac3_sum_square_butterfly_int32(&c);
  154. check_ac3_sum_square_butterfly_float(&c);
  155. }