dither.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439
  1. /*
  2. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  3. *
  4. * Triangular with Noise Shaping is based on opusfile.
  5. * Copyright (c) 1994-2012 by the Xiph.Org Foundation and contributors
  6. *
  7. * This file is part of Libav.
  8. *
  9. * Libav is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * Libav is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with Libav; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * Dithered Audio Sample Quantization
  26. *
  27. * Converts from dbl, flt, or s32 to s16 using dithering.
  28. */
  29. #include <math.h>
  30. #include <stdint.h>
  31. #include "libavutil/common.h"
  32. #include "libavutil/lfg.h"
  33. #include "libavutil/mem.h"
  34. #include "libavutil/samplefmt.h"
  35. #include "audio_convert.h"
  36. #include "dither.h"
  37. #include "internal.h"
  38. typedef struct DitherState {
  39. int mute;
  40. unsigned int seed;
  41. AVLFG lfg;
  42. float *noise_buf;
  43. int noise_buf_size;
  44. int noise_buf_ptr;
  45. float dither_a[4];
  46. float dither_b[4];
  47. } DitherState;
  48. struct DitherContext {
  49. DitherDSPContext ddsp;
  50. enum AVResampleDitherMethod method;
  51. int apply_map;
  52. ChannelMapInfo *ch_map_info;
  53. int mute_dither_threshold; // threshold for disabling dither
  54. int mute_reset_threshold; // threshold for resetting noise shaping
  55. const float *ns_coef_b; // noise shaping coeffs
  56. const float *ns_coef_a; // noise shaping coeffs
  57. int channels;
  58. DitherState *state; // dither states for each channel
  59. AudioData *flt_data; // input data in fltp
  60. AudioData *s16_data; // dithered output in s16p
  61. AudioConvert *ac_in; // converter for input to fltp
  62. AudioConvert *ac_out; // converter for s16p to s16 (if needed)
  63. void (*quantize)(int16_t *dst, const float *src, float *dither, int len);
  64. int samples_align;
  65. };
  /* Mute threshold, expressed in seconds. */
  #define MUTE_THRESHOLD_SEC 0.000333

  /* Scale factor for 16-bit output.
     It is slightly attenuated so that the signal avoids clipping. */
  #define S16_SCALE 32753.0f

  /* Scale that maps an lfg value from INT_MIN/INT_MAX to -0.5/0.5. */
  #define LFG_SCALE (1.0f / (2.0f * INT32_MAX))
  /* Noise shaping coefficients: the ns_48_* pair and the ns_44_* pair.
     Each table has one entry per history tap (4 taps). */
  static const float ns_48_coef_b[4] = {  2.2374f, -0.7339f, -0.1251f, -0.6033f };
  static const float ns_48_coef_a[4] = {  0.9030f,  0.0116f, -0.5853f, -0.2571f };

  static const float ns_44_coef_b[4] = {  2.2061f, -0.4707f, -0.2534f, -0.6213f };
  static const float ns_44_coef_a[4] = {  1.0587f,  0.0676f, -0.6054f, -0.2738f };
  86. static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
  87. {
  88. int i;
  89. for (i = 0; i < len; i++)
  90. dst[i] = src[i] * LFG_SCALE;
  91. }
  92. static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
  93. {
  94. int i;
  95. int *src1 = src0 + len;
  96. for (i = 0; i < len; i++) {
  97. float r = src0[i] * LFG_SCALE;
  98. r += src1[i] * LFG_SCALE;
  99. dst[i] = r;
  100. }
  101. }
  102. static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
  103. {
  104. int i;
  105. for (i = 0; i < len; i++)
  106. dst[i] = av_clip_int16(lrintf(src[i] * S16_SCALE + dither[i]));
  107. }
  #define SQRT_1_6 0.40824829046386301723f

  /* In-place filter over src (filter taken from libswresample in FFmpeg).
     Each output uses the current and the next two, still unfiltered, samples.
     The last two samples of the buffer are left untouched. */
  static void dither_highpass_filter(float *src, int len)
  {
      const int last = len - 2;
      int pos = 0;

      while (pos < last) {
          src[pos] = (-src[pos] + 2 * src[pos + 1] - src[pos + 2]) * SQRT_1_6;
          pos++;
      }
  }
  116. static int generate_dither_noise(DitherContext *c, DitherState *state,
  117. int min_samples)
  118. {
  119. int i;
  120. int nb_samples = FFALIGN(min_samples, 16) + 16;
  121. int buf_samples = nb_samples *
  122. (c->method == AV_RESAMPLE_DITHER_RECTANGULAR ? 1 : 2);
  123. unsigned int *noise_buf_ui;
  124. av_freep(&state->noise_buf);
  125. state->noise_buf_size = state->noise_buf_ptr = 0;
  126. state->noise_buf = av_malloc(buf_samples * sizeof(*state->noise_buf));
  127. if (!state->noise_buf)
  128. return AVERROR(ENOMEM);
  129. state->noise_buf_size = FFALIGN(min_samples, 16);
  130. noise_buf_ui = (unsigned int *)state->noise_buf;
  131. av_lfg_init(&state->lfg, state->seed);
  132. for (i = 0; i < buf_samples; i++)
  133. noise_buf_ui[i] = av_lfg_get(&state->lfg);
  134. c->ddsp.dither_int_to_float(state->noise_buf, noise_buf_ui, nb_samples);
  135. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_HP)
  136. dither_highpass_filter(state->noise_buf, nb_samples);
  137. return 0;
  138. }
  139. static void quantize_triangular_ns(DitherContext *c, DitherState *state,
  140. int16_t *dst, const float *src,
  141. int nb_samples)
  142. {
  143. int i, j;
  144. float *dither = &state->noise_buf[state->noise_buf_ptr];
  145. if (state->mute > c->mute_reset_threshold)
  146. memset(state->dither_a, 0, sizeof(state->dither_a));
  147. for (i = 0; i < nb_samples; i++) {
  148. float err = 0;
  149. float sample = src[i] * S16_SCALE;
  150. for (j = 0; j < 4; j++) {
  151. err += c->ns_coef_b[j] * state->dither_b[j] -
  152. c->ns_coef_a[j] * state->dither_a[j];
  153. }
  154. for (j = 3; j > 0; j--) {
  155. state->dither_a[j] = state->dither_a[j - 1];
  156. state->dither_b[j] = state->dither_b[j - 1];
  157. }
  158. state->dither_a[0] = err;
  159. sample -= err;
  160. if (state->mute > c->mute_dither_threshold) {
  161. dst[i] = av_clip_int16(lrintf(sample));
  162. state->dither_b[0] = 0;
  163. } else {
  164. dst[i] = av_clip_int16(lrintf(sample + dither[i]));
  165. state->dither_b[0] = av_clipf(dst[i] - sample, -1.5f, 1.5f);
  166. }
  167. state->mute++;
  168. if (src[i])
  169. state->mute = 0;
  170. }
  171. }
  172. static int convert_samples(DitherContext *c, int16_t **dst, float * const *src,
  173. int channels, int nb_samples)
  174. {
  175. int ch, ret;
  176. int aligned_samples = FFALIGN(nb_samples, 16);
  177. for (ch = 0; ch < channels; ch++) {
  178. DitherState *state = &c->state[ch];
  179. if (state->noise_buf_size < aligned_samples) {
  180. ret = generate_dither_noise(c, state, nb_samples);
  181. if (ret < 0)
  182. return ret;
  183. } else if (state->noise_buf_size - state->noise_buf_ptr < aligned_samples) {
  184. state->noise_buf_ptr = 0;
  185. }
  186. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  187. quantize_triangular_ns(c, state, dst[ch], src[ch], nb_samples);
  188. } else {
  189. c->quantize(dst[ch], src[ch],
  190. &state->noise_buf[state->noise_buf_ptr],
  191. FFALIGN(nb_samples, c->samples_align));
  192. }
  193. state->noise_buf_ptr += aligned_samples;
  194. }
  195. return 0;
  196. }
  197. int ff_convert_dither(DitherContext *c, AudioData *dst, AudioData *src)
  198. {
  199. int ret;
  200. AudioData *flt_data;
  201. /* output directly to dst if it is planar */
  202. if (dst->sample_fmt == AV_SAMPLE_FMT_S16P)
  203. c->s16_data = dst;
  204. else {
  205. /* make sure s16_data is large enough for the output */
  206. ret = ff_audio_data_realloc(c->s16_data, src->nb_samples);
  207. if (ret < 0)
  208. return ret;
  209. }
  210. if (src->sample_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
  211. /* make sure flt_data is large enough for the input */
  212. ret = ff_audio_data_realloc(c->flt_data, src->nb_samples);
  213. if (ret < 0)
  214. return ret;
  215. flt_data = c->flt_data;
  216. }
  217. if (src->sample_fmt != AV_SAMPLE_FMT_FLTP) {
  218. /* convert input samples to fltp and scale to s16 range */
  219. ret = ff_audio_convert(c->ac_in, flt_data, src);
  220. if (ret < 0)
  221. return ret;
  222. } else if (c->apply_map) {
  223. ret = ff_audio_data_copy(flt_data, src, c->ch_map_info);
  224. if (ret < 0)
  225. return ret;
  226. } else {
  227. flt_data = src;
  228. }
  229. /* check alignment and padding constraints */
  230. if (c->method != AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  231. int ptr_align = FFMIN(flt_data->ptr_align, c->s16_data->ptr_align);
  232. int samples_align = FFMIN(flt_data->samples_align, c->s16_data->samples_align);
  233. int aligned_len = FFALIGN(src->nb_samples, c->ddsp.samples_align);
  234. if (!(ptr_align % c->ddsp.ptr_align) && samples_align >= aligned_len) {
  235. c->quantize = c->ddsp.quantize;
  236. c->samples_align = c->ddsp.samples_align;
  237. } else {
  238. c->quantize = quantize_c;
  239. c->samples_align = 1;
  240. }
  241. }
  242. ret = convert_samples(c, (int16_t **)c->s16_data->data,
  243. (float * const *)flt_data->data, src->channels,
  244. src->nb_samples);
  245. if (ret < 0)
  246. return ret;
  247. c->s16_data->nb_samples = src->nb_samples;
  248. /* interleave output to dst if needed */
  249. if (dst->sample_fmt == AV_SAMPLE_FMT_S16) {
  250. ret = ff_audio_convert(c->ac_out, dst, c->s16_data);
  251. if (ret < 0)
  252. return ret;
  253. } else
  254. c->s16_data = NULL;
  255. return 0;
  256. }
  257. void ff_dither_free(DitherContext **cp)
  258. {
  259. DitherContext *c = *cp;
  260. int ch;
  261. if (!c)
  262. return;
  263. ff_audio_data_free(&c->flt_data);
  264. ff_audio_data_free(&c->s16_data);
  265. ff_audio_convert_free(&c->ac_in);
  266. ff_audio_convert_free(&c->ac_out);
  267. for (ch = 0; ch < c->channels; ch++)
  268. av_free(c->state[ch].noise_buf);
  269. av_free(c->state);
  270. av_freep(cp);
  271. }
  272. static void dither_init(DitherDSPContext *ddsp,
  273. enum AVResampleDitherMethod method)
  274. {
  275. ddsp->quantize = quantize_c;
  276. ddsp->ptr_align = 1;
  277. ddsp->samples_align = 1;
  278. if (method == AV_RESAMPLE_DITHER_RECTANGULAR)
  279. ddsp->dither_int_to_float = dither_int_to_float_rectangular_c;
  280. else
  281. ddsp->dither_int_to_float = dither_int_to_float_triangular_c;
  282. if (ARCH_X86)
  283. ff_dither_init_x86(ddsp, method);
  284. }
  285. DitherContext *ff_dither_alloc(AVAudioResampleContext *avr,
  286. enum AVSampleFormat out_fmt,
  287. enum AVSampleFormat in_fmt,
  288. int channels, int sample_rate, int apply_map)
  289. {
  290. AVLFG seed_gen;
  291. DitherContext *c;
  292. int ch;
  293. if (av_get_packed_sample_fmt(out_fmt) != AV_SAMPLE_FMT_S16 ||
  294. av_get_bytes_per_sample(in_fmt) <= 2) {
  295. av_log(avr, AV_LOG_ERROR, "dithering %s to %s is not supported\n",
  296. av_get_sample_fmt_name(in_fmt), av_get_sample_fmt_name(out_fmt));
  297. return NULL;
  298. }
  299. c = av_mallocz(sizeof(*c));
  300. if (!c)
  301. return NULL;
  302. c->apply_map = apply_map;
  303. if (apply_map)
  304. c->ch_map_info = &avr->ch_map_info;
  305. if (avr->dither_method == AV_RESAMPLE_DITHER_TRIANGULAR_NS &&
  306. sample_rate != 48000 && sample_rate != 44100) {
  307. av_log(avr, AV_LOG_WARNING, "sample rate must be 48000 or 44100 Hz "
  308. "for triangular_ns dither. using triangular_hp instead.\n");
  309. avr->dither_method = AV_RESAMPLE_DITHER_TRIANGULAR_HP;
  310. }
  311. c->method = avr->dither_method;
  312. dither_init(&c->ddsp, c->method);
  313. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  314. if (sample_rate == 48000) {
  315. c->ns_coef_b = ns_48_coef_b;
  316. c->ns_coef_a = ns_48_coef_a;
  317. } else {
  318. c->ns_coef_b = ns_44_coef_b;
  319. c->ns_coef_a = ns_44_coef_a;
  320. }
  321. }
  322. /* Either s16 or s16p output format is allowed, but s16p is used
  323. internally, so we need to use a temp buffer and interleave if the output
  324. format is s16 */
  325. if (out_fmt != AV_SAMPLE_FMT_S16P) {
  326. c->s16_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_S16P,
  327. "dither s16 buffer");
  328. if (!c->s16_data)
  329. goto fail;
  330. c->ac_out = ff_audio_convert_alloc(avr, out_fmt, AV_SAMPLE_FMT_S16P,
  331. channels, sample_rate, 0);
  332. if (!c->ac_out)
  333. goto fail;
  334. }
  335. if (in_fmt != AV_SAMPLE_FMT_FLTP || c->apply_map) {
  336. c->flt_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_FLTP,
  337. "dither flt buffer");
  338. if (!c->flt_data)
  339. goto fail;
  340. }
  341. if (in_fmt != AV_SAMPLE_FMT_FLTP) {
  342. c->ac_in = ff_audio_convert_alloc(avr, AV_SAMPLE_FMT_FLTP, in_fmt,
  343. channels, sample_rate, c->apply_map);
  344. if (!c->ac_in)
  345. goto fail;
  346. }
  347. c->state = av_mallocz(channels * sizeof(*c->state));
  348. if (!c->state)
  349. goto fail;
  350. c->channels = channels;
  351. /* calculate thresholds for turning off dithering during periods of
  352. silence to avoid replacing digital silence with quiet dither noise */
  353. c->mute_dither_threshold = lrintf(sample_rate * MUTE_THRESHOLD_SEC);
  354. c->mute_reset_threshold = c->mute_dither_threshold * 4;
  355. /* initialize dither states */
  356. av_lfg_init(&seed_gen, 0xC0FFEE);
  357. for (ch = 0; ch < channels; ch++) {
  358. DitherState *state = &c->state[ch];
  359. state->mute = c->mute_reset_threshold + 1;
  360. state->seed = av_lfg_get(&seed_gen);
  361. generate_dither_noise(c, state, FFMAX(32768, sample_rate / 2));
  362. }
  363. return c;
  364. fail:
  365. ff_dither_free(&c);
  366. return NULL;
  367. }