dither.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. /*
  2. * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
  3. *
  4. * Triangular with Noise Shaping is based on opusfile.
  5. * Copyright (c) 1994-2012 by the Xiph.Org Foundation and contributors
  6. *
  7. * This file is part of Libav.
  8. *
  9. * Libav is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * Libav is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with Libav; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. /**
  24. * @file
  25. * Dithered Audio Sample Quantization
  26. *
  27. * Converts from dbl, flt, or s32 to s16 using dithering.
  28. */
  29. #include <math.h>
  30. #include <stdint.h>
  31. #include "libavutil/common.h"
  32. #include "libavutil/lfg.h"
  33. #include "libavutil/mem.h"
  34. #include "libavutil/samplefmt.h"
  35. #include "audio_convert.h"
  36. #include "dither.h"
  37. #include "internal.h"
  /**
   * Per-channel dithering state.
   */
  typedef struct DitherState {
      int mute;               // run length of consecutive zero-valued input samples (reset on any non-zero sample)
      unsigned int seed;      // seed used to (re)initialize lfg when noise is generated
      AVLFG lfg;              // pseudo-random generator that fills noise_buf
      float *noise_buf;       // pre-generated dither noise, allocated by generate_dither_noise()
      int noise_buf_size;     // number of usable noise samples (excludes the 16 samples of padding)
      int noise_buf_ptr;      // current read offset into noise_buf
      float dither_a[4];      // noise shaping history: previous filter outputs
      float dither_b[4];      // noise shaping history: previous (clipped) quantization errors
  } DitherState;
  /**
   * Dithering context shared by all channels.
   *
   * Created by ff_dither_alloc() and released by ff_dither_free().
   */
  struct DitherContext {
      DitherDSPContext ddsp;              // quantize / int-to-float function pointers and their alignment needs
      enum AVResampleDitherMethod method; // dither method in use
      int mute_dither_threshold;  // threshold for disabling dither
      int mute_reset_threshold;   // threshold for resetting noise shaping
      const float *ns_coef_b;     // noise shaping coeffs
      const float *ns_coef_a;     // noise shaping coeffs

      int channels;               // number of entries in state
      DitherState *state;         // dither states for each channel

      AudioData *flt_data;        // input data in fltp
      AudioData *s16_data;        // dithered output in s16p
      AudioConvert *ac_in;        // converter for input to fltp
      AudioConvert *ac_out;       // converter for s16p to s16 (if needed)

      // quantizer selected per call by ff_convert_dither(): either
      // ddsp.quantize or the plain C fallback
      void (*quantize)(int16_t *dst, const float *src, float *dither, int len);
      int samples_align;          // length alignment applied when calling quantize
  };
  /* mute threshold, in seconds; multiplied by the sample rate in
     ff_dither_alloc() to obtain the threshold as a sample count */
  #define MUTE_THRESHOLD_SEC 0.000333

  /* scale factor for 16-bit output.
     The signal is attenuated slightly to avoid clipping */
  #define S16_SCALE 32753.0f

  /* scale to convert lfg from INT_MIN/INT_MAX to -0.5/0.5 */
  #define LFG_SCALE (1.0f / (2.0f * INT32_MAX))
  /* noise shaping coefficients
   *
   * The *_b tables weight the quantization error history (dither_b) and the
   * *_a tables weight the filter output history (dither_a) in
   * quantize_triangular_ns().
   * ff_dither_alloc() selects the ns_48_* pair for a 48000 Hz sample rate and
   * the ns_44_* pair otherwise (triangular_ns is only kept for 48000 and
   * 44100 Hz). */
  static const float ns_48_coef_b[4] = {
      2.2374f, -0.7339f, -0.1251f, -0.6033f
  };

  static const float ns_48_coef_a[4] = {
      0.9030f, 0.0116f, -0.5853f, -0.2571f
  };

  static const float ns_44_coef_b[4] = {
      2.2061f, -0.4707f, -0.2534f, -0.6213f
  };

  static const float ns_44_coef_a[4] = {
      1.0587f, 0.0676f, -0.6054f, -0.2738f
  };
  84. static void dither_int_to_float_rectangular_c(float *dst, int *src, int len)
  85. {
  86. int i;
  87. for (i = 0; i < len; i++)
  88. dst[i] = src[i] * LFG_SCALE;
  89. }
  90. static void dither_int_to_float_triangular_c(float *dst, int *src0, int len)
  91. {
  92. int i;
  93. int *src1 = src0 + len;
  94. for (i = 0; i < len; i++) {
  95. float r = src0[i] * LFG_SCALE;
  96. r += src1[i] * LFG_SCALE;
  97. dst[i] = r;
  98. }
  99. }
  100. static void quantize_c(int16_t *dst, const float *src, float *dither, int len)
  101. {
  102. int i;
  103. for (i = 0; i < len; i++)
  104. dst[i] = av_clip_int16(lrintf(src[i] * S16_SCALE + dither[i]));
  105. }
  #define SQRT_1_6 0.40824829046386301723f

  /**
   * High-pass filter a noise buffer in place.
   *
   * Each of the first len - 2 samples is replaced by a weighted combination of
   * itself and the two samples that follow it; the last two samples are left
   * unchanged.
   *
   * @param src  noise buffer, filtered in place
   * @param len  number of samples in src
   */
  static void dither_highpass_filter(float *src, int len)
  {
      const int last = len - 2;
      int pos = 0;

      /* filter is from libswresample in FFmpeg */
      while (pos < last) {
          const float cur   = src[pos];
          const float next1 = src[pos + 1];
          const float next2 = src[pos + 2];

          src[pos] = (-cur + 2 * next1 - next2) * SQRT_1_6;
          pos++;
      }
  }
  114. static int generate_dither_noise(DitherContext *c, DitherState *state,
  115. int min_samples)
  116. {
  117. int i;
  118. int nb_samples = FFALIGN(min_samples, 16) + 16;
  119. int buf_samples = nb_samples *
  120. (c->method == AV_RESAMPLE_DITHER_RECTANGULAR ? 1 : 2);
  121. unsigned int *noise_buf_ui;
  122. av_freep(&state->noise_buf);
  123. state->noise_buf_size = state->noise_buf_ptr = 0;
  124. state->noise_buf = av_malloc(buf_samples * sizeof(*state->noise_buf));
  125. if (!state->noise_buf)
  126. return AVERROR(ENOMEM);
  127. state->noise_buf_size = FFALIGN(min_samples, 16);
  128. noise_buf_ui = (unsigned int *)state->noise_buf;
  129. av_lfg_init(&state->lfg, state->seed);
  130. for (i = 0; i < buf_samples; i++)
  131. noise_buf_ui[i] = av_lfg_get(&state->lfg);
  132. c->ddsp.dither_int_to_float(state->noise_buf, noise_buf_ui, nb_samples);
  133. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_HP)
  134. dither_highpass_filter(state->noise_buf, nb_samples);
  135. return 0;
  136. }
  /**
   * Quantize one channel to int16 using dither with a 4-tap noise shaping
   * feedback filter.
   *
   * Dither is suppressed once the input has been exactly zero for more than
   * mute_dither_threshold samples, and the filter output history is cleared
   * when the zero run exceeded mute_reset_threshold on entry.
   *
   * @param c           dither context (coefficients and mute thresholds)
   * @param state       per-channel state (noise buffer, filter history, mute counter)
   * @param dst         output samples
   * @param src         input float samples
   * @param nb_samples  number of samples to process
   */
  static void quantize_triangular_ns(DitherContext *c, DitherState *state,
                                     int16_t *dst, const float *src,
                                     int nb_samples)
  {
      int i, j;
      float *dither = &state->noise_buf[state->noise_buf_ptr];

      /* long run of zero input before this call: clear the filter output history */
      if (state->mute > c->mute_reset_threshold)
          memset(state->dither_a, 0, sizeof(state->dither_a));

      for (i = 0; i < nb_samples; i++) {
          float err = 0;
          float sample = src[i] * S16_SCALE;

          /* shaped error from the last 4 quantization errors (dither_b) and
             the last 4 filter outputs (dither_a) */
          for (j = 0; j < 4; j++) {
              err += c->ns_coef_b[j] * state->dither_b[j] -
                     c->ns_coef_a[j] * state->dither_a[j];
          }
          /* shift both histories by one to make room at index 0 */
          for (j = 3; j > 0; j--) {
              state->dither_a[j] = state->dither_a[j - 1];
              state->dither_b[j] = state->dither_b[j - 1];
          }
          state->dither_a[0] = err;
          sample -= err;

          if (state->mute > c->mute_dither_threshold) {
              /* muted: quantize without dither and record no error */
              dst[i] = av_clip_int16(lrintf(sample));
              state->dither_b[0] = 0;
          } else {
              dst[i] = av_clip_int16(lrintf(sample + dither[i]));
              /* record the quantization error, limited to +/-1.5 */
              state->dither_b[0] = av_clipf(dst[i] - sample, -1.5f, 1.5f);
          }

          /* count consecutive zero input samples; any non-zero sample resets */
          state->mute++;
          if (src[i])
              state->mute = 0;
      }
  }
  170. static int convert_samples(DitherContext *c, int16_t **dst, float * const *src,
  171. int channels, int nb_samples)
  172. {
  173. int ch, ret;
  174. int aligned_samples = FFALIGN(nb_samples, 16);
  175. for (ch = 0; ch < channels; ch++) {
  176. DitherState *state = &c->state[ch];
  177. if (state->noise_buf_size < aligned_samples) {
  178. ret = generate_dither_noise(c, state, nb_samples);
  179. if (ret < 0)
  180. return ret;
  181. } else if (state->noise_buf_size - state->noise_buf_ptr < aligned_samples) {
  182. state->noise_buf_ptr = 0;
  183. }
  184. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  185. quantize_triangular_ns(c, state, dst[ch], src[ch], nb_samples);
  186. } else {
  187. c->quantize(dst[ch], src[ch],
  188. &state->noise_buf[state->noise_buf_ptr],
  189. FFALIGN(nb_samples, c->samples_align));
  190. }
  191. state->noise_buf_ptr += aligned_samples;
  192. }
  193. return 0;
  194. }
  195. int ff_convert_dither(DitherContext *c, AudioData *dst, AudioData *src)
  196. {
  197. int ret;
  198. AudioData *flt_data;
  199. /* output directly to dst if it is planar */
  200. if (dst->sample_fmt == AV_SAMPLE_FMT_S16P)
  201. c->s16_data = dst;
  202. else {
  203. /* make sure s16_data is large enough for the output */
  204. ret = ff_audio_data_realloc(c->s16_data, src->nb_samples);
  205. if (ret < 0)
  206. return ret;
  207. }
  208. if (src->sample_fmt != AV_SAMPLE_FMT_FLTP) {
  209. /* make sure flt_data is large enough for the input */
  210. ret = ff_audio_data_realloc(c->flt_data, src->nb_samples);
  211. if (ret < 0)
  212. return ret;
  213. flt_data = c->flt_data;
  214. /* convert input samples to fltp and scale to s16 range */
  215. ret = ff_audio_convert(c->ac_in, flt_data, src);
  216. if (ret < 0)
  217. return ret;
  218. } else {
  219. flt_data = src;
  220. }
  221. /* check alignment and padding constraints */
  222. if (c->method != AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  223. int ptr_align = FFMIN(flt_data->ptr_align, c->s16_data->ptr_align);
  224. int samples_align = FFMIN(flt_data->samples_align, c->s16_data->samples_align);
  225. int aligned_len = FFALIGN(src->nb_samples, c->ddsp.samples_align);
  226. if (!(ptr_align % c->ddsp.ptr_align) && samples_align >= aligned_len) {
  227. c->quantize = c->ddsp.quantize;
  228. c->samples_align = c->ddsp.samples_align;
  229. } else {
  230. c->quantize = quantize_c;
  231. c->samples_align = 1;
  232. }
  233. }
  234. ret = convert_samples(c, (int16_t **)c->s16_data->data,
  235. (float * const *)flt_data->data, src->channels,
  236. src->nb_samples);
  237. if (ret < 0)
  238. return ret;
  239. c->s16_data->nb_samples = src->nb_samples;
  240. /* interleave output to dst if needed */
  241. if (dst->sample_fmt == AV_SAMPLE_FMT_S16) {
  242. ret = ff_audio_convert(c->ac_out, dst, c->s16_data);
  243. if (ret < 0)
  244. return ret;
  245. } else
  246. c->s16_data = NULL;
  247. return 0;
  248. }
  249. void ff_dither_free(DitherContext **cp)
  250. {
  251. DitherContext *c = *cp;
  252. int ch;
  253. if (!c)
  254. return;
  255. ff_audio_data_free(&c->flt_data);
  256. ff_audio_data_free(&c->s16_data);
  257. ff_audio_convert_free(&c->ac_in);
  258. ff_audio_convert_free(&c->ac_out);
  259. for (ch = 0; ch < c->channels; ch++)
  260. av_free(c->state[ch].noise_buf);
  261. av_free(c->state);
  262. av_freep(cp);
  263. }
  264. static void dither_init(DitherDSPContext *ddsp,
  265. enum AVResampleDitherMethod method)
  266. {
  267. ddsp->quantize = quantize_c;
  268. ddsp->ptr_align = 1;
  269. ddsp->samples_align = 1;
  270. if (method == AV_RESAMPLE_DITHER_RECTANGULAR)
  271. ddsp->dither_int_to_float = dither_int_to_float_rectangular_c;
  272. else
  273. ddsp->dither_int_to_float = dither_int_to_float_triangular_c;
  274. }
  275. DitherContext *ff_dither_alloc(AVAudioResampleContext *avr,
  276. enum AVSampleFormat out_fmt,
  277. enum AVSampleFormat in_fmt,
  278. int channels, int sample_rate)
  279. {
  280. AVLFG seed_gen;
  281. DitherContext *c;
  282. int ch;
  283. if (av_get_packed_sample_fmt(out_fmt) != AV_SAMPLE_FMT_S16 ||
  284. av_get_bytes_per_sample(in_fmt) <= 2) {
  285. av_log(avr, AV_LOG_ERROR, "dithering %s to %s is not supported\n",
  286. av_get_sample_fmt_name(in_fmt), av_get_sample_fmt_name(out_fmt));
  287. return NULL;
  288. }
  289. c = av_mallocz(sizeof(*c));
  290. if (!c)
  291. return NULL;
  292. if (avr->dither_method == AV_RESAMPLE_DITHER_TRIANGULAR_NS &&
  293. sample_rate != 48000 && sample_rate != 44100) {
  294. av_log(avr, AV_LOG_WARNING, "sample rate must be 48000 or 44100 Hz "
  295. "for triangular_ns dither. using triangular_hp instead.\n");
  296. avr->dither_method = AV_RESAMPLE_DITHER_TRIANGULAR_HP;
  297. }
  298. c->method = avr->dither_method;
  299. dither_init(&c->ddsp, c->method);
  300. if (c->method == AV_RESAMPLE_DITHER_TRIANGULAR_NS) {
  301. if (sample_rate == 48000) {
  302. c->ns_coef_b = ns_48_coef_b;
  303. c->ns_coef_a = ns_48_coef_a;
  304. } else {
  305. c->ns_coef_b = ns_44_coef_b;
  306. c->ns_coef_a = ns_44_coef_a;
  307. }
  308. }
  309. /* Either s16 or s16p output format is allowed, but s16p is used
  310. internally, so we need to use a temp buffer and interleave if the output
  311. format is s16 */
  312. if (out_fmt != AV_SAMPLE_FMT_S16P) {
  313. c->s16_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_S16P,
  314. "dither s16 buffer");
  315. if (!c->s16_data)
  316. goto fail;
  317. c->ac_out = ff_audio_convert_alloc(avr, out_fmt, AV_SAMPLE_FMT_S16P,
  318. channels, sample_rate);
  319. if (!c->ac_out)
  320. goto fail;
  321. }
  322. if (in_fmt != AV_SAMPLE_FMT_FLTP) {
  323. c->flt_data = ff_audio_data_alloc(channels, 1024, AV_SAMPLE_FMT_FLTP,
  324. "dither flt buffer");
  325. if (!c->flt_data)
  326. goto fail;
  327. c->ac_in = ff_audio_convert_alloc(avr, AV_SAMPLE_FMT_FLTP, in_fmt,
  328. channels, sample_rate);
  329. if (!c->ac_in)
  330. goto fail;
  331. }
  332. c->state = av_mallocz(channels * sizeof(*c->state));
  333. if (!c->state)
  334. goto fail;
  335. c->channels = channels;
  336. /* calculate thresholds for turning off dithering during periods of
  337. silence to avoid replacing digital silence with quiet dither noise */
  338. c->mute_dither_threshold = lrintf(sample_rate * MUTE_THRESHOLD_SEC);
  339. c->mute_reset_threshold = c->mute_dither_threshold * 4;
  340. /* initialize dither states */
  341. av_lfg_init(&seed_gen, 0xC0FFEE);
  342. for (ch = 0; ch < channels; ch++) {
  343. DitherState *state = &c->state[ch];
  344. state->mute = c->mute_reset_threshold + 1;
  345. state->seed = av_lfg_get(&seed_gen);
  346. generate_dither_noise(c, state, FFMAX(32768, sample_rate / 2));
  347. }
  348. return c;
  349. fail:
  350. ff_dither_free(&c);
  351. return NULL;
  352. }