13 years ago · 3074f03a07
--- a/Doxyfile
+++ b/Doxyfile
@@ -1160,6 +1160,7 @@ INCLUDE_FILE_PATTERNS  =
 
															 PREDEFINED             = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
														
 
															                          HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
														
 
															+                         "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
														
 
															 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
														
 
															 # this tag can be used to specify a list of macro names that should be expanded.
														
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
 
															     int i;
														
 
															     for (i = 0; i < nb_coefs; i++) {
														
 
															-        int e;
														
 
															         int v = abs(coef[i]);
														
 
															-        if (v == 0)
														
 
															-            e = 24;
														
 
															-        else {
														
 
															-            e = 23 - av_log2(v);
														
 
															-            if (e >= 24) {
														
 
															-                e = 24;
														
 
															-                coef[i] = 0;
														
 
															-            } else if (e < 0) {
														
 
															-                e = 0;
														
 
															-                coef[i] = av_clip(coef[i], -16777215, 16777215);
														
 
															-            }
														
 
															-        }
														
 
															-        exp[i] = e;
														
 
															+        exp[i] = v ? 23 - av_log2(v) : 24;
														
 
															     }
														
 
															 }
														
--- a/libavcodec/ac3enc.h
+++ b/libavcodec/ac3enc.h
@@ -50,12 +50,16 @@
 
															 #if CONFIG_AC3ENC_FLOAT
														
 
															 #define AC3_NAME(x) ff_ac3_float_ ## x
														
 
															 #define MAC_COEF(d,a,b) ((d)+=(a)*(b))
														
 
															+#define COEF_MIN (-16777215.0/16777216.0)
														
 
															+#define COEF_MAX ( 16777215.0/16777216.0)
														
 
															 typedef float SampleType;
														
 
															 typedef float CoefType;
														
 
															 typedef float CoefSumType;
														
 
															 #else
														
 
															 #define AC3_NAME(x) ff_ac3_fixed_ ## x
														
 
															 #define MAC_COEF(d,a,b) MAC64(d,a,b)
														
 
															+#define COEF_MIN -16777215
														
 
															+#define COEF_MAX  16777215
														
 
															 typedef int16_t SampleType;
														
 
															 typedef int32_t CoefType;
														
 
															 typedef int64_t CoefSumType;
														
--- a/libavcodec/ac3enc_fixed.c
+++ b/libavcodec/ac3enc_fixed.c
@@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s)
 
															 }
														
 
															+/**
														
 
															+ * Clip MDCT coefficients to allowable range.
														
 
															+ */
														
 
															+static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
														
 
															+{
														
 
															+    dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
														
 
															+}
														
 
															+
														
 
															+
														
 
															 static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
														
 
															 {
														
 
															     AC3EncodeContext *s = avctx->priv_data;
														
--- a/libavcodec/ac3enc_float.c
+++ b/libavcodec/ac3enc_float.c
@@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s)
 
															 }
														
 
															+/**
														
 
															+ * Clip MDCT coefficients to allowable range.
														
 
															+ */
														
 
															+static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
														
 
															+{
														
 
															+    dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
														
 
															+}
														
 
															+
														
 
															+
														
 
															 #if CONFIG_AC3_ENCODER
														
 
															 AVCodec ff_ac3_float_encoder = {
														
 
															     "ac3_float",
														
--- a/libavcodec/ac3enc_template.c
+++ b/libavcodec/ac3enc_template.c
@@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output,
 
															 static int normalize_samples(AC3EncodeContext *s);
														
 
															+static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
														
 
															+
														
 
															 int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
														
 
															 {
														
@@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
 
															                 cpl_coef[i] += ch_coef[i];
														
 
															         }
														
 
															-        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
														
 
															-        s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
														
 
															+        /* coefficients must be clipped in order to be encoded */
														
 
															+        clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
														
 
															         /* scale coupling coefficients from float to 24-bit fixed-point */
														
 
															         s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
														
@@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
 
															         if (!block->cpl_in_use || !block->new_cpl_coords)
														
 
															             continue;
														
 
															+        clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16);
														
 
															         s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
														
 
															                                    cpl_coords[blk][1],
														
 
															                                    s->fbw_channels * 16);
														
@@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
 
															     apply_mdct(s);
														
 
															-    scale_coefficients(s);
														
 
															+    if (s->fixed_point)
														
 
															+        scale_coefficients(s);
														
 
															+
														
 
															+    clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
														
 
															+                      AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
														
 
															     s->cpl_on = s->cpl_enabled;
														
 
															     ff_ac3_compute_coupling_strategy(s);
														
@@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
 
															     compute_rematrixing_strategy(s);
														
 
															+    if (!s->fixed_point)
														
 
															+        scale_coefficients(s);
														
 
															+
														
 
															     ff_ac3_apply_rematrixing(s);
														
 
															     ff_ac3_process_exponents(s);
														
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2664,6 +2664,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
 
															     }
														
 
															 }
														
 
															+static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
														
 
															+                                int32_t max, unsigned int len)
														
 
															+{
														
 
															+    do {
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        *dst++ = av_clip(*src++, min, max);
														
 
															+        len -= 8;
														
 
															+    } while (len > 0);
														
 
															+}
														
 
															+
														
 
															 #define W0 2048
														
 
															 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
														
 
															 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
														
@@ -3106,6 +3122,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 
															     c->scalarproduct_int16 = scalarproduct_int16_c;
														
 
															     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
														
 
															     c->apply_window_int16 = apply_window_int16_c;
														
 
															+    c->vector_clip_int32 = vector_clip_int32_c;
														
 
															     c->scalarproduct_float = scalarproduct_float_c;
														
 
															     c->butterflies_float = butterflies_float_c;
														
 
															     c->vector_fmul_scalar = vector_fmul_scalar_c;
														
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -553,6 +553,22 @@ typedef struct DSPContext {
 
															     void (*apply_window_int16)(int16_t *output, const int16_t *input,
														
 
															                                const int16_t *window, unsigned int len);
														
 
															+    /**
														
 
															+     * Clip each element in an array of int32_t to a given minimum and maximum value.
														
 
															+     * @param dst  destination array
														
 
															+     *             constraints: 16-byte aligned
														
 
															+     * @param src  source array
														
 
															+     *             constraints: 16-byte aligned
														
 
															+     * @param min  minimum value
														
 
															+     *             constraints: must be in the range [-(1<<24), 1<<24]
														
 
															+     * @param max  maximum value
														
 
															+     *             constraints: must be in the range [-(1<<24), 1<<24]
														
 
															+     * @param len  number of elements in the array
														
 
															+     *             constraints: multiple of 32 greater than zero
														
 
															+     */
														
 
															+    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
														
 
															+                              int32_t max, unsigned int len);
														
 
															+
														
 
															     /* rv30 functions */
														
 
															     qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
														
 
															     qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
														
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){
 
															         }                                                               \
														
 
															     } while (0)
														
 
															-#if ARCH_X86
														
 
															-#   define SKIP_CACHE(name, gb, num)                            \
														
 
															-    __asm__("shldl %2, %1, %0          \n\t"                    \
														
 
															-            "shll  %2, %1              \n\t"                    \
														
 
															-            : "+r" (name##_cache0), "+r" (name##_cache1)        \
														
 
															-            : "Ic" ((uint8_t)(num)))
														
 
															-#else
														
 
															 #   define SKIP_CACHE(name, gb, num) do {               \
														
 
															         name##_cache0 <<= (num);                        \
														
 
															         name##_cache0 |= NEG_USR32(name##_cache1,num);  \
														
 
															         name##_cache1 <<= (num);                        \
														
 
															     } while (0)
														
 
															-#endif
														
 
															 #   define SKIP_COUNTER(name, gb, num) name##_bit_count += (num)
														
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
 
															 pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
														
 
															 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
														
 
															+; used in ff_ac3_extract_exponents()
														
 
															+pd_1:   times 4 dd 1
														
 
															+pd_151: times 4 dd 151
														
 
															+pb_shuf_4dwb: db 0, 4, 8, 12
														
 
															+
														
 
															 SECTION .text
														
 
															 ;-----------------------------------------------------------------------------
														
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
 
															     movd       eax, m0
														
 
															     add        eax, sumd
														
 
															     RET
														
 
															+
														
 
															+;------------------------------------------------------------------------------
														
 
															+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
														
 
															+;------------------------------------------------------------------------------
														
 
															+
														
 
															+%macro PABSD_MMX 2 ; src/dst, tmp
														
 
															+    pxor     %2, %2
														
 
															+    pcmpgtd  %2, %1
														
 
															+    pxor     %1, %2
														
 
															+    psubd    %1, %2
														
 
															+%endmacro
														
 
															+
														
 
															+%macro PABSD_SSSE3 1-2 ; src/dst, unused
														
 
															+    pabsd    %1, %1
														
 
															+%endmacro
														
 
															+
														
 
															+%ifdef HAVE_AMD3DNOW
														
 
															+INIT_MMX
														
 
															+cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
														
 
															+    add      expq, lenq
														
 
															+    lea     coefq, [coefq+4*lenq]
														
 
															+    neg      lenq
														
 
															+    movq       m3, [pd_1]
														
 
															+    movq       m4, [pd_151]
														
 
															+.loop:
														
 
															+    movq       m0, [coefq+4*lenq  ]
														
 
															+    movq       m1, [coefq+4*lenq+8]
														
 
															+    PABSD_MMX  m0, m2
														
 
															+    PABSD_MMX  m1, m2
														
 
															+    pslld      m0, 1
														
 
															+    por        m0, m3
														
 
															+    pi2fd      m2, m0
														
 
															+    psrld      m2, 23
														
 
															+    movq       m0, m4
														
 
															+    psubd      m0, m2
														
 
															+    pslld      m1, 1
														
 
															+    por        m1, m3
														
 
															+    pi2fd      m2, m1
														
 
															+    psrld      m2, 23
														
 
															+    movq       m1, m4
														
 
															+    psubd      m1, m2
														
 
															+    packssdw   m0, m0
														
 
															+    packuswb   m0, m0
														
 
															+    packssdw   m1, m1
														
 
															+    packuswb   m1, m1
														
 
															+    punpcklwd  m0, m1
														
 
															+    movd  [expq+lenq], m0
														
 
															+    add      lenq, 4
														
 
															+    jl .loop
														
 
															+    REP_RET
														
 
															+%endif
														
 
															+
														
 
															+%macro AC3_EXTRACT_EXPONENTS 1
														
 
															+cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
														
 
															+    add     expq, lenq
														
 
															+    lea    coefq, [coefq+4*lenq]
														
 
															+    neg     lenq
														
 
															+    mova      m2, [pd_1]
														
 
															+    mova      m3, [pd_151]
														
 
															+%ifidn %1, ssse3 ;
														
 
															+    movd      m4, [pb_shuf_4dwb]
														
 
															+%endif
														
 
															+.loop:
														
 
															+    ; move 4 32-bit coefs to xmm0
														
 
															+    mova      m0, [coefq+4*lenq]
														
 
															+    ; absolute value
														
 
															+    PABSD     m0, m1
														
 
															+    ; convert to float and extract exponents
														
 
															+    pslld     m0, 1
														
 
															+    por       m0, m2
														
 
															+    cvtdq2ps  m1, m0
														
 
															+    psrld     m1, 23
														
 
															+    mova      m0, m3
														
 
															+    psubd     m0, m1
														
 
															+    ; move the lowest byte in each of 4 dwords to the low dword
														
 
															+%ifidn %1, ssse3
														
 
															+    pshufb    m0, m4
														
 
															+%else
														
 
															+    packssdw  m0, m0
														
 
															+    packuswb  m0, m0
														
 
															+%endif
														
 
															+    movd  [expq+lenq], m0
														
 
															+
														
 
															+    add     lenq, 4
														
 
															+    jl .loop
														
 
															+    REP_RET
														
 
															+%endmacro
														
 
															+
														
 
															+%ifdef HAVE_SSE
														
 
															+INIT_XMM
														
 
															+%define PABSD PABSD_MMX
														
 
															+AC3_EXTRACT_EXPONENTS sse2
														
 
															+%ifdef HAVE_SSSE3
														
 
															+%define PABSD PABSD_SSSE3
														
 
															+AC3_EXTRACT_EXPONENTS ssse3
														
 
															+%endif
														
 
															+%endif