Просмотр исходного кода

VC1: don't use vc1_put_block() in vc1_decode_i_blocks_adv().

The advanced profile never uses "range reduction", so when called from
vc1_decode_i_blocks_adv(), vc1_put_block() quite literally just calls
put_pixels_clamped(). By inlining the function, we can skip the 8x8 IDCT
for the chroma blocks if CODEC_FLAG_GRAY is set, and we no longer have to
offset the coeffs by 128 into the [0,255] range, but can instead use
put_signed_pixels_clamped().
(cherry picked from commit 70aa916e4630bcec14439a2d703074b6d4c890a8)
Ronald S. Bultje 14 лет назад
Родитель
Commit
a8858ee11c
Изменён 1 файл: 11 добавлений и 3 удаления
  1. 11 3
      libavcodec/vc1dec.c

+ 11 - 3
libavcodec/vc1dec.c

@@ -2704,7 +2704,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
  */
  */
 static void vc1_decode_i_blocks_adv(VC1Context *v)
 static void vc1_decode_i_blocks_adv(VC1Context *v)
 {
 {
-    int k, j;
+    int k;
     MpegEncContext *s = &v->s;
     MpegEncContext *s = &v->s;
     int cbp, val;
     int cbp, val;
     uint8_t *coded_val;
     uint8_t *coded_val;
@@ -2747,7 +2747,14 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
         s->mb_x = 0;
         s->mb_x = 0;
         ff_init_block_index(s);
         ff_init_block_index(s);
         for(;s->mb_x < s->mb_width; s->mb_x++) {
         for(;s->mb_x < s->mb_width; s->mb_x++) {
+            uint8_t *dst[6];
             ff_update_block_index(s);
             ff_update_block_index(s);
+            dst[0] = s->dest[0];
+            dst[1] = dst[0] + 8;
+            dst[2] = s->dest[0] + s->linesize * 8;
+            dst[3] = dst[2] + 8;
+            dst[4] = s->dest[1];
+            dst[5] = s->dest[2];
             s->dsp.clear_blocks(s->block[0]);
             s->dsp.clear_blocks(s->block[0]);
             mb_pos = s->mb_x + s->mb_y * s->mb_stride;
             mb_pos = s->mb_x + s->mb_y * s->mb_stride;
             s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
             s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
@@ -2791,11 +2798,12 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
 
 
                 vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
                 vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
 
 
+                if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
                 v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
                 v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
-                for(j = 0; j < 64; j++) s->block[k][j] += 128;
+                s->dsp.put_signed_pixels_clamped(s->block[k], dst[k],
+                                                 k & 4 ? s->uvlinesize : s->linesize);
             }
             }
 
 
-            vc1_put_block(v, s->block);
             if(overlap) {
             if(overlap) {
                 if(s->mb_x) {
                 if(s->mb_x) {
                     v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
                     v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);