Browse Source

ARM: add some PLD in NEON IDCT

Originally committed as revision 18972 to svn://svn.ffmpeg.org/ffmpeg/trunk
Måns Rullgård 16 years ago
parent
commit
c9311a12d5
1 changed file with 15 additions and 2 deletions
  1. 15 2
      libavcodec/arm/simple_idct_neon.S

+ 15 - 2
libavcodec/arm/simple_idct_neon.S

@@ -68,6 +68,19 @@
         .text
         .align 6
 
+function idct_row4_pld_neon                             @ PLD prologue: hints 8 addresses r0 + k*r1 (k = 0..7), then runs idct_row4_neon
+        pld             [r0]                            @ k = 0; NOTE(review): r0/r1 look like a buffer base and row stride -- confirm against callers
+        add             r3,  r0,  r1,  lsl #2           @ r3 = r0 + 4*r1 (r3 is used as scratch and overwritten)
+        pld             [r0, r1]                        @ k = 1
+        pld             [r0, r1, lsl #1]                @ k = 2
+        pld             [r3, -r1]                       @ k = 3, reached as (r0 + 4*r1) - r1
+        pld             [r3]                            @ k = 4
+        pld             [r3, r1]                        @ k = 5
+        add             r3,  r3,  r1,  lsl #1           @ r3 = r0 + 6*r1
+        pld             [r3]                            @ k = 6
+        pld             [r3, r1]                        @ k = 7
+        .endfunc                                        @ no return here: execution falls through into idct_row4_neon directly below
+
 function idct_row4_neon
         vmov.i32        q15, #(1<<(ROW_SHIFT-1))
         vld1.64         {d2-d5},  [r2,:128]!
@@ -252,7 +265,7 @@ idct_coeff_neon:
 function ff_simple_idct_put_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon
@@ -307,7 +320,7 @@ function idct_col4_add8_neon
 function ff_simple_idct_add_neon, export=1
         idct_start      r2
 
-        bl              idct_row4_neon
+        bl              idct_row4_pld_neon
         bl              idct_row4_neon
         add             r2,  r2,  #-128
         bl              idct_col4_neon