|
@@ -240,7 +240,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
|
|
|
"movq %%mm2, (%0, %1) \n\t"
|
|
|
"movq %%mm4, (%0, %1, 2) \n\t"
|
|
|
"movq %%mm6, (%0, %2) \n\t"
|
|
|
- ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
|
|
|
+ ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
|
|
|
:"memory");
|
|
|
pix += line_size*4;
|
|
|
p += 32;
|
|
@@ -265,7 +265,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
|
|
|
"movq %%mm2, (%0, %1) \n\t"
|
|
|
"movq %%mm4, (%0, %1, 2) \n\t"
|
|
|
"movq %%mm6, (%0, %2) \n\t"
|
|
|
- ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
|
|
|
+ ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
|
|
|
:"memory");
|
|
|
}
|
|
|
|
|
@@ -349,7 +349,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size
|
|
|
"subl $4, %0 \n\t"
|
|
|
"jnz 1b \n\t"
|
|
|
: "+g"(h), "+r" (pixels), "+r" (block)
|
|
|
- : "r"((long)line_size)
|
|
|
+ : "r"((x86_reg)line_size)
|
|
|
: "%"REG_a, "memory"
|
|
|
);
|
|
|
}
|
|
@@ -375,7 +375,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size
|
|
|
"subl $4, %0 \n\t"
|
|
|
"jnz 1b \n\t"
|
|
|
: "+g"(h), "+r" (pixels), "+r" (block)
|
|
|
- : "r"((long)line_size)
|
|
|
+ : "r"((x86_reg)line_size)
|
|
|
: "%"REG_a, "memory"
|
|
|
);
|
|
|
}
|
|
@@ -409,7 +409,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz
|
|
|
"subl $4, %0 \n\t"
|
|
|
"jnz 1b \n\t"
|
|
|
: "+g"(h), "+r" (pixels), "+r" (block)
|
|
|
- : "r"((long)line_size)
|
|
|
+ : "r"((x86_reg)line_size)
|
|
|
: "%"REG_a, "memory"
|
|
|
);
|
|
|
}
|
|
@@ -431,7 +431,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
|
|
|
"lea (%2,%3,4), %2 \n\t"
|
|
|
"jnz 1b \n\t"
|
|
|
: "+g"(h), "+r" (pixels), "+r" (block)
|
|
|
- : "r"((long)line_size), "r"(3L*line_size)
|
|
|
+ : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
|
|
|
: "memory"
|
|
|
);
|
|
|
}
|
|
@@ -457,7 +457,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
|
|
|
"lea (%2,%3,4), %2 \n\t"
|
|
|
"jnz 1b \n\t"
|
|
|
: "+g"(h), "+r" (pixels), "+r" (block)
|
|
|
- : "r"((long)line_size), "r"(3L*line_size)
|
|
|
+ : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
|
|
|
: "memory"
|
|
|
);
|
|
|
}
|
|
@@ -480,7 +480,7 @@ static void clear_blocks_mmx(DCTELEM *blocks)
|
|
|
}
|
|
|
|
|
|
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
|
|
|
- long i=0;
|
|
|
+ x86_reg i=0;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"movq (%1, %0), %%mm0 \n\t"
|
|
@@ -495,14 +495,14 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
|
|
|
"cmp %3, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (i)
|
|
|
- : "r"(src), "r"(dst), "r"((long)w-15)
|
|
|
+ : "r"(src), "r"(dst), "r"((x86_reg)w-15)
|
|
|
);
|
|
|
for(; i<w; i++)
|
|
|
dst[i+0] += src[i+0];
|
|
|
}
|
|
|
|
|
|
static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
|
|
|
- long i=0;
|
|
|
+ x86_reg i=0;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"movq (%2, %0), %%mm0 \n\t"
|
|
@@ -515,7 +515,7 @@ static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
|
|
|
"cmp %4, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (i)
|
|
|
- : "r"(dst), "r"(src1), "r"(src2), "r"((long)w-15)
|
|
|
+ : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
|
|
|
);
|
|
|
for(; i<w; i++)
|
|
|
dst[i] = src1[i] + src2[i];
|
|
@@ -689,8 +689,8 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
|
|
|
"movd %%mm6, (%1,%3) \n\t"
|
|
|
:: "r" (src),
|
|
|
"r" (src + 4*stride),
|
|
|
- "r" ((long) stride ),
|
|
|
- "r" ((long)(3*stride))
|
|
|
+ "r" ((x86_reg) stride ),
|
|
|
+ "r" ((x86_reg)(3*stride))
|
|
|
);
|
|
|
}
|
|
|
}
|
|
@@ -723,7 +723,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|
|
"cmp %3, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (ptr)
|
|
|
- : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
|
|
+ : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
|
|
|
);
|
|
|
}
|
|
|
else
|
|
@@ -746,7 +746,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|
|
"cmp %3, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (ptr)
|
|
|
- : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
|
|
|
+ : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
|
|
|
);
|
|
|
}
|
|
|
|
|
@@ -764,7 +764,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|
|
"cmp %4, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (ptr)
|
|
|
- : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
|
|
|
+ : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
|
|
|
);
|
|
|
ptr= last_line + (i + 1) * wrap - w;
|
|
|
asm volatile(
|
|
@@ -778,7 +778,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|
|
"cmp %4, %0 \n\t"
|
|
|
" jb 1b \n\t"
|
|
|
: "+r" (ptr)
|
|
|
- : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
|
|
|
+ : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
|
|
|
);
|
|
|
}
|
|
|
}
|
|
@@ -786,8 +786,8 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
|
|
|
#define PAETH(cpu, abs3)\
|
|
|
void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
|
|
|
{\
|
|
|
- long i = -bpp;\
|
|
|
- long end = w-3;\
|
|
|
+ x86_reg i = -bpp;\
|
|
|
+ x86_reg end = w-3;\
|
|
|
asm volatile(\
|
|
|
"pxor %%mm7, %%mm7 \n"\
|
|
|
"movd (%1,%0), %%mm0 \n"\
|
|
@@ -830,7 +830,7 @@ void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, in
|
|
|
"cmp %5, %0 \n"\
|
|
|
"jle 1b \n"\
|
|
|
:"+r"(i)\
|
|
|
- :"r"(dst), "r"(top), "r"(src), "r"((long)bpp), "g"(end),\
|
|
|
+ :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
|
|
|
"m"(ff_pw_255)\
|
|
|
:"memory"\
|
|
|
);\
|
|
@@ -994,7 +994,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, in
|
|
|
"decl %2 \n\t"\
|
|
|
" jnz 1b \n\t"\
|
|
|
: "+a"(src), "+c"(dst), "+D"(h)\
|
|
|
- : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
|
|
|
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
|
|
|
: "memory"\
|
|
|
);\
|
|
|
}\
|
|
@@ -1105,7 +1105,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int
|
|
|
"decl %2 \n\t"\
|
|
|
" jnz 1b \n\t"\
|
|
|
: "+a"(src), "+c"(dst), "+d"(h)\
|
|
|
- : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
|
|
|
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
|
|
|
: "memory"\
|
|
|
);\
|
|
|
}\
|
|
@@ -1169,7 +1169,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
|
|
|
"decl %2 \n\t"\
|
|
|
" jnz 1b \n\t"\
|
|
|
: "+r" (src), "+r" (temp_ptr), "+r"(count)\
|
|
|
- : "r" ((long)srcStride)\
|
|
|
+ : "r" ((x86_reg)srcStride)\
|
|
|
: "memory"\
|
|
|
);\
|
|
|
\
|
|
@@ -1216,7 +1216,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
|
|
|
" jnz 1b \n\t"\
|
|
|
\
|
|
|
: "+r"(temp_ptr), "+r"(dst), "+g"(count)\
|
|
|
- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
|
|
|
+ : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
|
|
|
:"memory"\
|
|
|
);\
|
|
|
}\
|
|
@@ -1241,7 +1241,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
|
|
|
"decl %2 \n\t"\
|
|
|
" jnz 1b \n\t"\
|
|
|
: "+r" (src), "+r" (temp_ptr), "+r"(count)\
|
|
|
- : "r" ((long)srcStride)\
|
|
|
+ : "r" ((x86_reg)srcStride)\
|
|
|
: "memory"\
|
|
|
);\
|
|
|
\
|
|
@@ -1276,7 +1276,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
|
|
|
" jnz 1b \n\t"\
|
|
|
\
|
|
|
: "+r"(temp_ptr), "+r"(dst), "+g"(count)\
|
|
|
- : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
|
|
|
+ : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
|
|
|
: "memory"\
|
|
|
);\
|
|
|
}\
|
|
@@ -1839,7 +1839,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
|
|
|
}
|
|
|
|
|
|
static void vector_fmul_3dnow(float *dst, const float *src, int len){
|
|
|
- long i = (len-4)*4;
|
|
|
+ x86_reg i = (len-4)*4;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"movq (%1,%0), %%mm0 \n\t"
|
|
@@ -1857,7 +1857,7 @@ static void vector_fmul_3dnow(float *dst, const float *src, int len){
|
|
|
);
|
|
|
}
|
|
|
static void vector_fmul_sse(float *dst, const float *src, int len){
|
|
|
- long i = (len-8)*4;
|
|
|
+ x86_reg i = (len-8)*4;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"movaps (%1,%0), %%xmm0 \n\t"
|
|
@@ -1875,7 +1875,7 @@ static void vector_fmul_sse(float *dst, const float *src, int len){
|
|
|
}
|
|
|
|
|
|
static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
|
|
|
- long i = len*4-16;
|
|
|
+ x86_reg i = len*4-16;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"pswapd 8(%1), %%mm0 \n\t"
|
|
@@ -1893,7 +1893,7 @@ static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const floa
|
|
|
asm volatile("femms");
|
|
|
}
|
|
|
static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
|
|
|
- long i = len*4-32;
|
|
|
+ x86_reg i = len*4-32;
|
|
|
asm volatile(
|
|
|
"1: \n\t"
|
|
|
"movaps 16(%1), %%xmm0 \n\t"
|
|
@@ -1914,7 +1914,7 @@ static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *
|
|
|
|
|
|
static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
|
|
|
const float *src2, int src3, int len, int step){
|
|
|
- long i = (len-4)*4;
|
|
|
+ x86_reg i = (len-4)*4;
|
|
|
if(step == 2 && src3 == 0){
|
|
|
dst += (len-4)*2;
|
|
|
asm volatile(
|
|
@@ -1963,7 +1963,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float
|
|
|
}
|
|
|
static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
|
|
|
const float *src2, int src3, int len, int step){
|
|
|
- long i = (len-8)*4;
|
|
|
+ x86_reg i = (len-8)*4;
|
|
|
if(step == 2 && src3 == 0){
|
|
|
dst += (len-8)*2;
|
|
|
asm volatile(
|