|
@@ -66,38 +66,47 @@ dither_8to16(const uint8_t *srcDither, int rot)
|
|
|
}
|
|
|
#endif
|
|
|
|
|
|
-static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
|
|
|
- const int16_t *chrUSrc, const int16_t *chrVSrc,
|
|
|
- const int16_t *alpSrc,
|
|
|
- uint8_t *dst[4], int dstW, int chrDstW)
|
|
|
+static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
|
|
|
+ const int16_t **src, uint8_t *dest, int dstW,
|
|
|
+ const uint8_t *dither, int offset)
|
|
|
{
|
|
|
- int p= 4;
|
|
|
- const int16_t *src[4]= {
|
|
|
- lumSrc + dstW, chrUSrc + chrDstW,
|
|
|
- chrVSrc + chrDstW, alpSrc + dstW
|
|
|
- };
|
|
|
- x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
|
|
|
-
|
|
|
- while (p--) {
|
|
|
- if (dst[p]) {
|
|
|
- __asm__ volatile(
|
|
|
- "mov %2, %%"REG_a" \n\t"
|
|
|
- ".p2align 4 \n\t" /* FIXME Unroll? */
|
|
|
- "1: \n\t"
|
|
|
- "movq (%0, %%"REG_a", 2), %%mm0 \n\t"
|
|
|
- "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"
|
|
|
- "psraw $7, %%mm0 \n\t"
|
|
|
- "psraw $7, %%mm1 \n\t"
|
|
|
- "packuswb %%mm1, %%mm0 \n\t"
|
|
|
- MOVNTQ(%%mm0, (%1, %%REGa))
|
|
|
- "add $8, %%"REG_a" \n\t"
|
|
|
- "jnc 1b \n\t"
|
|
|
- :: "r" (src[p]), "r" (dst[p] + counter[p]),
|
|
|
- "g" (-counter[p])
|
|
|
- : "%"REG_a
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
+ dither_8to16(dither, offset);
|
|
|
+ __asm__ volatile(\
|
|
|
+ "psraw $4, %%mm3\n\t"
|
|
|
+ "psraw $4, %%mm4\n\t"
|
|
|
+ "movq %%mm3, %%mm6\n\t"
|
|
|
+ "movq %%mm4, %%mm7\n\t"
|
|
|
+ "movslq %3, %%"REG_c"\n\t"
|
|
|
+ "mov %0, %%"REG_d" \n\t"\
|
|
|
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
+ ".p2align 4 \n\t" /* FIXME Unroll? */\
|
|
|
+ "1: \n\t"\
|
|
|
+ "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
|
|
|
+ "movq (%%"REG_S", %%"REG_c", 2), %%mm2 \n\t" /* srcData */\
|
|
|
+ "movq 8(%%"REG_S", %%"REG_c", 2), %%mm5 \n\t" /* srcData */\
|
|
|
+ "add $16, %%"REG_d" \n\t"\
|
|
|
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
+ "test %%"REG_S", %%"REG_S" \n\t"\
|
|
|
+ "pmulhw %%mm0, %%mm2 \n\t"\
|
|
|
+ "pmulhw %%mm0, %%mm5 \n\t"\
|
|
|
+ "paddw %%mm2, %%mm3 \n\t"\
|
|
|
+ "paddw %%mm5, %%mm4 \n\t"\
|
|
|
+ " jnz 1b \n\t"\
|
|
|
+ "psraw $3, %%mm3 \n\t"\
|
|
|
+ "psraw $3, %%mm4 \n\t"\
|
|
|
+ "packuswb %%mm4, %%mm3 \n\t"
|
|
|
+ MOVNTQ2 " %%mm3, (%1, %%"REG_c")\n\t"
|
|
|
+ "add $8, %%"REG_c" \n\t"\
|
|
|
+ "cmp %2, %%"REG_c" \n\t"\
|
|
|
+ "movq %%mm6, %%mm3\n\t"
|
|
|
+ "movq %%mm7, %%mm4\n\t"
|
|
|
+ "mov %0, %%"REG_d" \n\t"\
|
|
|
+ "mov (%%"REG_d"), %%"REG_S" \n\t"\
|
|
|
+ "jb 1b \n\t"\
|
|
|
+ :: "g" (filter),
|
|
|
+ "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
|
|
|
+ : "%"REG_d, "%"REG_S, "%"REG_c
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
static void RENAME(yuv2yuv1_ar)(const int16_t *src, uint8_t *dst, int dstW, const uint8_t *dither, int offset)
|
|
@@ -1869,7 +1878,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
|
|
|
{
|
|
|
enum PixelFormat srcFormat = c->srcFormat,
|
|
|
dstFormat = c->dstFormat;
|
|
|
-
|
|
|
+ c->use_mmx_vfilter= 0;
|
|
|
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
|
|
|
&& dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
|
|
|
c->yuv2plane1 = RENAME(yuv2yuv1_ar );
|
|
@@ -1886,7 +1895,10 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
|
|
|
}
|
|
|
}
|
|
|
} else {
|
|
|
- //c->yuv2yuv1 = RENAME(yuv2yuv1 );
|
|
|
+ int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
|
|
|
+ //c->yuv2plane1 = should_dither ? RENAME(yuv2yuv1_ar ) : RENAME(yuv2yuv1 );
|
|
|
+ c->use_mmx_vfilter= 1;
|
|
|
+ c->yuv2planeX = RENAME(yuv2yuvX );
|
|
|
if (!(c->flags & SWS_FULL_CHR_H_INT)) {
|
|
|
switch (c->dstFormat) {
|
|
|
case PIX_FMT_RGB32: c->yuv2packedX = RENAME(yuv2rgb32_X); break;
|