13 years ago · 3b2d285afb
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -220,7 +220,7 @@ yuv2yuvX_altivec_real(SwsContext *c,
 
				     }
			
 
				 }
			
 
				 
			
 
				-static void hScale_altivec_real(int16_t *dst, int dstW,
			
 
				+static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW,
			
 
				                                 const uint8_t *src, const int16_t *filter,
			
 
				                                 const int16_t *filterPos, int filterSize)
			
 
				 {
			
@@ -406,7 +406,9 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
 
				     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
			
 
				         return;
			
 
				 
			
 
				-    c->hScale       = hScale_altivec_real;
			
 
				+    if (c->scalingBpp == 8) {
			
 
				+        c->hScale       = hScale_altivec_real;
			
 
				+    }
			
 
				     if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
			
 
				         dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
			
 
				         !c->alpPixBuf) {
			
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -77,8 +77,7 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
 
				 typedef void (*yuv2planar1_fn) (struct SwsContext *c,
			
 
				                                 const int16_t *lumSrc, const int16_t *chrUSrc,
			
 
				                                 const int16_t *chrVSrc, const int16_t *alpSrc,
			
 
				-                                uint8_t *dest[4], int dstW, int chrDstW,
			
 
				-                                const uint8_t *lumDither, const uint8_t *chrDither);
			
 
				+                                uint8_t *dest[4], int dstW, int chrDstW);
			
 
				 /**
			
 
				  * Write one line of horizontally scaled Y/U/V/A to planar output
			
 
				  * with multi-point vertical scaling between input pixels.
			
@@ -101,7 +100,7 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
 
				                                 const int16_t *chrFilter, const int16_t **chrUSrc,
			
 
				                                 const int16_t **chrVSrc,  int chrFilterSize,
			
 
				                                 const int16_t **alpSrc, uint8_t *dest[4],
			
 
				-                                int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither);
			
 
				+                                int dstW, int chrDstW);
			
 
				 /**
			
 
				  * Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
			
 
				  * output without any additional vertical scaling (or point-scaling). Note
			
@@ -210,6 +209,7 @@ typedef struct SwsContext {
 
				     enum PixelFormat srcFormat;   ///< Source      pixel format.
			
 
				     int dstFormatBpp;             ///< Number of bits per pixel of the destination pixel format.
			
 
				     int srcFormatBpp;             ///< Number of bits per pixel of the source      pixel format.
			
 
				+    int scalingBpp;
			
 
				     int chrSrcHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source      image.
			
 
				     int chrSrcVSubSample;         ///< Binary logarithm of vertical   subsampling factor between luma/alpha and chroma planes in source      image.
			
 
				     int chrDstHSubSample;         ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
			
@@ -324,7 +324,7 @@ typedef struct SwsContext {
 
				 #define UV_OFF                "11*8+4*4*256*3+48"
			
 
				 #define UV_OFFx2              "11*8+4*4*256*3+56"
			
 
				 #define DITHER16              "11*8+4*4*256*3+64"
			
 
				-#define DITHER32              "11*8+4*4*256*3+64+16"
			
 
				+#define DITHER32              "11*8+4*4*256*3+80"
			
 
				 
			
 
				     DECLARE_ALIGNED(8, uint64_t, redDither);
			
 
				     DECLARE_ALIGNED(8, uint64_t, greenDither);
			
@@ -352,6 +352,8 @@ typedef struct SwsContext {
 
				     uint16_t dither16[8];
			
 
				     uint32_t dither32[8];
			
 
				 
			
 
				+    const uint8_t *chrDither8, *lumDither8;
			
 
				+
			
 
				 #if HAVE_ALTIVEC
			
 
				     vector signed short   CY;
			
 
				     vector signed short   CRV;
			
@@ -451,7 +453,7 @@ typedef struct SwsContext {
 
				      *                   (and input coefficients thus padded with zeroes)
			
 
				      *                   to simplify creating SIMD code.
			
 
				      */
			
 
				-    void (*hScale)(int16_t *dst, int dstW, const uint8_t *src,
			
 
				+    void (*hScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src,
			
 
				                    const int16_t *filter, const int16_t *filterPos,
			
 
				                    int filterSize);
			
 
				 
			
@@ -462,6 +464,15 @@ typedef struct SwsContext {
 
				     void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
			
 
				     void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
			
 
				 
			
 
				+    /**
			
 
				+     * dst[..] = (src[..] << 8) | src[..];
			
 
				+     */
			
 
				+    void (*scale8To16Rv)(uint16_t *dst, const uint8_t *src, int len);
			
 
				+    /**
			
 
				+     * dst[..] = src[..] >> 4;
			
 
				+     */
			
 
				+    void (*scale19To15Fw)(int16_t *dst, const int32_t *src, int len);
			
 
				+
			
 
				     int needs_hcscale; ///< Set if there are chroma planes to be converted.
			
 
				 
			
 
				 } SwsContext;
			
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -46,6 +46,7 @@
 
				 #include "libavutil/bswap.h"
			
 
				 #include "libavutil/opt.h"
			
 
				 #include "libavutil/pixdesc.h"
			
 
				+#include "libavutil/avassert.h"
			
 
				 
			
 
				 unsigned swscale_version(void)
			
 
				 {
			
@@ -777,7 +778,7 @@ SwsContext *sws_alloc_context(void)
 
				 
			
 
				 int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
			
 
				 {
			
 
				-    int i;
			
 
				+    int i, j;
			
 
				     int usesVFilter, usesHFilter;
			
 
				     int unscaled;
			
 
				     SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
			
@@ -785,7 +786,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
				     int srcH= c->srcH;
			
 
				     int dstW= c->dstW;
			
 
				     int dstH= c->dstH;
			
 
				-    int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16), dst_stride_px = dst_stride >> 1;
			
 
				+    int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16);
			
 
				     int flags, cpu_flags;
			
 
				     enum PixelFormat srcFormat= c->srcFormat;
			
 
				     enum PixelFormat dstFormat= c->dstFormat;
			
@@ -882,8 +883,14 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
				         }
			
 
				     }
			
 
				 
			
 
				+    c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
			
 
				+                          av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 15 ? 16 : 8;
			
 
				+
			
 
				+    if (c->scalingBpp == 16)
			
 
				+        dst_stride <<= 1;
			
 
				+    av_assert0(c->scalingBpp<=16);
			
 
				     FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
			
 
				-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
			
 
				+    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) {
			
 
				         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
			
 
				         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
			
 
				             if (flags&SWS_PRINT_INFO)
			
@@ -909,7 +916,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
				             c->chrXInc+= 20;
			
 
				         }
			
 
				         //we don't use the x86 asm scaler if MMX is available
			
 
				-        else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
			
 
				+        else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->scalingBpp == 8) {
			
 
				             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
			
 
				             c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
			
 
				         }
			
@@ -1040,12 +1047,12 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
				         FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+1, fail);
			
 
				         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
			
 
				     }
			
 
				-    c->uv_off = dst_stride_px;
			
 
				+    c->uv_off = dst_stride>>1;
			
 
				     c->uv_offx2 = dst_stride;
			
 
				     for (i=0; i<c->vChrBufSize; i++) {
			
 
				         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail);
			
 
				         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
			
 
				-        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + dst_stride_px;
			
 
				+        c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + (dst_stride >> 1);
			
 
				     }
			
 
				     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
			
 
				         for (i=0; i<c->vLumBufSize; i++) {
			
@@ -1055,7 +1062,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
				 
			
 
				     //try to avoid drawing green stuff between the right end and the stride end
			
 
				     for (i=0; i<c->vChrBufSize; i++)
			
 
				-        memset(c->chrUPixBuf[i], 64, dst_stride*2+1);
			
 
				+        if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
			
 
				+            av_assert0(c->scalingBpp == 16);
			
 
				+            for(j=0; j<dst_stride/2+1; j++)
			
 
				+                ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
			
 
				+        } else
			
 
				+            for(j=0; j<dst_stride+1; j++)
			
 
				+                ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
			
 
				 
			
 
				     assert(c->chrDstH <= dstH);
			
 
				 
			
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -70,26 +70,62 @@
 
				         : "%"REG_d, "%"REG_S\
			
 
				     );
			
 
				 
			
 
				+#if !COMPILE_TEMPLATE_MMX2
			
 
				+static av_always_inline void
			
 
				+dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
			
 
				+{
			
 
				+    if (rot) {
			
 
				+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
			
 
				+                         "movq       (%0), %%mm3\n\t"
			
 
				+                         "movq      %%mm3, %%mm4\n\t"
			
 
				+                         "psrlq       $24, %%mm3\n\t"
			
 
				+                         "psllq       $40, %%mm4\n\t"
			
 
				+                         "por       %%mm4, %%mm3\n\t"
			
 
				+                         "movq      %%mm3, %%mm4\n\t"
			
 
				+                         "punpcklbw %%mm0, %%mm3\n\t"
			
 
				+                         "punpckhbw %%mm0, %%mm4\n\t"
			
 
				+                         "psraw        $4, %%mm3\n\t"
			
 
				+                         "psraw        $4, %%mm4\n\t"
			
 
				+                         "movq      %%mm3, "DITHER16"+0(%1)\n\t"
			
 
				+                         "movq      %%mm4, "DITHER16"+8(%1)\n\t"
			
 
				+                         :: "r"(srcDither), "r"(&c->redDither)
			
 
				+                         );
			
 
				+    } else {
			
 
				+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
			
 
				+                         "movq       (%0), %%mm3\n\t"
			
 
				+                         "movq      %%mm3, %%mm4\n\t"
			
 
				+                         "punpcklbw %%mm0, %%mm3\n\t"
			
 
				+                         "punpckhbw %%mm0, %%mm4\n\t"
			
 
				+                         "psraw        $4, %%mm3\n\t"
			
 
				+                         "psraw        $4, %%mm4\n\t"
			
 
				+                         "movq      %%mm3, "DITHER16"+0(%1)\n\t"
			
 
				+                         "movq      %%mm4, "DITHER16"+8(%1)\n\t"
			
 
				+                         :: "r"(srcDither), "r"(&c->redDither)
			
 
				+                         );
			
 
				+    }
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
			
 
				                              const int16_t **lumSrc, int lumFilterSize,
			
 
				                              const int16_t *chrFilter, const int16_t **chrUSrc,
			
 
				                              const int16_t **chrVSrc,
			
 
				                              int chrFilterSize, const int16_t **alpSrc,
			
 
				-                             uint8_t *dest[4], int dstW, int chrDstW,
			
 
				-                             const uint8_t *lumDither, const uint8_t *chrDither)
			
 
				+                             uint8_t *dest[4], int dstW, int chrDstW)
			
 
				 {
			
 
				     int i;
			
 
				     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
			
 
				             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
			
 
				+    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
			
 
				 
			
 
				     if (uDest) {
			
 
				-        x86_reg uv_off = c->uv_off;
			
 
				-        for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4;
			
 
				+        x86_reg uv_off = c->uv_offx2 >> 1;
			
 
				+        dither_8to16(c, chrDither, 0);
			
 
				         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
			
 
				-        for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4;
			
 
				+        dither_8to16(c, chrDither, 1);
			
 
				         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
			
 
				     }
			
 
				-    for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4;
			
 
				+    dither_8to16(c, lumDither, 0);
			
 
				     if (CONFIG_SWSCALE_ALPHA && aDest) {
			
 
				         YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
			
 
				     }
			
@@ -104,10 +140,6 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
 
				         "movq                  "DITHER32"+8(%0), %%mm5      \n\t"\
			
 
				         "movq                 "DITHER32"+16(%0), %%mm6      \n\t"\
			
 
				         "movq                 "DITHER32"+24(%0), %%mm7      \n\t"\
			
 
				-        "pxor                             %%mm4, %%mm4      \n\t"\
			
 
				-        "pxor                             %%mm5, %%mm5      \n\t"\
			
 
				-        "pxor                             %%mm6, %%mm6      \n\t"\
			
 
				-        "pxor                             %%mm7, %%mm7      \n\t"\
			
 
				         "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
			
 
				         ".p2align                             4             \n\t"\
			
 
				         "1:                                                 \n\t"\
			
@@ -157,26 +189,87 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
 
				         : "%"REG_a, "%"REG_d, "%"REG_S\
			
 
				     );
			
 
				 
			
 
				+#if !COMPILE_TEMPLATE_MMX2
			
 
				+static av_always_inline void
			
 
				+dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
			
 
				+{
			
 
				+int i;
			
 
				+if(rot) for(i=0; i<8; i++) c->dither32[i] = srcDither[(i+3)&7]<<12;
			
 
				+else    for(i=0; i<8; i++) c->dither32[i] = srcDither[i&7]<<12;
			
 
				+return;
			
 
				+
			
 
				+    if (rot) {
			
 
				+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
			
 
				+                         "movq       (%0), %%mm4\n\t"
			
 
				+                         "movq      %%mm4, %%mm5\n\t"
			
 
				+                         "psrlq       $24, %%mm4\n\t"
			
 
				+                         "psllq       $40, %%mm5\n\t"
			
 
				+                         "por       %%mm5, %%mm4\n\t"
			
 
				+                         "movq      %%mm4, %%mm6\n\t"
			
 
				+                         "punpcklbw %%mm0, %%mm4\n\t"
			
 
				+                         "punpckhbw %%mm0, %%mm6\n\t"
			
 
				+                         "movq      %%mm4, %%mm5\n\t"
			
 
				+                         "movq      %%mm6, %%mm7\n\t"
			
 
				+                         "punpcklwd %%mm0, %%mm4\n\t"
			
 
				+                         "punpckhwd %%mm0, %%mm5\n\t"
			
 
				+                         "punpcklwd %%mm0, %%mm6\n\t"
			
 
				+                         "punpckhwd %%mm0, %%mm7\n\t"
			
 
				+                         "psllw       $12, %%mm4\n\t"
			
 
				+                         "psllw       $12, %%mm5\n\t"
			
 
				+                         "psllw       $12, %%mm6\n\t"
			
 
				+                         "psllw       $12, %%mm7\n\t"
			
 
				+                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
			
 
				+                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
			
 
				+                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
			
 
				+                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
			
 
				+                         :: "r"(srcDither), "r"(&c->redDither)
			
 
				+                         );
			
 
				+    } else {
			
 
				+        __asm__ volatile("pxor      %%mm0, %%mm0\n\t"
			
 
				+                         "movq       (%0), %%mm4\n\t"
			
 
				+                         "movq      %%mm4, %%mm6\n\t"
			
 
				+                         "punpcklbw %%mm0, %%mm4\n\t"
			
 
				+                         "punpckhbw %%mm0, %%mm6\n\t"
			
 
				+                         "movq      %%mm4, %%mm5\n\t"
			
 
				+                         "movq      %%mm6, %%mm7\n\t"
			
 
				+                         "punpcklwd %%mm0, %%mm4\n\t"
			
 
				+                         "punpckhwd %%mm0, %%mm5\n\t"
			
 
				+                         "punpcklwd %%mm0, %%mm6\n\t"
			
 
				+                         "punpckhwd %%mm0, %%mm7\n\t"
			
 
				+                         "psllw       $12, %%mm4\n\t"
			
 
				+                         "psllw       $12, %%mm5\n\t"
			
 
				+                         "psllw       $12, %%mm6\n\t"
			
 
				+                         "psllw       $12, %%mm7\n\t"
			
 
				+                         "movq      %%mm4, "DITHER32"+0(%1)\n\t"
			
 
				+                         "movq      %%mm5, "DITHER32"+8(%1)\n\t"
			
 
				+                         "movq      %%mm6, "DITHER32"+16(%1)\n\t"
			
 
				+                         "movq      %%mm7, "DITHER32"+24(%1)\n\t"
			
 
				+                         :: "r"(srcDither), "r"(&c->redDither)
			
 
				+                         );
			
 
				+    }
			
 
				+}
			
 
				+#endif
			
 
				+
			
 
				 static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
			
 
				                                 const int16_t **lumSrc, int lumFilterSize,
			
 
				                                 const int16_t *chrFilter, const int16_t **chrUSrc,
			
 
				                                 const int16_t **chrVSrc,
			
 
				                                 int chrFilterSize, const int16_t **alpSrc,
			
 
				-                                uint8_t *dest[4], int dstW, int chrDstW,
			
 
				-                                const uint8_t *lumDither, const uint8_t *chrDither)
			
 
				+                                uint8_t *dest[4], int dstW, int chrDstW)
			
 
				 {
			
 
				     int i;
			
 
				     uint8_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
			
 
				             *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
			
 
				+    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
			
 
				 
			
 
				     if (uDest) {
			
 
				-        x86_reg uv_off = c->uv_off;
			
 
				-        for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12;
			
 
				+        x86_reg uv_off = c->uv_offx2 >> 1;
			
 
				+        dither_8to32(c, chrDither, 0);
			
 
				         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
			
 
				-        for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12;
			
 
				+        dither_8to32(c, chrDither, 1);
			
 
				         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off)
			
 
				     }
			
 
				-    for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12;
			
 
				+    dither_8to32(c, lumDither, 0);
			
 
				     if (CONFIG_SWSCALE_ALPHA && aDest) {
			
 
				         YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0)
			
 
				     }
			
@@ -187,8 +280,7 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
			
 
				                              const int16_t *chrUSrc, const int16_t *chrVSrc,
			
 
				                              const int16_t *alpSrc,
			
 
				-                             uint8_t *dst[4], int dstW, int chrDstW,
			
 
				-                             const uint8_t *lumDither, const uint8_t *chrDither)
			
 
				+                             uint8_t *dst[4], int dstW, int chrDstW)
			
 
				 {
			
 
				     int p= 4;
			
 
				     const int16_t *src[4]= {
			
@@ -222,8 +314,7 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
 
				 static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
			
 
				                                 const int16_t *chrUSrc, const int16_t *chrVSrc,
			
 
				                                 const int16_t *alpSrc,
			
 
				-                                uint8_t *dst[4], int dstW, int chrDstW,
			
 
				-                                const uint8_t *lumDither, const uint8_t *chrDither)
			
 
				+                                uint8_t *dst[4], int dstW, int chrDstW)
			
 
				 {
			
 
				     int p= 4;
			
 
				     const int16_t *src[4]= {
			
@@ -231,15 +322,16 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
				         chrVSrc + chrDstW, alpSrc + dstW
			
 
				     };
			
 
				     x86_reg counter[4]= { dstW, chrDstW, chrDstW, dstW };
			
 
				+    const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
			
 
				 
			
 
				     while (p--) {
			
 
				         if (dst[p]) {
			
 
				             int i;
			
 
				-            for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i];
			
 
				+            for(i=0; i<8; i++) c->dither16[i] = (p == 2 || p == 3) ? lumDither[i] : chrDither[i];
			
 
				             __asm__ volatile(
			
 
				                 "mov %2, %%"REG_a"                    \n\t"
			
 
				-                "movq               0(%3), %%mm6      \n\t"
			
 
				-                "movq               8(%3), %%mm7      \n\t"
			
 
				+                "movq    "DITHER16"+0(%3), %%mm6      \n\t"
			
 
				+                "movq    "DITHER16"+8(%3), %%mm7      \n\t"
			
 
				                 ".p2align                4            \n\t" /* FIXME Unroll? */
			
 
				                 "1:                                   \n\t"
			
 
				                 "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"
			
@@ -253,7 +345,7 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
				                 "add                   $8, %%"REG_a"  \n\t"
			
 
				                 "jnc                   1b             \n\t"
			
 
				                 :: "r" (src[p]), "r" (dst[p] + counter[p]),
			
 
				-                   "g" (-counter[p]), "r"(c->dither16)
			
 
				+                   "g" (-counter[p]), "r"(&c->redDither)
			
 
				                 : "%"REG_a
			
 
				             );
			
 
				         }
			
@@ -485,7 +577,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
			
 
				         YSCALEYUV2PACKEDX_ACCURATE
			
@@ -518,7 +610,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
			
 
				         YSCALEYUV2PACKEDX
			
@@ -575,7 +667,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX_ACCURATE
			
 
				     YSCALEYUV2RGBX
			
@@ -599,7 +691,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX
			
 
				     YSCALEYUV2RGBX
			
@@ -652,7 +744,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX_ACCURATE
			
 
				     YSCALEYUV2RGBX
			
@@ -676,7 +768,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX
			
 
				     YSCALEYUV2RGBX
			
@@ -809,7 +901,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX_ACCURATE
			
 
				     YSCALEYUV2RGBX
			
@@ -833,7 +925,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX
			
 
				     YSCALEYUV2RGBX
			
@@ -874,7 +966,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX_ACCURATE
			
 
				     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
			
@@ -895,7 +987,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
 
				 {
			
 
				     x86_reg dummy=0;
			
 
				     x86_reg dstW_reg = dstW;
			
 
				-    x86_reg uv_off = c->uv_off << 1;
			
 
				+    x86_reg uv_off = c->uv_offx2;
			
 
				 
			
 
				     YSCALEYUV2PACKEDX
			
 
				     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
			
@@ -1637,32 +1729,6 @@ static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
 
				     assert(src1 == src2);
			
 
				 }
			
 
				 
			
 
				-static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV,
			
 
				-                           const uint8_t *src1, const uint8_t *src2,
			
 
				-                           int width, uint32_t *unused)
			
 
				-{
			
 
				-    __asm__ volatile(
			
 
				-        "mov                    %0, %%"REG_a"       \n\t"
			
 
				-        "1:                                         \n\t"
			
 
				-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
			
 
				-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
			
 
				-        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
			
 
				-        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
			
 
				-        "psrlw                  $8, %%mm0           \n\t"
			
 
				-        "psrlw                  $8, %%mm1           \n\t"
			
 
				-        "psrlw                  $8, %%mm2           \n\t"
			
 
				-        "psrlw                  $8, %%mm3           \n\t"
			
 
				-        "packuswb            %%mm1, %%mm0           \n\t"
			
 
				-        "packuswb            %%mm3, %%mm2           \n\t"
			
 
				-        "movq                %%mm0, (%3, %%"REG_a") \n\t"
			
 
				-        "movq                %%mm2, (%4, %%"REG_a") \n\t"
			
 
				-        "add                    $8, %%"REG_a"       \n\t"
			
 
				-        " js                    1b                  \n\t"
			
 
				-        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
			
 
				-        : "%"REG_a
			
 
				-    );
			
 
				-}
			
 
				-
			
 
				 /* This is almost identical to the previous, end exists only because
			
 
				  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
			
 
				 static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
			
@@ -1712,33 +1778,6 @@ static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
 
				     assert(src1 == src2);
			
 
				 }
			
 
				 
			
 
				-static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV,
			
 
				-                           const uint8_t *src1, const uint8_t *src2,
			
 
				-                           int width, uint32_t *unused)
			
 
				-{
			
 
				-    __asm__ volatile(
			
 
				-        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
			
 
				-        "mov                    %0, %%"REG_a"       \n\t"
			
 
				-        "1:                                         \n\t"
			
 
				-        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
			
 
				-        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
			
 
				-        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
			
 
				-        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
			
 
				-        "pand                %%mm4, %%mm0           \n\t"
			
 
				-        "pand                %%mm4, %%mm1           \n\t"
			
 
				-        "pand                %%mm4, %%mm2           \n\t"
			
 
				-        "pand                %%mm4, %%mm3           \n\t"
			
 
				-        "packuswb            %%mm1, %%mm0           \n\t"
			
 
				-        "packuswb            %%mm3, %%mm2           \n\t"
			
 
				-        "movq                %%mm0, (%3, %%"REG_a") \n\t"
			
 
				-        "movq                %%mm2, (%4, %%"REG_a") \n\t"
			
 
				-        "add                    $8, %%"REG_a"       \n\t"
			
 
				-        " js                    1b                  \n\t"
			
 
				-        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
			
 
				-        : "%"REG_a
			
 
				-    );
			
 
				-}
			
 
				-
			
 
				 static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
			
 
				                                               const uint8_t *src, int width)
			
 
				 {
			
@@ -1921,7 +1960,7 @@ static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
 
				 
			
 
				 #if !COMPILE_TEMPLATE_MMX2
			
 
				 // bilinear / bicubic scaling
			
 
				-static void RENAME(hScale)(int16_t *dst, int dstW,
			
 
				+static void RENAME(hScale)(SwsContext *c, int16_t *dst, int dstW,
			
 
				                            const uint8_t *src, const int16_t *filter,
			
 
				                            const int16_t *filterPos, int filterSize)
			
 
				 {
			
@@ -2433,6 +2472,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
				         }
			
 
				     }
			
 
				 
			
 
				+    if (c->scalingBpp == 8) {
			
 
				 #if !COMPILE_TEMPLATE_MMX2
			
 
				     c->hScale       = RENAME(hScale      );
			
 
				 #endif /* !COMPILE_TEMPLATE_MMX2 */
			
@@ -2450,6 +2490,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
				 #if COMPILE_TEMPLATE_MMX2
			
 
				     }
			
 
				 #endif /* COMPILE_TEMPLATE_MMX2 */
			
 
				+    }
			
 
				 
			
 
				 #if !COMPILE_TEMPLATE_MMX2
			
 
				     switch(srcFormat) {
			
@@ -2457,13 +2498,10 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
				         case PIX_FMT_UYVY422  : c->chrToYV12 = RENAME(uyvyToUV); break;
			
 
				         case PIX_FMT_NV12     : c->chrToYV12 = RENAME(nv12ToUV); break;
			
 
				         case PIX_FMT_NV21     : c->chrToYV12 = RENAME(nv21ToUV); break;
			
 
				-        case PIX_FMT_GRAY16LE :
			
 
				         case PIX_FMT_YUV420P9LE:
			
 
				         case PIX_FMT_YUV422P10LE:
			
 
				-        case PIX_FMT_YUV420P10LE:
			
 
				-        case PIX_FMT_YUV420P16LE:
			
 
				-        case PIX_FMT_YUV422P16LE:
			
 
				-        case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
			
 
				+        case PIX_FMT_YUV420P10LE: c->hScale16= RENAME(hScale16); break;
			
 
				+        default: break;
			
 
				     }
			
 
				 #endif /* !COMPILE_TEMPLATE_MMX2 */
			
 
				     if (!c->chrSrcHSubSample) {
			
@@ -2477,10 +2515,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
				     switch (srcFormat) {
			
 
				 #if !COMPILE_TEMPLATE_MMX2
			
 
				     case PIX_FMT_YUYV422  :
			
 
				-    case PIX_FMT_Y400A    :
			
 
				-                            c->lumToYV12 = RENAME(yuy2ToY); break;
			
 
				-    case PIX_FMT_UYVY422  :
			
 
				-                            c->lumToYV12 = RENAME(uyvyToY); break;
			
 
				+    case PIX_FMT_Y400A    : c->lumToYV12 = RENAME(yuy2ToY); break;
			
 
				+    case PIX_FMT_UYVY422  : c->lumToYV12 = RENAME(uyvyToY); break;
			
 
				 #endif /* !COMPILE_TEMPLATE_MMX2 */
			
 
				     case PIX_FMT_BGR24    : c->lumToYV12 = RENAME(bgr24ToY); break;
			
 
				     case PIX_FMT_RGB24    : c->lumToYV12 = RENAME(rgb24ToY); break;
			
@@ -2494,6 +2530,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
 
				         }
			
 
				     }
			
 
				 #endif /* !COMPILE_TEMPLATE_MMX2 */
			
 
				-    if(isAnyRGB(c->srcFormat))
			
 
				+    if(isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
			
 
				         c->hScale16= RENAME(hScale16);
			
 
				+    if(c->scalingBpp != 8)
			
 
				+        c->hScale16 = NULL;
			
 
				 }
			
--- a/tests/ref/lavf/pixfmt
+++ b/tests/ref/lavf/pixfmt
@@ -28,9 +28,9 @@ efa7c0337cc00c796c6df615223716f1 *./tests/data/pixfmt/rgb565.yuv
 
				 304128 ./tests/data/pixfmt/rgb555.yuv
			
 
				 6be306b0cce5f8e6c271ea17fef9745b *./tests/data/pixfmt/gray.yuv
			
 
				 304128 ./tests/data/pixfmt/gray.yuv
			
 
				-31398104d2349dd48328a6862bc6711f *./tests/data/pixfmt/monow.yuv
			
 
				+6c719671e39f1bcf67b47eab98fa529b *./tests/data/pixfmt/monow.yuv
			
 
				 304128 ./tests/data/pixfmt/monow.yuv
			
 
				-31398104d2349dd48328a6862bc6711f *./tests/data/pixfmt/monob.yuv
			
 
				+6c719671e39f1bcf67b47eab98fa529b *./tests/data/pixfmt/monob.yuv
			
 
				 304128 ./tests/data/pixfmt/monob.yuv
			
 
				 00b85790df5740bab95e2559d81603a7 *./tests/data/pixfmt/yuv440p.yuv
			
 
				 304128 ./tests/data/pixfmt/yuv440p.yuv
			
--- a/tests/ref/lavfi/pixdesc
+++ b/tests/ref/lavfi/pixdesc
@@ -1,8 +1,8 @@
 
				 abgr                037bf9df6a765520ad6d490066bf4b89
			
 
				 argb                c442a8261c2265a07212ef0f72e35f5a
			
 
				 bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
			
 
				-bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
			
 
				-bgr48le             d022bfdd6a07d5dcc693799322a386b4
			
 
				+bgr48be             74dedaaacae8fd1ef46e05f78cf29d62
			
 
				+bgr48le             0eb7d30801eac6058814bddd330b3c76
			
 
				 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
			
 
				 bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
			
 
				 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
			
@@ -18,8 +18,8 @@ monow               9251497f3b0634f1165d12d5a289d943
 
				 nv12                e0af357888584d36eec5aa0f673793ef
			
 
				 nv21                9a3297f3b34baa038b1f37cb202b512f
			
 
				 rgb24               b41eba9651e1b5fe386289b506188105
			
 
				-rgb48be             460b6de89b156290a12d3941db8bd731
			
 
				-rgb48le             cd93cb34d15996987367dabda3a10128
			
 
				+rgb48be             e3bc84c9af376fb6d0f0293cc7b713a6
			
 
				+rgb48le             f51c0e71638a822458329abb2f4052c7
			
 
				 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
			
 
				 rgb555be            912a62c5e53bfcbac2a0340e10973cf2
			
 
				 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
			
@@ -40,12 +40,12 @@ yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 
				 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
			
 
				 yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
			
 
				 yuv422p10le         d0607c260a45c973e6639f4e449730ad
			
 
				-yuv422p16be         5499502e1c29534a158a1fe60e889f60
			
 
				-yuv422p16le         e3d61fde6978591596bc36b914386623
			
 
				+yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
			
 
				+yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
			
 
				 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
			
 
				 yuv444p             0a98447b78fd476aa39686da6a74fa2e
			
 
				-yuv444p16be         ea602a24b8e6969679265078bd8607b6
			
 
				-yuv444p16le         1262a0dc57ee147967fc896d04206313
			
 
				+yuv444p16be         af555dbaa401b142a995566864f47545
			
 
				+yuv444p16le         a803e8016997dad95c5b2a72f54c34d6
			
 
				 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
			
 
				 yuvj420p            32eec78ba51857b16ce9b813a49b7189
			
 
				 yuvj422p            0dfa0ed434f73be51428758c69e082cb
			
--- a/tests/ref/lavfi/pixfmts_copy
+++ b/tests/ref/lavfi/pixfmts_copy
@@ -1,8 +1,8 @@
 
				 abgr                037bf9df6a765520ad6d490066bf4b89
			
 
				 argb                c442a8261c2265a07212ef0f72e35f5a
			
 
				 bgr24               0d0cb38ab3fa0b2ec0865c14f78b217b
			
 
				-bgr48be             4ba0ff7fc9e011ea264610ad1585bb1f
			
 
				-bgr48le             d022bfdd6a07d5dcc693799322a386b4
			
 
				+bgr48be             74dedaaacae8fd1ef46e05f78cf29d62
			
 
				+bgr48le             0eb7d30801eac6058814bddd330b3c76
			
 
				 bgr4_byte           50d23cc82d9dcef2fd12adb81fb9b806
			
 
				 bgr555be            49f01b1f1f0c84fd9e776dd34cc3c280
			
 
				 bgr555le            378d6ac4223651a1adcbf94a3d0d807b
			
@@ -18,8 +18,8 @@ monow               9251497f3b0634f1165d12d5a289d943
 
				 nv12                e0af357888584d36eec5aa0f673793ef
			
 
				 nv21                9a3297f3b34baa038b1f37cb202b512f
			
 
				 rgb24               b41eba9651e1b5fe386289b506188105
			
 
				-rgb48be             460b6de89b156290a12d3941db8bd731
			
 
				-rgb48le             cd93cb34d15996987367dabda3a10128
			
 
				+rgb48be             e3bc84c9af376fb6d0f0293cc7b713a6
			
 
				+rgb48le             f51c0e71638a822458329abb2f4052c7
			
 
				 rgb4_byte           c93ba89b74c504e7f5ae9d9ab1546c73
			
 
				 rgb555be            912a62c5e53bfcbac2a0340e10973cf2
			
 
				 rgb555le            a937a0fc764fb57dc1b3af87cba0273c
			
@@ -40,12 +40,12 @@ yuv420p9le          16543fda8f87d94a6cf857d2e8d4461a
 
				 yuv422p             c9bba4529821d796a6ab09f6a5fd355a
			
 
				 yuv422p10be         bdc13b630fd668b34c6fe1aae28dfc71
			
 
				 yuv422p10le         d0607c260a45c973e6639f4e449730ad
			
 
				-yuv422p16be         5499502e1c29534a158a1fe60e889f60
			
 
				-yuv422p16le         e3d61fde6978591596bc36b914386623
			
 
				+yuv422p16be         dc9886f2fccf87cc54b27e071a2c251e
			
 
				+yuv422p16le         f181c8d8436f1233ba566d9bc88005ec
			
 
				 yuv440p             5a064afe2b453bb52cdb3f176b1aa1cf
			
 
				 yuv444p             0a98447b78fd476aa39686da6a74fa2e
			
 
				-yuv444p16be         ea602a24b8e6969679265078bd8607b6
			
 
				-yuv444p16le         1262a0dc57ee147967fc896d04206313
			
 
				+yuv444p16be         af555dbaa401b142a995566864f47545
			
 
				+yuv444p16le         a803e8016997dad95c5b2a72f54c34d6
			
 
				 yuva420p            a29884f3f3dfe1e00b961bc17bef3d47
			
 
				 yuvj420p            32eec78ba51857b16ce9b813a49b7189
			
 
				 yuvj422p            0dfa0ed434f73be51428758c69e082cb
			
--- a/tests/ref/lavfi/pixfmts_crop
+++ b/tests/ref/lavfi/pixfmts_crop
@@ -1,8 +1,8 @@
 
				 abgr                cd761690872843d1b7ab0c695393c751
			
 
				 argb                2ec6ef18769bcd651c2e8904d5a3ee67
			
 
				 bgr24               3450fd00cf1493d1ded75544d82ba3ec
			
 
				-bgr48be             90cb5d373a1123432d63c6a10c101afa
			
 
				-bgr48le             9371f54ceda9010f1199e86f4930ac3f
			
 
				+bgr48be             a9a7d177cef0914d3f1d266f00dff676
			
 
				+bgr48le             b475d1b529ed80c728ddbacd22d35281
			
 
				 bgr4_byte           2f6ac3cdd4676ab4e2982bdf0664945b
			
 
				 bgr555be            d3a7c273604723adeb7e5f5dd1c4272b
			
 
				 bgr555le            d22442fc13b464f9ba455b08df4e981f
			
@@ -14,8 +14,8 @@ gray                8c4850e66562a587a292dc728a65ea4a
 
				 gray16be            daa5a6b98fb4a280c57c57bff1a2ab5a
			
 
				 gray16le            84f5ea7259073edcb893113b42213c8e
			
 
				 rgb24               3b90ed64b687d3dc186c6ef521dc71a8
			
 
				-rgb48be             a808128041a1962deaa8620c7448feba
			
 
				-rgb48le             ce92d02cc322608d5be377cb1940677b
			
 
				+rgb48be             b8f9fd6aaa24d75275ee2f8b8a7b9e55
			
 
				+rgb48le             3e52e831a040f086c3ae983241172cce
			
 
				 rgb4_byte           6958029f73c6cdfed4f71020d816f027
			
 
				 rgb555be            41a7d1836837bc90f2cae19a9c9df3b3
			
 
				 rgb555le            eeb78f8ce6186fba55c941469e60ba67
			
@@ -29,12 +29,12 @@ yuv420p             bfea0188ddd4889787c403caae119cc7
 
				 yuv420p16be         8365eff38b8c329aeb95fc605fa229bb
			
 
				 yuv420p16le         5e8dd38d973d5854abe1ad4efad20cc1
			
 
				 yuv422p             f2f930a91fe00d4252c4720b5ecd8961
			
 
				-yuv422p16be         167e4338811a7d272925a4c6417d60da
			
 
				-yuv422p16le         3359395d5875d581fa1e975013d30114
			
 
				+yuv422p16be         93f9b6f33f9529db6de6a9f0ddd70eb5
			
 
				+yuv422p16le         2e66dcfec54ca6b57aa4bbd9ac234639
			
 
				 yuv440p             2472417d980e395ad6843cbb8b633b29
			
 
				 yuv444p             1f151980486848c96bc5585ced99003e
			
 
				-yuv444p16be         d69280c2856865d2ea94bd5292aac1c6
			
 
				-yuv444p16le         33f43e030bedf9723be4f63c3e9fc80e
			
 
				+yuv444p16be         e7d1ecf0c11a41b5db192f761f55bd3c
			
 
				+yuv444p16le         3298a0043d982e7cf1a33a1292fa11f0
			
 
				 yuva420p            7536753dfbc7932560fb50c921369a0e
			
 
				 yuvj420p            21f891093006d42d7683b0e1d773a657
			
 
				 yuvj422p            9a43d474c407590ad8f213880586b45e
			
--- a/tests/ref/lavfi/pixfmts_hflip
+++ b/tests/ref/lavfi/pixfmts_hflip
@@ -1,8 +1,8 @@
 
				 abgr                49468c6c9ceee5d52b08b1270a909323
			
 
				 argb                50ba9f16c6475530602f2983278b82d0
			
 
				 bgr24               cc53d2011d097972db0d22756c3699e3
			
 
				-bgr48be             11641cf0f4516a9aed98f7872720f801
			
 
				-bgr48le             b5440734eed128554dd9f83b34ba582f
			
 
				+bgr48be             90374bc92471f1bd4931d71ef8b73f50
			
 
				+bgr48le             696f628d0dd32121e60a0d61ac47d6e6
			
 
				 bgr4_byte           aac987e7d1a6a96477cfc0b48a4285de
			
 
				 bgr555be            bc07265898440116772200390d70c092
			
 
				 bgr555le            ccee08679bac84a1f960c6c9070c5538
			
@@ -14,8 +14,8 @@ gray                03efcb4ab52a24c0af0e03cfd26c9377
 
				 gray16be            9bcbca979601ddc4869f846f08f3d1dd
			
 
				 gray16le            c1b8965adcc7f847ee343149ff507073
			
 
				 rgb24               754f1722fc738590cc407ac65749bfe8
			
 
				-rgb48be             10743e1577dc3198dbbc7c0b3b8f429e
			
 
				-rgb48le             dd945a44f39119221407bf7a04f1bc49
			
 
				+rgb48be             2397b9d3c296ac15f8a2325a703f81c7
			
 
				+rgb48le             527043c72546d8b4bb1ce2dea4b294c3
			
 
				 rgb4_byte           c8a3f995fcf3e0919239ea2c413ddc29
			
 
				 rgb555be            045ce8607d3910586f4d97481dda8632
			
 
				 rgb555le            8778ee0cf58ce9ad1d99a1eca9f95e87
			
@@ -29,12 +29,12 @@ yuv420p             2d5c80f9ba2ddd85b2aeda3564cc7d64
 
				 yuv420p16be         758b0c1e2113b15e7afde48da4e4d024
			
 
				 yuv420p16le         480ccd951dcb806bc875d307e02e50a0
			
 
				 yuv422p             6e728f4eb9eae287c224f396d84be6ea
			
 
				-yuv422p16be         a05d43cd62b790087bd37083174557de
			
 
				-yuv422p16le         6954abebcbc62d81068d58d0c62bdd5b
			
 
				+yuv422p16be         8657d2c8d443940300fdb4028d555631
			
 
				+yuv422p16le         4ab27609981e50de5b1150125718ae76
			
 
				 yuv440p             a99e2b57ed601f39852715c9d675d0d3
			
 
				 yuv444p             947e47f7bb5fdccc659d19b7df2b6fc3
			
 
				-yuv444p16be         e5ef45bc3d2f5b0b2542d5151340c382
			
 
				-yuv444p16le         70793e3d66d0c23a0cdedabe9c24c2a7
			
 
				+yuv444p16be         a5154ce329db0d2caf0bd43f1347dba3
			
 
				+yuv444p16le         1f703308b90feb048191b3bccc695671
			
 
				 yuva420p            d83ec0c01498189f179ec574918185f1
			
 
				 yuvj420p            df3aaaec3bb157c3bde5f0365af30f4f
			
 
				 yuvj422p            d113871528d510a192797af59df9c05c