|
@@ -101,70 +101,69 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
|
|
|
const uint8_t *src, const int16_t *filter,
|
|
|
const int32_t *filterPos, int filterSize)
|
|
|
{
|
|
|
- register int i;
|
|
|
LOCAL_ALIGNED(16, int, tempo, [4]);
|
|
|
|
|
|
- switch (filterSize) {
|
|
|
- case 4:
|
|
|
- for (i = 0; i < dstW; i++) {
|
|
|
- register int srcPos = filterPos[i];
|
|
|
-
|
|
|
- vector unsigned char src_vF = unaligned_load(srcPos, src);
|
|
|
- vector signed short src_v, filter_v;
|
|
|
- vector signed int val_vEven, val_s;
|
|
|
- src_v = // vec_unpackh sign-extends...
|
|
|
- (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
- // now put our elements in the even slots
|
|
|
- src_v = vec_mergeh(src_v, (vector signed short)vzero);
|
|
|
- GET_VF4(i, filter_v, filter);
|
|
|
- val_vEven = vec_mule(src_v, filter_v);
|
|
|
- val_s = vec_sums(val_vEven, vzero);
|
|
|
- vec_st(val_s, 0, tempo);
|
|
|
- dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
- }
|
|
|
+ switch (filterSize) {
|
|
|
+ case 4:
|
|
|
+ for (register int i = 0; i < dstW; i++) {
|
|
|
+ register int srcPos = filterPos[i];
|
|
|
+
|
|
|
+ vector unsigned char src_vF = unaligned_load(srcPos, src);
|
|
|
+ vector signed short src_v, filter_v;
|
|
|
+ vector signed int val_vEven, val_s;
|
|
|
+ src_v = // vec_unpackh sign-extends...
|
|
|
+ (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
+ // now put our elements in the even slots
|
|
|
+ src_v = vec_mergeh(src_v, (vector signed short)vzero);
|
|
|
+ GET_VF4(i, filter_v, filter);
|
|
|
+ val_vEven = vec_mule(src_v, filter_v);
|
|
|
+ val_s = vec_sums(val_vEven, vzero);
|
|
|
+ vec_st(val_s, 0, tempo);
|
|
|
+ dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
+ }
|
|
|
break;
|
|
|
- case 8:
|
|
|
- for (i = 0; i < dstW; i++) {
|
|
|
- register int srcPos = filterPos[i];
|
|
|
- vector unsigned char src_vF, av_unused src_v0, av_unused src_v1;
|
|
|
- vector unsigned char av_unused permS;
|
|
|
- vector signed short src_v, filter_v;
|
|
|
- vector signed int val_v, val_s;
|
|
|
- FIRST_LOAD(src_v0, srcPos, src, permS);
|
|
|
- LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
|
|
|
- src_v = // vec_unpackh sign-extends...
|
|
|
- (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
- filter_v = vec_ld(i << 4, filter);
|
|
|
- val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
|
|
|
- val_s = vec_sums(val_v, vzero);
|
|
|
- vec_st(val_s, 0, tempo);
|
|
|
- dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
- }
|
|
|
+ case 8:
|
|
|
+ for (register int i = 0; i < dstW; i++) {
|
|
|
+ register int srcPos = filterPos[i];
|
|
|
+ vector unsigned char src_vF, av_unused src_v0, av_unused src_v1;
|
|
|
+ vector unsigned char av_unused permS;
|
|
|
+ vector signed short src_v, filter_v;
|
|
|
+ vector signed int val_v, val_s;
|
|
|
+ FIRST_LOAD(src_v0, srcPos, src, permS);
|
|
|
+ LOAD_SRCV8(srcPos, 0, src, permS, src_v0, src_v1, src_vF);
|
|
|
+ src_v = // vec_unpackh sign-extends...
|
|
|
+ (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
+ filter_v = vec_ld(i << 4, filter);
|
|
|
+ val_v = vec_msums(src_v, filter_v, (vector signed int)vzero);
|
|
|
+ val_s = vec_sums(val_v, vzero);
|
|
|
+ vec_st(val_s, 0, tempo);
|
|
|
+ dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
+ }
|
|
|
break;
|
|
|
|
|
|
- case 16:
|
|
|
- for (i = 0; i < dstW; i++) {
|
|
|
- register int srcPos = filterPos[i];
|
|
|
+ case 16:
|
|
|
+ for (register int i = 0; i < dstW; i++) {
|
|
|
+ register int srcPos = filterPos[i];
|
|
|
|
|
|
- vector unsigned char src_vF = unaligned_load(srcPos, src);
|
|
|
- vector signed short src_vA = // vec_unpackh sign-extends...
|
|
|
- (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
- vector signed short src_vB = // vec_unpackh sign-extends...
|
|
|
- (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
|
|
|
- vector signed short filter_v0 = vec_ld(i << 5, filter);
|
|
|
- vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
|
|
|
+ vector unsigned char src_vF = unaligned_load(srcPos, src);
|
|
|
+ vector signed short src_vA = // vec_unpackh sign-extends...
|
|
|
+ (vector signed short)(VEC_MERGEH((vector unsigned char)vzero, src_vF));
|
|
|
+ vector signed short src_vB = // vec_unpackh sign-extends...
|
|
|
+ (vector signed short)(VEC_MERGEL((vector unsigned char)vzero, src_vF));
|
|
|
+ vector signed short filter_v0 = vec_ld(i << 5, filter);
|
|
|
+ vector signed short filter_v1 = vec_ld((i << 5) + 16, filter);
|
|
|
|
|
|
- vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
|
|
|
- vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
|
|
|
+ vector signed int val_acc = vec_msums(src_vA, filter_v0, (vector signed int)vzero);
|
|
|
+ vector signed int val_v = vec_msums(src_vB, filter_v1, val_acc);
|
|
|
|
|
|
- vector signed int val_s = vec_sums(val_v, vzero);
|
|
|
+ vector signed int val_s = vec_sums(val_v, vzero);
|
|
|
|
|
|
- VEC_ST(val_s, 0, tempo);
|
|
|
- dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
- }
|
|
|
+ VEC_ST(val_s, 0, tempo);
|
|
|
+ dst[i] = FFMIN(tempo[3] >> 7, (1 << 15) - 1);
|
|
|
+ }
|
|
|
break;
|
|
|
|
|
|
- default:
|
|
|
+ default:
|
|
|
for (register int i = 0; i < dstW; i++) {
|
|
|
register int j;
|
|
|
register int srcPos = filterPos[i];
|
|
@@ -174,5 +173,5 @@ static void FUNC(hScale_real)(SwsContext *c, int16_t *dst, int dstW,
|
|
|
dst[i] = FFMIN(val >> 7, (1 << 15) - 1);
|
|
|
}
|
|
|
break;
|
|
|
- }
|
|
|
+ }
|
|
|
}
|