|
@@ -595,7 +595,7 @@ yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
|
|
|
int len_count = (dstW + 1) >> 1;
|
|
|
const void *r, *g, *b;
|
|
|
|
|
|
- if (uvalpha < 2048) {
|
|
|
+ if (uvalpha == 0) {
|
|
|
int count = 0;
|
|
|
int head = YUVRGB_TABLE_HEADROOM;
|
|
|
__m128i headroom = __lsx_vreplgr2vr_h(head);
|
|
@@ -659,61 +659,46 @@ yuv2rgb_1_template_lsx(SwsInternal *c, const int16_t *buf0,
|
|
|
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
|
|
|
int count = 0;
|
|
|
int HEADROOM = YUVRGB_TABLE_HEADROOM;
|
|
|
+ int uvalpha1 = 4096 - uvalpha;
|
|
|
__m128i headroom = __lsx_vreplgr2vr_w(HEADROOM);
|
|
|
+ __m128i uvalpha_tmp1 = __lsx_vreplgr2vr_h(uvalpha1);
|
|
|
+ __m128i uvalpha_tmp = __lsx_vreplgr2vr_h(uvalpha);
|
|
|
|
|
|
for (i = 0; i < len; i += 8) {
|
|
|
int Y1, Y2, U, V;
|
|
|
int i_dex = i << 1;
|
|
|
int c_dex = count << 1;
|
|
|
__m128i src_y, src_u0, src_v0, src_u1, src_v1;
|
|
|
- __m128i y_l, y_h, u1, u2, v1, v2;
|
|
|
+ __m128i y_l, y_h, u1, u2, v1, v2, u_ev, v_od;
|
|
|
|
|
|
DUP4_ARG2(__lsx_vldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
|
|
|
ubuf1, c_dex, src_y, src_u0, src_v0, src_u1);
|
|
|
src_v1 = __lsx_vldx(vbuf1, c_dex);
|
|
|
src_y = __lsx_vsrari_h(src_y, 7);
|
|
|
- u1 = __lsx_vaddwev_w_h(src_u0, src_u1);
|
|
|
- v1 = __lsx_vaddwod_w_h(src_u0, src_u1);
|
|
|
- u2 = __lsx_vaddwev_w_h(src_v0, src_v1);
|
|
|
- v2 = __lsx_vaddwod_w_h(src_v0, src_v1);
|
|
|
+
|
|
|
+ u_ev = __lsx_vmulwev_w_h(src_u0, uvalpha_tmp1);
|
|
|
+ v_od = __lsx_vmulwod_w_h(src_u0, uvalpha_tmp1);
|
|
|
+ u1 = __lsx_vmaddwev_w_h(u_ev, src_u1, uvalpha_tmp);
|
|
|
+ v1 = __lsx_vmaddwod_w_h(v_od, src_u1, uvalpha_tmp);
|
|
|
+ u_ev = __lsx_vmulwev_w_h(src_v0, uvalpha_tmp1);
|
|
|
+ v_od = __lsx_vmulwod_w_h(src_v0, uvalpha_tmp1);
|
|
|
+ u2 = __lsx_vmaddwev_w_h(u_ev, src_v1, uvalpha_tmp);
|
|
|
+ v2 = __lsx_vmaddwod_w_h(v_od, src_v1, uvalpha_tmp);
|
|
|
+
|
|
|
y_l = __lsx_vsllwil_w_h(src_y, 0);
|
|
|
y_h = __lsx_vexth_w_h(src_y);
|
|
|
- u1 = __lsx_vsrari_w(u1, 8);
|
|
|
- v1 = __lsx_vsrari_w(v1, 8);
|
|
|
- u2 = __lsx_vsrari_w(u2, 8);
|
|
|
- v2 = __lsx_vsrari_w(v2, 8);
|
|
|
+ u1 = __lsx_vsrari_w(u1, 19);
|
|
|
+ v1 = __lsx_vsrari_w(v1, 19);
|
|
|
+ u2 = __lsx_vsrari_w(u2, 19);
|
|
|
+ v2 = __lsx_vsrari_w(v2, 19);
|
|
|
u1 = __lsx_vadd_w(u1, headroom);
|
|
|
v1 = __lsx_vadd_w(v1, headroom);
|
|
|
u2 = __lsx_vadd_w(u2, headroom);
|
|
|
v2 = __lsx_vadd_w(v2, headroom);
|
|
|
- WRITE_YUV2RGB_LSX(y_l, y_l, u1, v1, 0, 1, 0, 0);
|
|
|
- WRITE_YUV2RGB_LSX(y_l, y_l, u2, v2, 2, 3, 0, 0);
|
|
|
- WRITE_YUV2RGB_LSX(y_h, y_h, u1, v1, 0, 1, 1, 1);
|
|
|
- WRITE_YUV2RGB_LSX(y_h, y_h, u2, v2, 2, 3, 1, 1);
|
|
|
- }
|
|
|
- if (dstW - i >= 4) {
|
|
|
- int Y1, Y2, U, V;
|
|
|
- int i_dex = i << 1;
|
|
|
- __m128i src_y, src_u0, src_v0, src_u1, src_v1;
|
|
|
- __m128i uv;
|
|
|
-
|
|
|
- src_y = __lsx_vldx(buf0, i_dex);
|
|
|
- src_u0 = __lsx_vldrepl_d((ubuf0 + count), 0);
|
|
|
- src_v0 = __lsx_vldrepl_d((vbuf0 + count), 0);
|
|
|
- src_u1 = __lsx_vldrepl_d((ubuf1 + count), 0);
|
|
|
- src_v1 = __lsx_vldrepl_d((vbuf1 + count), 0);
|
|
|
-
|
|
|
- src_u0 = __lsx_vilvl_h(src_u1, src_u0);
|
|
|
- src_v0 = __lsx_vilvl_h(src_v1, src_v0);
|
|
|
- src_y = __lsx_vsrari_h(src_y, 7);
|
|
|
- src_y = __lsx_vsllwil_w_h(src_y, 0);
|
|
|
- uv = __lsx_vilvl_h(src_v0, src_u0);
|
|
|
- uv = __lsx_vhaddw_w_h(uv, uv);
|
|
|
- uv = __lsx_vsrari_w(uv, 8);
|
|
|
- uv = __lsx_vadd_w(uv, headroom);
|
|
|
- WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 0, 1, 0, 1);
|
|
|
- WRITE_YUV2RGB_LSX(src_y, src_y, uv, uv, 2, 3, 2, 3);
|
|
|
- i += 4;
|
|
|
+ WRITE_YUV2RGB_LSX(y_l, y_l, u1, u2, 0, 1, 0, 0);
|
|
|
+ WRITE_YUV2RGB_LSX(y_l, y_l, v1, v2, 2, 3, 0, 0);
|
|
|
+ WRITE_YUV2RGB_LSX(y_h, y_h, u1, u2, 0, 1, 1, 1);
|
|
|
+ WRITE_YUV2RGB_LSX(y_h, y_h, v1, v2, 2, 3, 1, 1);
|
|
|
}
|
|
|
for (; count < len_count; count++) {
|
|
|
int Y1 = (buf0[count * 2 ] + 64) >> 7;
|