|
@@ -378,24 +378,24 @@ cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
|
|
|
apply_noise_main:
|
|
|
 %if ARCH_X86_64 == 0 || WIN64
|
|
|
     mov kxd, m_maxm
|
|
|
-%define count kxq
|
|
|
+    DEFINE_ARGS Y, s_m, q_filt, noise, count ; fifth argument register (previously kx) now holds m_max from the mov above; name it count
|
|
|
 %else
|
|
|
-%define count m_maxq
|
|
|
+    DEFINE_ARGS Y, s_m, q_filt, noise, kx, count ; sixth argument (m_max in the cglobal list) is used directly as count
|
|
|
 %endif
|
|
|
movsxdifnidn noiseq, noised
|
|
|
dec noiseq
|
|
|
- shl count, 2
|
|
|
+ shl countd, 2
|
|
|
%ifdef PIC
|
|
|
lea NOISE_TABLE, [sbr_noise_table]
|
|
|
%endif
|
|
|
- lea Yq, [Yq + 2*count]
|
|
|
- add s_mq, count
|
|
|
- add q_filtq, count
|
|
|
+ lea Yq, [Yq + 2*countq]
|
|
|
+ add s_mq, countq
|
|
|
+ add q_filtq, countq
|
|
|
shl noiseq, 3
|
|
|
pxor m5, m5
|
|
|
- neg count
|
|
|
+ neg countq
|
|
|
.loop:
|
|
|
- mova m1, [q_filtq + count]
|
|
|
+ mova m1, [q_filtq + countq]
|
|
|
movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
|
|
|
movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
|
|
|
add noiseq, 2*mmsize
|
|
@@ -404,7 +404,7 @@ apply_noise_main:
|
|
|
punpckldq m1, m1
|
|
|
mulps m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
|
|
|
mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
|
|
|
- mova m3, [s_mq + count]
|
|
|
+ mova m3, [s_mq + countq]
|
|
|
; TODO: replace by a vpermd in AVX2
|
|
|
punpckhdq m4, m3, m3
|
|
|
punpckldq m3, m3
|
|
@@ -414,15 +414,15 @@ apply_noise_main:
|
|
|
mulps m4, m0 ; s_m[m] * phi_sign
|
|
|
pand m1, m6
|
|
|
pand m2, m7
|
|
|
- movu m6, [Yq + 2*count]
|
|
|
- movu m7, [Yq + 2*count + mmsize]
|
|
|
+ movu m6, [Yq + 2*countq]
|
|
|
+ movu m7, [Yq + 2*countq + mmsize]
|
|
|
addps m3, m1
|
|
|
addps m4, m2
|
|
|
addps m6, m3
|
|
|
addps m7, m4
|
|
|
- movu [Yq + 2*count], m6
|
|
|
- movu [Yq + 2*count + mmsize], m7
|
|
|
- add count, mmsize
|
|
|
+ movu [Yq + 2*countq], m6
|
|
|
+ movu [Yq + 2*countq + mmsize], m7
|
|
|
+ add countq, mmsize
|
|
|
jl .loop
|
|
|
RET
|
|
|
|