|
- //===----------------------Hexagon builtin routine ------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- // Double Precision Divide
- #define A r1:0 // first operand (the value being divided), as a 64-bit register pair
- #define AH r1 // high word of A
- #define AL r0 // low word of A
- #define B r3:2 // second operand (the value divided by), as a 64-bit register pair
- #define BH r3 // high word of B
- #define BL r2 // low word of B
- #define Q r5:4 // quotient accumulator, as a 64-bit register pair
- #define QH r5 // high word of Q
- #define QL r4 // low word of Q
- #define PROD r7:6 // scratch pair, mostly used for multiply results
- #define PRODHI r7 // high word of PROD
- #define PRODLO r6 // low word of PROD
- #define SFONE r8 // single-precision constant register (loaded with 0x3f800001 in the main path)
- #define SFDEN r9 // single-precision value built from SFONE and the top mantissa bits of the divisor
- #define SFERROR r10 // error term used while refining the reciprocal estimate
- #define SFRECIP r11 // single-precision reciprocal estimate of SFDEN
- #define EXPBA r13:12 // register pair holding EXPB (high) and EXPA (low)
- #define EXPB r13 // exponent working register for B
- #define EXPA r12 // exponent working register for A
- #define REMSUB2 r15:14 // product of PRODHI and DENOMHI inside DIV_ITER1B
- #define SIGN r28 // xor of the two operands' high words
- #define Q_POSITIVE p3 // predicate: set when SIGN is greater than -1 (signed compare)
- #define NORMAL p2 // predicate written by the dfclass(...,#DFCLASS_NORMAL) tests on A and B
- #define NO_OVF_UNF p1 // predicate written by the exponent range checks against QH/QL
- #define P_TMP p0 // scratch predicate
- #define RECIPEST_SHIFT 3 // bit position at which the reciprocal mantissa is inserted into RECIPEST
- #define QADJ 61 // amount subtracted from EXPA before the exponent is added to the converted quotient
- #define DFCLASS_NORMAL 0x02 // dfclass mask values; meanings follow the macro names
- #define DFCLASS_NUMBER 0x0F
- #define DFCLASS_INFINITE 0x08
- #define DFCLASS_ZERO 0x01
- #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO) // NUMBER mask with the ZERO bit removed
- #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE) // NUMBER mask with the INFINITE bit removed
- #define DF_MANTBITS 52 // width of the double-precision mantissa field
- #define DF_EXPBITS 11 // width of the double-precision exponent field
- #define SF_MANTBITS 23 // width of the single-precision mantissa field
- #define SF_EXPBITS 8 // width of the single-precision exponent field
- #define DF_BIAS 0x3ff // double-precision exponent bias
- #define SR_ROUND_OFF 22 // bit offset in USR of the 2-bit field that .Ldiv_ovf reads as the rounding mode
- #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG // export __qdsp_<TAG> as another name for __hexagon_<TAG>
- #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG // export __hexagon_fast_<TAG> as another name for __hexagon_<TAG>
- #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG // export __hexagon_fast2_<TAG> as another name for __hexagon_<TAG>
- #define END(TAG) .size TAG,.-TAG // record the symbol size as the distance from TAG to this point
- .text
- .global __hexagon_divdf3
- .type __hexagon_divdf3,@function
- Q6_ALIAS(divdf3) // also exported as __qdsp_divdf3
- FAST_ALIAS(divdf3) // also exported as __hexagon_fast_divdf3
- FAST2_ALIAS(divdf3) // also exported as __hexagon_fast2_divdf3
- .p2align 5
- __hexagon_divdf3: // double-precision divide: takes A in r1:0 and B in r3:2, leaves A / B in r1:0, returns via r31
- { // classify both operands, keep copies of their high words, and form the result sign
- NORMAL = dfclass(A,#DFCLASS_NORMAL)
- NORMAL = dfclass(B,#DFCLASS_NORMAL) // NOTE(review): second write to NORMAL in the same packet; presumably the two results are ANDed -- confirm against the Hexagon PRM
- EXPBA = combine(BH,AH) // EXPB = BH, EXPA = AH (raw high words; the exponent fields are extracted below)
- SIGN = xor(AH,BH) // bit 31 is the xor of the two sign bits
- }
- #undef A
- #undef AH
- #undef AL
- #undef B
- #undef BH
- #undef BL
- #define REM r1:0 // from here on r1:0 is used as the running remainder
- #define REMHI r1
- #define REMLO r0
- #define DENOM r3:2 // ... and r3:2 as the denominator
- #define DENOMHI r3
- #define DENOMLO r2
- { // leave the fast path unless NORMAL is set; meanwhile start building the single-precision denominator
- if (!NORMAL) jump .Ldiv_abnormal
- PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) // top 23 bits of the denominator's mantissa field
- SFONE = ##0x3f800001 // single-precision 1.0 (0x3f800000) with the lowest mantissa bit also set
- }
- { // finish SFDEN, isolate both exponent fields, and derive the sign predicate
- SFDEN = or(SFONE,PRODLO) // exponent bits of SFONE combined with the denominator's leading mantissa bits
- EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) // 11-bit exponent field of B's high word
- EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) // 11-bit exponent field of A's high word
- Q_POSITIVE = cmp.gt(SIGN,#-1) // true when bit 31 of SIGN is clear
- }
- #undef SIGN
- #define ONE r28 // r28 is reused to hold the constant 1
- .Ldenorm_continue: // .Ldiv_abnormal jumps back here after normalising denormal operands
- { // initial reciprocal estimate and exponent difference
- SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
- SFERROR = and(SFONE,#-2) // SFONE with its lowest bit cleared (0x3f800000)
- ONE = #1
- EXPA = sub(EXPA,EXPB) // difference of the two exponent values
- }
- #undef EXPB
- #define RECIPEST r13 // r13 is reused for the integer reciprocal estimate
- { // first refinement step, part 1: SFERROR = SFERROR - SFRECIP*SFDEN
- SFERROR -= sfmpy(SFRECIP,SFDEN):lib
- REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // overwrite bits 20..31 (sign and exponent) with the value 1, i.e. only bit 20 set
- RECIPEST = ##0x00800000 << RECIPEST_SHIFT
- }
- { // first refinement step, part 2: SFRECIP = SFRECIP + SFRECIP*SFERROR
- SFRECIP += sfmpy(SFRECIP,SFERROR):lib
- DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // same sign/exponent replacement for the denominator
- SFERROR = and(SFONE,#-2) // reset the error term for the second refinement step
- }
- { // second refinement step, part 1; also load the exponent-difference bounds
- SFERROR -= sfmpy(SFRECIP,SFDEN):lib
- QH = #-DF_BIAS+1 // lower bound used in the range check below
- QL = #DF_BIAS-1 // upper bound used in the range check below
- }
- { // second refinement step, part 2; range-check the exponent difference
- SFRECIP += sfmpy(SFRECIP,SFERROR):lib
- NO_OVF_UNF = cmp.gt(EXPA,QH)
- NO_OVF_UNF = !cmp.gt(EXPA,QL) // NOTE(review): second write to the same predicate in one packet; presumably ANDed with the line above -- confirm
- }
- { // move the refined reciprocal mantissa into RECIPEST and clear the quotient
- RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // low 23 bits of SFRECIP placed at bit 3
- Q = #0
- EXPA = add(EXPA,#-QADJ)
- }
- #undef SFERROR
- #undef SFRECIP
- #define TMP r10 // r10 and r11 become general scratch registers
- #define TMP1 r11
- { // subtract 3 (scaled by RECIPEST_SHIFT) from the estimate
- RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
- }
- #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
- { \
- PROD = mpyu(RECIPEST,REMHI); \
- REM = asl(REM,# ## ( REMSHIFT )); \
- }; \
- { \
- PRODLO = # ## 0; \
- REM -= mpyu(PRODHI,DENOMLO); \
- REMSUB2 = mpyu(PRODHI,DENOMHI); \
- }; \
- { \
- Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
- REM -= asl(REMSUB2, # ## 32); \
- EXTRA \
- }
- DIV_ITER1B(ASL,14,15,) // each step: PROD = RECIPEST*REMHI, REM <<= 15, REM -= PRODHI*DENOMLO and (PRODHI*DENOMHI)<<32, Q += shifted PROD; here PROD is shifted left by 14
- DIV_ITER1B(ASR,1,15,) // same step with PROD shifted right by 1
- DIV_ITER1B(ASR,16,15,) // same step with PROD shifted right by 16
- DIV_ITER1B(ASR,31,15,PROD=# ( 0 );) // last step: PROD shifted right by 31, then PROD is zeroed via EXTRA
- #undef REMSUB2
- #define TMPPAIR r15:14 // r15:14 becomes a scratch pair
- #define TMPPAIRHI r15
- #define TMPPAIRLO r14
- #undef RECIPEST
- #define EXPB r13 // r13 returns to its EXPB role
- { // final correction, part 1: test whether the remainder still holds a full denominator
- // compare or sub with carry
- TMPPAIR = sub(REM,DENOM)
- P_TMP = cmp.gtu(DENOM,REM)
- // set up amt to add to q
- if (!P_TMP.new) PRODLO = #2
- }
- { // final correction, part 2: PROD is 0, or 2 when DENOM was not greater than REM
- Q = add(Q,PROD)
- if (!P_TMP) REM = TMPPAIR // keep the reduced remainder in that case
- TMPPAIR = #0
- }
- { // if the remainder is non-zero, set the lowest bit of the quotient
- P_TMP = cmp.eq(REM,TMPPAIR)
- if (!P_TMP.new) QL = or(QL,ONE)
- }
- { // prepare the negated quotient
- PROD = neg(Q)
- }
- { // use the negated quotient when Q_POSITIVE is false
- if (!Q_POSITIVE) Q = PROD
- }
- #undef REM
- #undef REMHI
- #undef REMLO
- #undef DENOM
- #undef DENOMLO
- #undef DENOMHI
- #define A r1:0 // r1:0 and r3:2 go back to their A/B names; A now receives the result
- #define AH r1
- #define AL r0
- #define B r3:2
- #define BH r3
- #define BL r2
- { // convert the integer quotient to double; take the slow path if the exponent range check failed
- A = convert_d2df(Q)
- if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
- }
- { // add the adjusted exponent difference into the exponent field of the result and return
- AH += asl(EXPA,#DF_MANTBITS-32)
- jumpr r31
- }
- .Ldiv_ovf_unf: // reached from the main path when NO_OVF_UNF is false
- { // apply the exponent adjustment and read an exponent field out of AH
- AH += asl(EXPA,#DF_MANTBITS-32)
- EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) // NOTE(review): read in the same packet that updates AH; the note further down calls this the post-converted exponent -- confirm packet read ordering
- }
- { // magnitude of the integer quotient, and the sum of the two exponent values
- PROD = abs(Q)
- EXPA = add(EXPA,EXPB)
- }
- { // a sum above 2*DF_BIAS is treated as overflow
- P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
- if (P_TMP.new) jump:nt .Ldiv_ovf
- }
- { // a positive sum goes to the "possible underflow" check instead
- P_TMP = cmp.gt(EXPA,#0)
- if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
- }
- // Underflow
- // We know what the infinite range exponent should be (EXPA)
- // Q is 2's complement, PROD is abs(Q)
- // Normalize Q, shift right, add a high bit, convert, change exponent
- #define FUDGE1 7 // how much to shift right
- #define FUDGE2 4 // how many guard/round to keep at lsbs
- { // compute the normalising shift and the right-shift amount; snapshot USR
- EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
- EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
- TMP = USR
- TMP1 = #63
- }
- { // clamp the right shift to 63, prepare a USR copy with bits 0x30 set, normalise PROD
- EXPB = min(EXPA,TMP1)
- TMP1 = or(TMP,#0x030) // same mask that .Lpossible_unf describes as Unf+Inexact
- PROD = asl(PROD,EXPB)
- EXPA = #0
- }
- { // capture the bits about to be lost, then shift them out
- TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
- PROD = lsr(PROD,EXPB) // shift out bits
- B = #1
- }
- { // if any shifted-out bit was set, OR a 1 into the low word; also set bit DF_MANTBITS-32+FUDGE2 of the high word
- P_TMP = cmp.gtu(B,TMPPAIR) // true only when TMPPAIR is zero
- if (!P_TMP.new) PRODLO = or(BL,PRODLO)
- PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
- }
- { // prepare the negated value; pick the flagged USR copy if any of the low FUDGE2 bits are set
- Q = neg(PROD)
- P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
- if (!P_TMP.new) TMP = TMP1
- }
- { // write USR back, select the sign, and load the final exponent adjustment
- USR = TMP
- if (Q_POSITIVE) Q = PROD
- TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
- }
- { // convert the adjusted integer to double
- A = convert_d2df(Q)
- }
- { // add the exponent adjustment into the exponent field and return
- AH += asl(TMP,#DF_MANTBITS-32)
- jumpr r31
- }
- .Lpossible_unf: // reached from .Ldiv_ovf_unf when the exponent sum is positive but the range check failed
- // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
- // The answer is correct, but we need to raise Underflow
- { // build |A|, the minimum-normal bit pattern, and a mask for the unrounded quotient
- B = extractu(A,#63,#0) // low 63 bits of A, i.e. A without its sign bit
- TMPPAIR = combine(##0x00100000,#0) // min normal
- TMP = #0x7FFF
- }
- { // NOTE(review): two writes to P_TMP in one packet; presumably both conditions must hold -- confirm
- P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
- P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
- }
- #if (__HEXAGON_ARCH__ == 60) // for arch 60 the same two instructions are written without packet braces; the reason is not stated here
- TMP = USR // If not, just return
- if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
- // Note that inexact is already set...
- #else
- {
- if (!P_TMP) jumpr r31 // If not, just return
- TMP = USR // Else, we want to set Unf+Inexact
- } // Note that inexact is already set...
- #endif
- { // set the 0x30 bits in the USR copy
- TMP = or(TMP,#0x30)
- }
- { // write the flags back
- USR = TMP
- }
- { // NOTE(review): the compare result in p0 is not used below; presumably issued for its exception side effect, like the "get exceptions" compare in .Ldiv_ovf
- p0 = dfcmp.eq(A,A)
- jumpr r31
- }
- .Ldiv_ovf: // reached from .Ldiv_ovf_unf when the exponent sum exceeds 2*DF_BIAS
- // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
- { // snapshot USR, load the saturated-normal candidate, and spread the result sign over AH
- TMP = USR
- B = combine(##0x7fefffff,#-1) // 0x7fefffff_ffffffff: the saturated normal candidate
- AH = mux(Q_POSITIVE,#0,#-1) // 0 when Q_POSITIVE is set, all ones otherwise
- }
- { // load the infinity candidate, read the 2-bit USR field at SR_ROUND_OFF, set the flag bits
- PROD = combine(##0x7ff00000,#0) // 0x7ff00000_00000000: the infinity candidate
- QH = extractu(TMP,#2,#SR_ROUND_OFF)
- TMP = or(TMP,#0x28) // NOTE(review): presumably Overflow plus Inexact -- confirm against the USR bit layout
- }
- { // write USR; fold the sign into the low bit of the extracted field
- USR = TMP
- QH ^= lsr(AH,#31) // flips bit 0 of QH when AH is all ones (Q_POSITIVE false)
- QL = QH
- }
- { // NOTE(review): several writes to p0 plus a p0.new use in one packet; exact combination rules should be confirmed in the Hexagon PRM
- p0 = !cmp.eq(QL,#1) // if not round-to-zero
- p0 = !cmp.eq(QH,#2) // and not rounding the other way
- if (p0.new) B = PROD // go to inf
- p0 = dfcmp.eq(B,B) // get exceptions
- }
- { // copy the low 63 bits of the chosen value into A (bit 63 of A is left as set above) and return
- A = insert(B,#63,#0)
- jumpr r31
- }
- #undef ONE
- #define SIGN r28 // r28 is referred to as SIGN again in this path
- #undef NORMAL
- #undef NO_OVF_UNF
- #define P_INF p1 // p1 and p2 are reused as classification predicates
- #define P_ZERO p2
- .Ldiv_abnormal: // reached from the entry path when NORMAL is false
- { // NOTE(review): throughout this path, paired writes to one predicate in a packet are presumably ANDed -- confirm
- P_TMP = dfclass(A,#DFCLASS_NUMBER)
- P_TMP = dfclass(B,#DFCLASS_NUMBER)
- Q_POSITIVE = cmp.gt(SIGN,#-1) // recomputed here because the main path's packet that sets it is skipped
- }
- { // A and B tested against the INFINITE class
- P_INF = dfclass(A,#DFCLASS_INFINITE)
- P_INF = dfclass(B,#DFCLASS_INFINITE)
- }
- { // A and B tested against the ZERO class
- P_ZERO = dfclass(A,#DFCLASS_ZERO)
- P_ZERO = dfclass(B,#DFCLASS_ZERO)
- }
- { // dispatch on the NUMBER and INFINITE class tests
- if (!P_TMP) jump .Ldiv_nan
- if (P_INF) jump .Ldiv_invalid
- }
- { // dispatch on the ZERO class test
- if (P_ZERO) jump .Ldiv_invalid
- }
- { // P_ZERO is reused: false leads to .Ldiv_zero_result
- P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
- P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
- }
- { // P_INF is reused: false leads to .Ldiv_inf_result
- P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
- P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
- }
- {
- if (!P_ZERO) jump .Ldiv_zero_result
- if (!P_INF) jump .Ldiv_inf_result
- }
- // Now we've narrowed it down to (de)normal / (de)normal
- // Set up A/EXPA B/EXPB and go back
- #undef P_ZERO
- #undef P_INF
- #define P_TMP2 p1 // p1 becomes a second scratch predicate
- { // record which operands are normal; TMP holds bit 20 of a high word (the hidden-bit position)
- P_TMP = dfclass(A,#DFCLASS_NORMAL)
- P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
- TMP = ##0x00100000
- }
- { // save the raw high words, then clear bits 20..31 of each
- EXPBA = combine(BH,AH)
- AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
- BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
- }
- {
- if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
- if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
- }
- { // leading-bit count minus DF_EXPBITS for each operand, used as a left-shift amount below
- QH = add(clb(A),#-DF_EXPBITS)
- QL = add(clb(B),#-DF_EXPBITS)
- TMP = #1
- }
- { // exponent fields of the saved high words
- EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
- EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
- }
- { // shift both operands left; a non-normal operand gets exponent 1 minus its shift amount
- A = asl(A,QH)
- B = asl(B,QL)
- if (!P_TMP) EXPA = sub(TMP,QH)
- if (!P_TMP2) EXPB = sub(TMP,QL)
- } // recreate values needed by resume code
- { // same extraction the main path performs on DENOM before .Ldenorm_continue
- PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
- }
- { // rebuild SFDEN and rejoin the main path
- SFDEN = or(SFONE,PRODLO)
- jump .Ldenorm_continue
- }
- .Ldiv_zero_result: // reached from .Ldiv_abnormal when the "A nonzero and B non-infinite" test fails
- { // put the xor of the operand high words in AH (bit 31 is the result sign) and zero B
- AH = xor(AH,BH)
- B = #0
- }
- { // overwrite the low 63 bits of A with zero, leaving bit 63 as computed above, and return
- A = insert(B,#63,#0)
- jumpr r31
- }
- .Ldiv_inf_result: // reached from .Ldiv_abnormal when the "A non-infinite and B nonzero" test fails
- { // NOTE(review): two writes to p2 in one packet, presumably ANDed: B is zero and A is not infinite -- confirm
- p2 = dfclass(B,#DFCLASS_ZERO)
- p2 = dfclass(A,#DFCLASS_NONINFINITE)
- }
- { // snapshot USR and form the result sign; skip the flag update when p2 is false
- TMP = USR
- if (!p2) jump 1f
- AH = xor(AH,BH) // bit 31 becomes the xor of the operand sign bits
- }
- {
- TMP = or(TMP,#0x04) // DBZ
- }
- { // write the updated flags back
- USR = TMP
- }
- 1:
- { // load the infinity bit pattern 0x7ff00000_00000000 into B
- B = combine(##0x7ff00000,#0)
- p0 = dfcmp.uo(B,B) // take possible exception
- }
- { // copy the low 63 bits of B into A, leaving bit 63 of A as computed above, and return
- A = insert(B,#63,#0)
- jumpr r31
- }
- .Ldiv_nan: // reached from .Ldiv_abnormal when the NUMBER class test on A and B fails
- { // NOTE(review): class mask 0x10 has no named macro in this file; presumably the NaN class -- confirm
- p0 = dfclass(A,#0x10)
- p1 = dfclass(B,#0x10)
- if (!p0.new) A = B // if A is not in class 0x10, replace it with B
- if (!p1.new) B = A // if B is not in class 0x10, replace it with A
- }
- { // the converted values in QH/QL are not used below; the conversions are issued for their exception side effect
- QH = convert_df2sf(A) // get possible invalid exceptions
- QL = convert_df2sf(B)
- }
- { // return the all-ones 64-bit pattern
- A = #-1
- jumpr r31
- }
- .Ldiv_invalid: // reached from .Ldiv_abnormal when both operands are infinite or both are zero
- { // single-precision pattern with all exponent bits set and a non-zero mantissa
- TMP = ##0x7f800001
- }
- { // the conversion result is returned directly in A
- A = convert_sf2df(TMP) // get invalid, get DF qNaN
- jumpr r31
- }
- END(__hexagon_divdf3) // emit the .size directive for the routine
|