123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396 |
- //===----------------------Hexagon builtin routine ------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- // Double Precision Add / Subtract
- #define A r1:0 // first operand on entry; also receives the result
- #define AH r1 // high word of A: sign bit, exponent field, top mantissa bits
- #define AL r0 // low word of A
- #define B r3:2 // second operand on entry; reused as scratch on the slow paths
- #define BH r3 // high word of B
- #define BL r2 // low word of B
- #define EXPA r4 // exponent field extracted from AH
- #define EXPB r5 // exponent field extracted from BH; later reused as the exponent adjustment
- #define EXPB_A r5:4 // EXPB:EXPA as one pair so both can be swapped with a single combine
- #define ZTMP r7:6 // pair of small constants (see ZTMPH / ZTMPL uses)
- #define ZTMPH r7
- #define ZTMPL r6
- #define ATMP r13:12 // working mantissa / running sum for A
- #define ATMPH r13
- #define ATMPL r12
- #define BTMP r9:8 // working mantissa for B
- #define BTMPH r9
- #define BTMPL r8
- #define ATMP2 r11:10 // second scratch pair (negated value, shifted-out bits, difference)
- #define ATMP2H r11
- #define ATMP2L r10
- #define EXPDIFF r15 // EXPA - EXPB; high half of EXTRACTAMT
- #define EXTRACTOFF r14 // low half of EXTRACTAMT; set to 0 before use
- #define EXTRACTAMT r15:14 // EXPDIFF:EXTRACTOFF pair handed to extractu as its register control operand
- #define TMP r28 // general scratch (USR copies, exponent temporaries)
- #define MANTBITS 52 // width of the mantissa field inserted into the working values
- #define HI_MANTBITS 20 // bit offset of the exponent field inside the high word
- #define EXPBITS 11 // width of the exponent field
- #define BIAS 1024 // NOTE(review): IEEE-754 double bias is 1023; only used as EXPA-BIAS-60 and BIAS+BIAS-2 here - presumably chosen to match the bit-61 scaling of the working mantissa, confirm
- #define MANTISSA_TO_INT_BIAS 52 // not referenced in the code visible in this file
- #define SR_BIT_INEXACT 5 // not referenced in the code visible in this file
- #ifndef SR_ROUND_OFF // allow the build to override the rounding-field offset
- #define SR_ROUND_OFF 22 // bit offset of the 2-bit rounding-mode field extracted from USR
- #endif
- #define NORMAL p3 // predicate: result of the dfclass(...,#2) tests on A and B
- #define BIGB p2 // predicate: EXPB > EXPA (unsigned)
- #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG // export __qdsp_TAG as an alias of __hexagon_TAG
- #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG // export __hexagon_fast_TAG as an alias of __hexagon_TAG
- #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG // export __hexagon_fast2_TAG as an alias of __hexagon_TAG
- #define END(TAG) .size TAG,.-TAG // record the symbol size as the distance from TAG to the current location
- .text
- .global __hexagon_adddf3 // primary exported names for double add ...
- .global __hexagon_subdf3 // ... and double subtract
- .type __hexagon_adddf3, @function
- .type __hexagon_subdf3, @function
- Q6_ALIAS(adddf3) // __qdsp_adddf3 (this alias is the jump target used by __hexagon_subdf3)
- FAST_ALIAS(adddf3) // __hexagon_fast_adddf3
- FAST2_ALIAS(adddf3) // __hexagon_fast2_adddf3
- Q6_ALIAS(subdf3) // __qdsp_subdf3
- FAST_ALIAS(subdf3) // __hexagon_fast_subdf3
- FAST2_ALIAS(subdf3) // __hexagon_fast2_subdf3
- .p2align 5 // align the entry point to 2^5 = 32 bytes
- __hexagon_adddf3: // Double-precision add: operands arrive in A and B, the result is left in A
- { // Pull the exponent fields out of both high words
- EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
- EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
- ATMP = combine(##0x20000000,#0) // bit 61 set: the implicit leading one of the working mantissa
- }
- {
- NORMAL = dfclass(A,#2) // both dfclass results target the same predicate;
- NORMAL = dfclass(B,#2) // NOTE(review): relies on same-packet predicate writes being ANDed, i.e. NORMAL = A and B both pass the class-#2 test
- BTMP = ATMP // same implicit-one template for B
- BIGB = cmp.gtu(EXPB,EXPA) // Does B have the larger exponent field? (Is B substantially greater than A?)
- }
- { // The conditional swaps below sit in the same packet as the jump, so they are also issued when control leaves for .Ladd_abnormal
- if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
- if (BIGB) A = B // if B >> A, swap A and B
- if (BIGB) B = A // If B >> A, swap A and B (same packet as the line above, so together they exchange the two values)
- if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
- }
- { // From here on EXPA >= EXPB
- ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62 - mantissa of A placed at bit 9, under the implicit one at bit 61
- BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62 - same layout for B
- EXPDIFF = sub(EXPA,EXPB) // how far B has to be shifted right to line up with A
- ZTMP = combine(#62,#1) // ZTMPH = 62 (shift cap), ZTMPL = 1 (sticky bit)
- }
- #undef BIGB
- #undef NORMAL
- #define B_POS p3 // predicate: sign bit of B is clear
- #define A_POS p2 // predicate: sign bit of A is clear
- #define NO_STICKIES p1 // predicate: no non-zero bits were lost when aligning B
- .Ladd_continue: // shared tail; also entered from .Ladd_abnormal with ATMP/BTMP/EXPA/EXPDIFF/ZTMP prepared
- {
- EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
- // will collapse to sticky bit
- ATMP2 = neg(ATMP) // negated mantissa, used if A is negative
- A_POS = cmp.gt(AH,#-1) // AH >= 0 as a signed word <=> sign bit clear
- EXTRACTOFF = #0
- }
- {
- if (!A_POS) ATMP = ATMP2 // A negative: work with the two's-complement mantissa
- ATMP2 = extractu(BTMP,EXTRACTAMT) // with EXTRACTOFF = 0 and EXPDIFF as the other half, presumably the low EXPDIFF bits of BTMP, i.e. the bits the shift below discards
- BTMP = ASR(BTMP,EXPDIFF) // align B's mantissa with A's exponent
- #undef EXTRACTAMT
- #undef EXPDIFF
- #undef EXTRACTOFF
- #define ZERO r15:14
- ZERO = #0 // r15:14 is recycled as a zero constant; NOTE(review): the extractu/ASR above share this packet and rely on reading the old r15/r14 before this write lands
- }
- {
- NO_STICKIES = cmp.eq(ATMP2,ZERO) // were all discarded bits zero?
- if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL) // if not, fold them into a sticky bit (ZTMPL = 1) in B's lsb
- EXPB = add(EXPA,#-BIAS-60) // exponent adjustment applied to the converted result below
- B_POS = cmp.gt(BH,#-1) // BH >= 0 as a signed word <=> sign bit clear
- }
- { // Compute both candidates; the right one is selected in the next packet
- ATMP = add(ATMP,BTMP) // ADD!!!
- ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
- ZTMP = combine(#54,##2045) // ZTMPH = 54 (lower bound), ZTMPL = 2045 (upper bound) for EXPA on the fast path
- }
- { // Fast path only when 54 < EXPA <= 2045; otherwise take the overflow/denormal path
- p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
- p0 = !cmp.gtu(EXPA,ZTMPL)
- if (!p0.new) jump:nt .Ladd_ovf_unf
- if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
- }
- {
- A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
- p0 = cmp.eq(ATMPH,#0) // p0 = (ATMP == 0): both halves are tested
- p0 = cmp.eq(ATMPL,#0)
- if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
- }
- {
- AH += asl(EXPB,#HI_MANTBITS) // add the exponent adjustment directly into the exponent field
- jumpr r31 // return to caller
- }
- .falign
- __hexagon_subdf3: // Double-precision subtract, implemented as A + (-B)
- {
- BH = togglebit(BH,#31) // flip bit 31 of B's high word (the sign bit tested elsewhere via cmp.gt(BH,#-1))
- jump __qdsp_adddf3 // tail-jump to the add routine through its __qdsp_ alias (same address as __hexagon_adddf3)
- }
- .falign
- .Ladd_zero:
- // True zero, full cancellation
- // +0 unless round towards negative infinity
- //
- // Reached from the fast path and from .Ladd_ovf_unf when the 64-bit sum in
- // ATMP is exactly zero. Returns +0 in A, or -0 when the rounding field read
- // from USR equals 2.
- {
- TMP = USR // snapshot the status register to read the rounding mode
- A = #0 // default result: +0
- BH = #1
- }
- {
- // Use the shared SR_ROUND_OFF constant (as .Ladd_ovf and .Lzero_plus_zero
- // do) so an overridden rounding-field offset is honoured here as well.
- TMP = extractu(TMP,#2,#SR_ROUND_OFF) // 2-bit rounding mode
- BH = asl(BH,#31) // BH = 0x80000000, the sign-bit mask
- }
- {
- p0 = cmp.eq(TMP,#2) // rounding mode 2: round towards negative infinity
- if (p0.new) AH = xor(AH,BH) // flip the sign bit of the zero result -> -0
- jumpr r31
- }
- .falign
- .Ladd_ovf_unf:
- // Overflow or Denormal is possible
- // Good news: Underflow flag is not possible!
- // ATMP has 2's complement value
- //
- // EXPA has A's exponent, EXPB has EXPA-BIAS-60
- //
- // Convert, extract exponent, add adjustment.
- // If > 2046, overflow
- // If <= 0, denormal
- //
- // Note that we've not done our zero check yet, so do that too
- {
- A = convert_d2df(ATMP) // convert the signed 64-bit sum to double
- p0 = cmp.eq(ATMPH,#0) // p0 = (ATMP == 0): both halves are tested
- p0 = cmp.eq(ATMPL,#0)
- if (p0.new) jump:nt .Ladd_zero
- }
- {
- TMP = extractu(AH,#EXPBITS,#HI_MANTBITS) // exponent field produced by the conversion
- AH += asl(EXPB,#HI_MANTBITS) // speculatively apply the adjustment; only kept if the result is in range
- }
- {
- EXPB = add(EXPB,TMP) // final exponent = conversion exponent + adjustment
- B = combine(##0x00100000,#0) // bit 52 set: implicit leading one for the denormal path
- }
- {
- p0 = cmp.gt(EXPB,##BIAS+BIAS-2) // final exponent > 2046 ?
- if (p0.new) jump:nt .Ladd_ovf
- }
- {
- p0 = cmp.gt(EXPB,#0) // final exponent >= 1: A already holds the finished result
- if (p0.new) jumpr:t r31
- TMP = sub(#1,EXPB) // otherwise: right-shift amount needed to denormalize
- }
- {
- B = insert(A,#MANTBITS,#0) // B = implicit one (bit 52) plus the 52 mantissa bits of the converted value
- A = ATMP // keep the two's-complement sum in A; its bit 63 supplies the sign below
- }
- {
- B = lsr(B,TMP) // shift the significand down into denormal position
- }
- {
- A = insert(B,#63,#0) // low 63 bits come from B, bit 63 of A is left as is
- jumpr r31
- }
- .falign
- .Ladd_ovf:
- // We get either max finite value or infinity. Either way, overflow+inexact
- {
- A = ATMP // 2's complement value - its bit 63 is the sign of the result
- TMP = USR // snapshot the status register
- ATMP = combine(##0x7fefffff,#-1) // positive max finite
- }
- {
- EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
- TMP = or(TMP,#0x28) // inexact + overflow
- BTMP = combine(##0x7ff00000,#0) // positive infinity
- }
- {
- USR = TMP // write the updated flags back
- EXPB ^= lsr(AH,#31) // Does sign match rounding? (xor the rounding bits with the sign bit)
- TMP = EXPB // unmodified rounding mode (same packet as the xor above, so this copies the pre-xor EXPB)
- }
- {
- p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
- p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
- if (p0.new) ATMP = BTMP // we should get infinity
- }
- {
- A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
- }
- {
- p0 = dfcmp.eq(A,A) // NOTE(review): p0 is not consumed before the return - presumably issued for a floating-point side effect, confirm
- jumpr r31
- }
- .Ladd_abnormal: // Entered when the NORMAL test on A and B fails in the entry sequence
- {
- ATMP = extractu(A,#63,#0) // strip off sign
- BTMP = extractu(B,#63,#0) // strip off sign
- }
- {
- p3 = cmp.gtu(ATMP,BTMP) // p3 = |A| > |B| (compared as unsigned bit patterns)
- if (!p3.new) A = B // sort values
- if (!p3.new) B = A // sort values (after this packet A holds the operand with the larger magnitude pattern)
- }
- {
- // Any NaN --> NaN, possibly raise invalid if sNaN
- p0 = dfclass(A,#0x0f) // A not NaN?
- if (!p0.new) jump:nt .Linvalid_nan_add
- if (!p3) ATMP = BTMP // keep the sign-stripped copies in the same order as A and B
- if (!p3) BTMP = ATMP
- }
- {
- // Infinity + non-infinity number is infinity
- // Infinity + infinity --> inf or nan
- p1 = dfclass(A,#0x08) // A is infinity
- if (p1.new) jump:nt .Linf_add
- }
- {
- p2 = dfclass(B,#0x01) // B is zero
- if (p2.new) jump:nt .LB_zero // so return A or special 0+0
- ATMP = #0 // same packet as the jump: .LB_zero compares A against this zero
- }
- // We are left with adding one or more subnormals
- {
- p0 = dfclass(A,#4) // class #4 on the larger operand: both operands take the two-subnormal path
- if (p0.new) jump:nt .Ladd_two_subnormal
- ATMP = combine(##0x20000000,#0) // implicit-one template for A, as in the entry sequence
- }
- { // A goes through the normal mantissa setup; B (the smaller operand) gets exponent 1 and no implicit one
- EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
- EXPB = #1
- // BTMP already ABS(B)
- BTMP = asl(BTMP,#EXPBITS-2) // move B's magnitude to the same bit position the insert gives A's mantissa
- }
- #undef ZERO
- #define EXTRACTOFF r14
- #define EXPDIFF r15
- { // Re-create the state the fast path has on reaching .Ladd_continue
- ATMP = insert(A,#MANTBITS,#EXPBITS-2)
- EXPDIFF = sub(EXPA,EXPB)
- ZTMP = combine(#62,#1) // ZTMPH = 62 (shift cap), ZTMPL = 1 (sticky bit)
- jump .Ladd_continue
- }
- .Ladd_two_subnormal: // Both operands are added as signed 64-bit integers built from their magnitudes
- {
- ATMP = extractu(A,#63,#0) // |A| as an integer
- BTMP = extractu(B,#63,#0) // |B| as an integer
- }
- {
- ATMP = neg(ATMP) // assume negative: -|A|
- BTMP = neg(BTMP) // assume negative: -|B|
- p0 = cmp.gt(AH,#-1) // p0 = A's sign bit is clear
- p1 = cmp.gt(BH,#-1) // p1 = B's sign bit is clear
- }
- {
- if (p0) ATMP = A // non-negative operand: the raw bits already are the magnitude
- if (p1) BTMP = B
- }
- {
- ATMP = add(ATMP,BTMP) // signed integer sum
- }
- {
- BTMP = neg(ATMP) // magnitude to use if the sum is negative
- p0 = cmp.gt(ATMPH,#-1) // p0 = sum is non-negative
- B = #0
- }
- {
- if (!p0) A = BTMP // negative sum: take the magnitude (sign bit is added below)
- if (p0) A = ATMP // non-negative sum: use it directly
- BH = ##0x80000000 // B becomes the sign-bit mask (bit pattern of -0.0, since B was just zeroed)
- }
- {
- if (!p0) AH = or(AH,BH) // negative sum: set the sign bit (uses the p0 from the packets above, not the compare below)
- p0 = dfcmp.eq(A,B) // compares the new A against the zero pattern held in B
- if (p0.new) jump:nt .Lzero_plus_zero
- }
- {
- jumpr r31
- }
- .Linvalid_nan_add: // Entered when the dfclass(A,#0x0f) "not NaN" test fails after sorting
- {
- TMP = convert_df2sf(A) // will generate invalid if sNaN
- p0 = dfclass(B,#0x0f) // if B is not NaN
- if (p0.new) B = A // make it whatever A is
- }
- {
- BL = convert_df2sf(B) // will generate invalid if sNaN
- A = #-1 // result: all 64 bits set
- jumpr r31
- }
- .falign
- .LB_zero: // B is zero; ATMP was cleared in the packet that jumped here
- {
- p0 = dfcmp.eq(ATMP,A) // is A also zero?
- if (!p0.new) jumpr:t r31 // If not, just return A
- }
- // 0 + 0 is special
- // if equal integral values, they have the same sign, which is fine for all rounding
- // modes.
- // If unequal in sign, we get +0 for all rounding modes except round down
- .Lzero_plus_zero: // also entered from .Ladd_two_subnormal when the sum compares equal to zero
- {
- p0 = cmp.eq(A,B) // 64-bit integer compare: identical bit patterns, hence identical signs
- if (p0.new) jumpr:t r31
- }
- {
- TMP = USR // snapshot the status register to read the rounding mode
- }
- {
- TMP = extractu(TMP,#2,#SR_ROUND_OFF) // 2-bit rounding mode
- A = #0 // default result: +0
- }
- {
- p0 = cmp.eq(TMP,#2) // rounding mode 2: the round-down case described above
- if (p0.new) AH = ##0x80000000 // return -0 in that mode
- jumpr r31
- }
- .Linf_add: // Entered when A (the larger-magnitude operand after sorting) is infinite
- // adding infinities is only OK if they are equal
- {
- p0 = !cmp.eq(AH,BH) // Do they have different signs
- p0 = dfclass(B,#8) // And is B also infinite?
- if (!p0.new) jumpr:t r31 // If not, just a normal inf
- }
- {
- BL = ##0x7f800001 // sNAN
- }
- {
- A = convert_sf2df(BL) // trigger invalid, set NaN
- jumpr r31
- }
- END(__hexagon_adddf3) // expands to .size __hexagon_adddf3,.-__hexagon_adddf3
|