//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Double Precision Add/Subtract
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14
#define TMP r28

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

#define NORMAL p3
#define BIGB p2

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function

Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)
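
// __hexagon_adddf3: IEEE double-precision addition.
// Strategy: keep the operand with the larger exponent in A, expand both
// significands into signed Q1.62 fixed point, shift BTMP right by the exponent
// difference (folding the shifted-out bits into a sticky bit), add, and let
// convert_d2df renormalize and round the 64-bit result. The scaling removed by
// the fixed-point step is added back into the exponent field afterwards
// (EXPB = EXPA - BIAS - 60).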
.p2align 5
__hexagon_adddf3:
{
  EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
  EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
  ATMP = combine(##0x20000000,#0)
}
{
  NORMAL = dfclass(A,#2)
  NORMAL = dfclass(B,#2)
  BTMP = ATMP
  BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
}
{
  if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
  if (BIGB) A = B // if B >> A, swap A and B
  if (BIGB) B = A // If B >> A, swap A and B
  if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
}
{
  ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
  BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
  EXPDIFF = sub(EXPA,EXPB)
  ZTMP = combine(#62,#1)
}
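
// ATMP/BTMP now hold the Q1.62 significands: the implicit leading 1 comes
// from the 0x2000000000000000 seed (bit 61) and the 52 stored mantissa bits
// occupy bits 60..9. ZTMP carries the constants used below: 62 (the maximum
// useful alignment shift) and 1 (the sticky bit).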
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
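
// Common path, also re-entered from the normal + subnormal case below:
// align BTMP to ATMP by shifting it right by the (clamped) exponent
// difference, collapse any shifted-out bits into the sticky bit, negate ATMP
// if A is negative, and compute both ATMP+BTMP and ATMP-BTMP so the correct
// signed result can be selected from B's sign.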
.Ladd_continue:
{
  EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
                               // will collapse to sticky bit
  ATMP2 = neg(ATMP)
  A_POS = cmp.gt(AH,#-1)
  EXTRACTOFF = #0
}
{
  if (!A_POS) ATMP = ATMP2
  ATMP2 = extractu(BTMP,EXTRACTAMT)
  BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
  ZERO = #0
}
{
  NO_STICKIES = cmp.eq(ATMP2,ZERO)
  if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
  EXPB = add(EXPA,#-BIAS-60)
  B_POS = cmp.gt(BH,#-1)
}
{
  ATMP = add(ATMP,BTMP) // ADD!!!
  ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
  ZTMP = combine(#54,##2045)
}
{
  p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
  p0 = !cmp.gtu(EXPA,ZTMPL)
  if (!p0.new) jump:nt .Ladd_ovf_unf
  if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
}
{
  A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
  p0 = cmp.eq(ATMPH,#0)
  p0 = cmp.eq(ATMPL,#0)
  if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
}
{
  AH += asl(EXPB,#HI_MANTBITS)
  jumpr r31
}
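
// Subtraction is addition with the sign of B flipped.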
.falign
__hexagon_subdf3:
{
  BH = togglebit(BH,#31)
  jump __qdsp_adddf3
}

.falign
.Ladd_zero:
// True zero, full cancellation
// +0 unless round towards negative infinity
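// (USR rounding-mode field value 2 selects round-toward-negative-infinity,
// the only mode in which a fully cancelled sum returns -0.)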
{
  TMP = USR
  A = #0
  BH = #1
}
{
  TMP = extractu(TMP,#2,#22)
  BH = asl(BH,#31)
}
{
  p0 = cmp.eq(TMP,#2)
  if (p0.new) AH = xor(AH,BH)
  jumpr r31
}

.falign
.Ladd_ovf_unf:
// Overflow or Denormal is possible
// Good news: Underflow flag is not possible!
// ATMP has 2's complement value
//
// EXPA has A's exponent, EXPB has EXPA-BIAS-60
//
// Convert, extract exponent, add adjustment.
// If > 2046, overflow
// If <= 0, denormal
//
// Note that we've not done our zero check yet, so do that too
{
  A = convert_d2df(ATMP)
  p0 = cmp.eq(ATMPH,#0)
  p0 = cmp.eq(ATMPL,#0)
  if (p0.new) jump:nt .Ladd_zero
}
{
  TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
  AH += asl(EXPB,#HI_MANTBITS)
}
{
  EXPB = add(EXPB,TMP)
  B = combine(##0x00100000,#0)
}
{
  p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
  if (p0.new) jump:nt .Ladd_ovf
}
{
  p0 = cmp.gt(EXPB,#0)
  if (p0.new) jumpr:t r31
  TMP = sub(#1,EXPB)
}
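
// Denormal result: rebuild the 53-bit significand (the implicit bit from the
// 0x0010000000000000 seed in B plus the stored mantissa of A), shift it right
// by 1-EXPB, and insert it below the sign bit recovered from the
// 2's-complement sum in ATMP.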
{
  B = insert(A,#MANTBITS,#0)
  A = ATMP
}
{
  B = lsr(B,TMP)
}
{
  A = insert(B,#63,#0)
  jumpr r31
}

.falign
.Ladd_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
  A = ATMP // 2's complement value
  TMP = USR
  ATMP = combine(##0x7fefffff,#-1) // positive max finite
}
{
  EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
  TMP = or(TMP,#0x28) // inexact + overflow
  BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
  USR = TMP
  EXPB ^= lsr(AH,#31) // Does sign match rounding?
  TMP = EXPB // unmodified rounding mode
}
{
  p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
  p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
  if (p0.new) ATMP = BTMP // we should get infinity
}
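
// Round-to-zero always clamps an overflow to the maximum finite value;
// round-down clamps positive overflows and round-up clamps negative ones
// (XORing the sign into the mode bits folds both cases into the compare
// against 2 above). Every other case returns infinity.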
{
  A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
}
{
  p0 = dfcmp.eq(A,A)
  jumpr r31
}

.Ladd_abnormal:
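// At least one operand is not a normal number. Sort the operands so that
// |A| >= |B|, then dispatch: any NaN -> .Linvalid_nan_add, infinite A ->
// .Linf_add, zero B -> return A (or the 0+0 special case), both subnormal ->
// .Ladd_two_subnormal; otherwise A is normal and B subnormal, so rebuild the
// Q1.62 operands and rejoin the common path at .Ladd_continue.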
{
  ATMP = extractu(A,#63,#0) // strip off sign
  BTMP = extractu(B,#63,#0) // strip off sign
}
{
  p3 = cmp.gtu(ATMP,BTMP)
  if (!p3.new) A = B // sort values
  if (!p3.new) B = A // sort values
}
{
  // Any NaN --> NaN, possibly raise invalid if sNaN
  p0 = dfclass(A,#0x0f) // A not NaN?
  if (!p0.new) jump:nt .Linvalid_nan_add
  if (!p3) ATMP = BTMP
  if (!p3) BTMP = ATMP
}
{
  // Infinity + non-infinity number is infinity
  // Infinity + infinity --> inf or nan
  p1 = dfclass(A,#0x08) // A is infinity
  if (p1.new) jump:nt .Linf_add
}
{
  p2 = dfclass(B,#0x01) // B is zero
  if (p2.new) jump:nt .LB_zero // so return A or special 0+0
  ATMP = #0
}
// We are left with adding one or more subnormals
{
  p0 = dfclass(A,#4)
  if (p0.new) jump:nt .Ladd_two_subnormal
  ATMP = combine(##0x20000000,#0)
}
{
  EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
  EXPB = #1
  // BTMP already ABS(B)
  BTMP = asl(BTMP,#EXPBITS-2)
}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
{
  ATMP = insert(A,#MANTBITS,#EXPBITS-2)
  EXPDIFF = sub(EXPA,EXPB)
  ZTMP = combine(#62,#1)
  jump .Ladd_continue
}

.Ladd_two_subnormal:
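// Both operands are subnormal (B cannot be larger than the subnormal A and is
// not zero). Subnormal encodings share a zero exponent field, so adding the
// signed magnitudes of the raw bit patterns already yields the correct result
// encoding: a carry out of the mantissa lands in the exponent field as 1,
// which is exactly the smallest normal exponent. Only the sign bit needs to
// be reassembled afterwards.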
{
  ATMP = extractu(A,#63,#0)
  BTMP = extractu(B,#63,#0)
}
{
  ATMP = neg(ATMP)
  BTMP = neg(BTMP)
  p0 = cmp.gt(AH,#-1)
  p1 = cmp.gt(BH,#-1)
}
{
  if (p0) ATMP = A
  if (p1) BTMP = B
}
{
  ATMP = add(ATMP,BTMP)
}
{
  BTMP = neg(ATMP)
  p0 = cmp.gt(ATMPH,#-1)
  B = #0
}
{
  if (!p0) A = BTMP
  if (p0) A = ATMP
  BH = ##0x80000000
}
{
  if (!p0) AH = or(AH,BH)
  p0 = dfcmp.eq(A,B)
  if (p0.new) jump:nt .Lzero_plus_zero
}
{
  jumpr r31
}

.Linvalid_nan_add:
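// At least one operand is a NaN. Converting each operand to single precision
// raises the invalid flag if it is a signaling NaN; the returned pattern #-1
// (all bits set) is a quiet NaN.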
{
  TMP = convert_df2sf(A) // will generate invalid if sNaN
  p0 = dfclass(B,#0x0f) // if B is not NaN
  if (p0.new) B = A // make it whatever A is
}
{
  BL = convert_df2sf(B) // will generate invalid if sNaN
  A = #-1
  jumpr r31
}

.falign
.LB_zero:
{
  p0 = dfcmp.eq(ATMP,A) // is A also zero?
  if (!p0.new) jumpr:t r31 // If not, just return A
}
// 0 + 0 is special.
// If the two bit patterns are equal, both zeros have the same sign and that
// sign is the correct result in every rounding mode.
// If the signs differ, the result is +0 in every rounding mode except
// round-toward-negative-infinity, which gives -0.
.Lzero_plus_zero:
{
  p0 = cmp.eq(A,B)
  if (p0.new) jumpr:t r31
}
{
  TMP = USR
}
{
  TMP = extractu(TMP,#2,#SR_ROUND_OFF)
  A = #0
}
{
  p0 = cmp.eq(TMP,#2)
  if (p0.new) AH = ##0x80000000
  jumpr r31
}

.Linf_add:
// adding infinities is only OK if they are equal
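// A is infinite here. inf + finite, and inf + inf of the same sign, return A
// unchanged. inf + (-inf) is invalid: materialize a single-precision sNaN
// pattern and convert it, which raises the invalid flag and produces a
// quiet NaN.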
{
  p0 = !cmp.eq(AH,BH) // Do they have different signs?
  p0 = dfclass(B,#8) // And is B also infinite?
  if (!p0.new) jumpr:t r31 // If not, just a normal inf
}
{
  BL = ##0x7f800001 // sNAN
}
{
  A = convert_sf2df(BL) // trigger invalid, set NaN
  jumpr r31
}
END(__hexagon_adddf3)