//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
// Double Precision Fused Multiply-Add
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4
#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16
#define EXPA r18
#define EXPB r19
#define EXPBA r19:18
#define TMP r28
#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32
#define ADJUST 4
#define FUDGE 7
#define FUDGE2 3
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
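
// The constants above describe the IEEE-754 binary64 layout. For reference,
// a minimal C sketch of the decomposition (illustrative only; the helper
// name is hypothetical and not part of this file):
//
//   #include <stdint.h>
//   static void df_fields(uint64_t bits, int *sign, int *bexp, uint64_t *mant) {
//     *sign = (int)(bits >> 63);                  // 1 sign bit
//     *bexp = (int)(bits >> 52) & 0x7ff;          // EXPBITS=11 bits, BIAS=1023
//     *mant = bits & (((uint64_t)1 << 52) - 1);   // MANTBITS=52 stored bits
//   }
//
// HI_MANTBITS=20 is the number of mantissa bits held in the high 32-bit
// register half, which is why exponents are extracted from AH/BH/CH.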
// First, classify for normal values, and abort if abnormal
//
// Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
//
// Since we know that the 2 MSBs of the H registers are zero, the partial
// products that involve the H registers can never generate a carry out
//
// Try to buy X slots, at the expense of latency if needed
//
// We will have PP_HH with the upper bits of the product, PP_LL with the lower
// PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
// PP_HH can have a minimum of 0x0100_0000_0000_0000
//
// 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
//
// We need to align CTMP.
// If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
// If CTMP << PP, align CTMP and add 128 bits. Then compute sticky
// If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
//
// Convert the partial product and CTMP to 2's complement prior to addition
//
// After we add, we need to normalize into the upper 64 bits, then compute sticky.
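//
// For reference, the 128-bit product below is assembled from 32x32 partial
// products in the classic schoolbook scheme. A minimal C sketch (hypothetical
// helper, for illustration; here ah,bh < 2^30 because of the
// 0x1000_0000_0000_0000 + mant<<8 form, so the cross-term sum cannot wrap):
//
//   #include <stdint.h>
//   static void mul64x64_128(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo) {
//     uint64_t al = (uint32_t)a, ah = a >> 32;
//     uint64_t bl = (uint32_t)b, bh = b >> 32;
//     uint64_t ll  = al * bl;                   // PP_LL = mpyu(ATMPL,BTMPL)
//     uint64_t odd = (ll >> 32) + ah * bl;      // PP_ODD += mpyu(BTMPL,ATMPH)
//     odd += al * bh;                           // PP_ODD += mpyu(ATMPL,BTMPH)
//     *hi = (odd >> 32) + ah * bh;              // PP_HH += mpyu(ATMPH,BTMPH)
//     *lo = (odd << 32) | (uint32_t)ll;         // combine(PP_ODD_L,PP_LL_L)
//   }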
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
.Lfma_begin:
{
  P_TMP = dfclass(A,#2)
  P_TMP = dfclass(B,#2)
  ATMP = #0
  BTMP = #0
}
{
  ATMP = insert(A,#MANTBITS,#EXPBITS-3)
  BTMP = insert(B,#MANTBITS,#EXPBITS-3)
  PP_ODD_H = ##0x10000000
  allocframe(#STACKSPACE)
}
{
  PP_LL = mpyu(ATMPL,BTMPL)
  if (!P_TMP) jump .Lfma_abnormal_ab
  ATMPH = or(ATMPH,PP_ODD_H)
  BTMPH = or(BTMPH,PP_ODD_H)
}
{
  P_TMP = dfclass(C,#2)
  if (!P_TMP.new) jump:nt .Lfma_abnormal_c
  CTMP = combine(PP_ODD_H,#0)
  PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
  PP_ODD += mpyu(BTMPL,ATMPH)
  CTMP = insert(C,#MANTBITS,#EXPBITS-3)
  memd(r29+#0) = PP_HH
  memd(r29+#8) = EXPBA
}
{
  PP_ODD += mpyu(ATMPL,BTMPH)
  EXPBA = neg(CTMP)
  P_TMP = cmp.gt(CH,#-1)
  TMP = xor(AH,BH)
}
{
  EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
  EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
  PP_HH = combine(#0,PP_ODD_H)
  if (!P_TMP) CTMP = EXPBA
}
{
  PP_HH += mpyu(ATMPH,BTMPH)
  PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12
  EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
  EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
// PP_HH:PP_LL now has product
// CTMP is negated
// EXPA,B,C are extracted
// We need to negate PP
// Since we will be adding with carry later, if we need to negate,
// just invert all bits now, which we can do conditionally and in parallel
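//
// This is the usual two's-complement identity -x == ~x + 1, with the "+1"
// delivered as a carry-in so both words can be handled in parallel. A minimal
// C sketch of negating a 128-bit value hi:lo (hypothetical helper):
//
//   #include <stdint.h>
//   static void neg128(uint64_t *hi, uint64_t *lo) {
//     uint64_t l = ~*lo + 1;          // invert, then carry-in of 1
//     uint64_t h = ~*hi + (l == 0);   // carry ripples up only if lo wrapped
//     *lo = l;
//     *hi = h;
//   }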
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
  EXPA = add(EXPA,#-BIAS+(ADJUST))
  PROD_NEG = !cmp.gt(TMP,#-1)
  PP_LL_TMP = #0
  PP_HH_TMP = #0
}
{
  PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
  P_TMP = !cmp.gt(TMP,#-1)
  SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
  if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
  PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
  if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
  CTMP2 = #0
  EXPC = sub(EXPA,EXPC)
}
{
  if (P_TMP) PP_HH = PP_HH_TMP
  P_TMP = cmp.gt(EXPC,#63)
  if (SWAP) PP_LL = CTMP2
  if (SWAP) CTMP2 = PP_LL
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
  if (SWAP) PP_HH = CTMP // Swap C and PP
  if (SWAP) CTMP = PP_HH
  if (P_TMP) EXPC = add(EXPC,#-64)
  TMP = #63
}
{
  // If diff > 63, pre-shift-right by 64...
  if (P_TMP) CTMP2 = CTMP
  TMP = asr(CTMPH,#31)
  RIGHTSHIFT = min(EXPC,TMP)
  LEFTSHIFT = #0
}
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
  if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
  STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
  CTMP2 = lsr(CTMP2,RIGHTSHIFT)
  LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
  ZERO = #0
  TMP = #-2
  CTMP2 |= lsl(CTMP,LEFTSHIFT)
  CTMP = asr(CTMP,RIGHTSHIFT)
}
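// The two packets above perform a 128-bit arithmetic right shift of
// CTMP:CTMP2 with sticky capture. A minimal C sketch, assuming
// 0 < rshift < 64 (rshift == 0 and the pre-shift-by-64 case are handled
// separately above; the helper name is hypothetical):
//
//   #include <stdint.h>
//   static uint64_t shr128_sticky(int64_t *hi, uint64_t *lo, unsigned rshift) {
//     uint64_t sticky = *lo & (((uint64_t)1 << rshift) - 1); // bits falling off
//     *lo = (*lo >> rshift) | ((uint64_t)*hi << (64 - rshift));
//     *hi >>= rshift;             // arithmetic shift: the operand is signed
//     return sticky;              // nonzero means the shift was inexact
//   }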
{
  P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
  if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
  ONE = #1
  STICKIES = #0
}
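// Why the and(CTMP2L,#-2) above: the sticky state enters as the P_CARRY
// carry-in of the 128-bit add below, and with bit 0 forced clear, "+1" sets
// bit 0 without propagating, i.e. it behaves exactly like "|= 1". A minimal
// C sketch (hypothetical helper):
//
//   #include <stdint.h>
//   static uint64_t or_via_carry(uint64_t lo, int sticky_nonzero) {
//     if (sticky_nonzero) lo &= ~(uint64_t)1;       // and(CTMP2L,#-2)
//     return lo + (uint64_t)(sticky_nonzero != 0);  // same result as lo | 1
//   }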
{
  PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
}
{
  PP_HH = add(CTMP,PP_HH,P_CARRY):carry
  TMP = #62
}
// PP_HH:PP_LL now holds the sum
// We may need to normalize left, up to ??? bits.
//
// I think that if we have massive cancellation, the range we normalize by
// is still limited
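//
// Normalization below in C terms: count the redundant sign bits of the
// 128-bit sum, shift them out to the left, and keep whatever falls off the
// bottom as sticky. A minimal sketch, assuming 0 < lshift < 64 (the
// all-sign-bits case is pre-shifted by 62 first, as below):
//
//   #include <stdint.h>
//   static int64_t normalize(int64_t hi, uint64_t lo, int lshift,
//                            int *exp, uint64_t *sticky) {
//     int64_t res = (int64_t)(((uint64_t)hi << lshift) | (lo >> (64 - lshift)));
//     *sticky |= lo << lshift;   // low bits that no longer fit anywhere
//     *exp -= lshift;            // compensate so the value is unchanged
//     return res;
//   }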
{
  LEFTSHIFT = add(clb(PP_HH),#-2)
  if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
}
// We had all sign bits, shift left by 62.
{
  CTMP = extractu(PP_LL,#62,#2)
  PP_LL = asl(PP_LL,#62)
  EXPA = add(EXPA,#-62) // And adjust exponent of result
}
{
  PP_HH = insert(CTMP,#62,#0) // Then shift 63
}
{
  LEFTSHIFT = add(clb(PP_HH),#-2)
}
.falign
1:
{
  CTMP = asl(PP_HH,LEFTSHIFT)
  STICKIES |= asl(PP_LL,LEFTSHIFT)
  RIGHTSHIFT = sub(#64,LEFTSHIFT)
  EXPA = sub(EXPA,LEFTSHIFT)
}
{
  CTMP |= lsr(PP_LL,RIGHTSHIFT)
  EXACT = cmp.gtu(ONE,STICKIES)
  TMP = #BIAS+BIAS-2
}
{
  if (!EXACT) CTMPL = or(CTMPL,S_ONE)
  // If EXPA is overflow/underflow, jump to ovf_unf
  P_TMP = !cmp.gt(EXPA,TMP)
  P_TMP = cmp.gt(EXPA,#1)
  if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
{
  // XXX: FIXME: should PP_HH for check of zero be CTMP?
  P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
  A = convert_d2df(CTMP)
  EXPA = add(EXPA,#-BIAS-60)
  PP_HH = memd(r29+#0)
}
{
  AH += asl(EXPA,#HI_MANTBITS)
  EXPCA = memd(r29+#8)
  if (!P_TMP) dealloc_return // not zero, return
}

.Ladd_yields_zero:
// We had full cancellation. Return +/- zero (-0 when round-down)
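//
// This is the IEEE-754 rule: when x + (-x) cancels exactly, the result is +0
// in every rounding mode except round-toward-negative (USR rounding field
// value 2 below), which gives -0. Equivalent C sketch:
//
//   #include <fenv.h>
//   static double signed_zero(void) {
//     return (fegetround() == FE_DOWNWARD) ? -0.0 : +0.0;
//   }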
{
  TMP = USR
  A = #0
}
{
  TMP = extractu(TMP,#2,#SR_ROUND_OFF)
  PP_HH = memd(r29+#0)
  EXPCA = memd(r29+#8)
}
{
  p0 = cmp.eq(TMP,#2)
  if (p0.new) AH = ##0x80000000
  dealloc_return
}
#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L

.Lfma_ovf_unf:
{
  p0 = cmp.gtu(ONE,CTMP)
  if (p0.new) jump:nt .Ladd_yields_zero
}
{
  A = convert_d2df(CTMP)
  EXPA = add(EXPA,#-BIAS-60)
  TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
  AH += asl(EXPA,#HI_MANTBITS)
  NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
  NEW_EXPA = add(EXPA,NEW_EXPB)
  PP_HH = memd(r29+#0)
  EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
  ATMP = abs(CTMP)
}
{
  p0 = cmp.gt(EXPA,##BIAS+BIAS)
  if (p0.new) jump:nt .Lfma_ovf
}
{
  p0 = cmp.gt(EXPA,#0)
  if (p0.new) jump:nt .Lpossible_unf
}
{
  // TMP has original EXPA.
  // ATMP is corresponding value
  // Normalize ATMP and shift right to correct location
  EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
  EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
  p3 = cmp.gt(CTMPH,#-1)
}
// Underflow
// We know that the infinite-range exponent should be EXPA
// CTMP is 2's complement, ATMP is abs(CTMP)
{
  EXPA = add(EXPA,EXPB) // how much to shift back right
  ATMP = asl(ATMP,EXPB) // shift left
  AH = USR
  TMP = #63
}
{
  EXPB = min(EXPA,TMP)
  EXPA = #0
  AL = #0x0030
}
{
  B = extractu(ATMP,EXPBA)
  ATMP = asr(ATMP,EXPB)
}
{
  p0 = cmp.gtu(ONE,B)
  if (!p0.new) ATMPL = or(ATMPL,S_ONE)
  ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
  CTMP = neg(ATMP)
  p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
  if (!p1.new) AH = or(AH,AL)
  B = #0
}
{
  if (p3) CTMP = ATMP
  USR = AH
  TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
  A = convert_d2df(CTMP)
}
{
  AH += asl(TMP,#HI_MANTBITS)
  dealloc_return
}

.Lpossible_unf:
{
  TMP = ##0x7fefffff
  ATMP = abs(CTMP)
}
{
  p0 = cmp.eq(AL,#0)
  p0 = bitsclr(AH,TMP)
  if (!p0.new) dealloc_return:t
  TMP = #0x7fff
}
{
  p0 = bitsset(ATMPH,TMP)
  BH = USR
  BL = #0x0030
}
{
  if (p0) BH = or(BH,BL)
}
{
  USR = BH
}
{
  p0 = dfcmp.eq(A,A)
  dealloc_return
}

.Lfma_ovf:
{
  TMP = USR
  CTMP = combine(##0x7fefffff,#-1)
  A = CTMP
}
{
  ATMP = combine(##0x7ff00000,#0)
  BH = extractu(TMP,#2,#SR_ROUND_OFF)
  TMP = or(TMP,#0x28)
}
{
  USR = TMP
  BH ^= lsr(AH,#31)
  BL = BH
}
{
  p0 = !cmp.eq(BL,#1)
  p0 = !cmp.eq(BH,#2)
}
{
  p0 = dfcmp.eq(ATMP,ATMP)
  if (p0.new) CTMP = ATMP
}
{
  A = insert(CTMP,#63,#0)
  dealloc_return
}
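// The rounding-mode test above implements the IEEE-754 overflow rule: the
// result is infinity unless the mode rounds away from it, in which case it
// is the largest finite value (0x7fefffff_ffffffff). Equivalent C sketch
// (hypothetical helper):
//
//   #include <fenv.h>
//   #include <float.h>
//   #include <math.h>
//   static double overflow_result(int negative) {
//     int m = fegetround();
//     int to_inf = !(m == FE_TOWARDZERO ||
//                    (m == FE_DOWNWARD && !negative) ||
//                    (m == FE_UPWARD && negative));
//     double r = to_inf ? (double)INFINITY : DBL_MAX;
//     return negative ? -r : r;
//   }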
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10
#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4

.Lfma_abnormal_ab:
{
  ATMP = extractu(A,#63,#0)
  BTMP = extractu(B,#63,#0)
  deallocframe
}
{
  p3 = cmp.gtu(ATMP,BTMP)
  if (!p3.new) A = B // sort values
  if (!p3.new) B = A
}
{
  p0 = dfclass(A,#0x0f) // A NaN?
  if (!p0.new) jump:nt .Lnan
  if (!p3) ATMP = BTMP
  if (!p3) BTMP = ATMP
}
{
  p1 = dfclass(A,#0x08) // A is infinity
  p1 = dfclass(B,#0x0e) // B is nonzero
}
{
  p0 = dfclass(A,#0x08) // A is inf
  p0 = dfclass(B,#0x01) // B is zero
}
{
  if (p1) jump .Lab_inf
  p2 = dfclass(B,#0x01)
}
{
  if (p0) jump .Linvalid
  if (p2) jump .Lab_true_zero
  TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal, with A > B.
// If A and B are both very small, the product collapses to a single sticky
// bit; replacing the lower 63 bits of A and B with 0x0010_0000_0000_0000
// yields equivalent results.
// If A and B might multiply to something bigger, decrease A's exponent,
// increase B's exponent, and start over.
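//
// The rescaling below in C terms: shift B's mantissa left until its implicit
// bit appears, and subtract the same amount from A's exponent so the product
// A*B is unchanged. Sketch (clz64, bmant, and aexp are hypothetical names):
//
//   // bmant: 63-bit magnitude of subnormal B; aexp: A's biased exponent
//   int k = clz64(bmant) - 11;   // clb(BTMP) - EXPBITS: shift to normalize B
//   bmant <<= k;                 // B now looks like a normal mantissa
//   aexp  -= k;                  // compensate: A*B keeps its value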
{
  p0 = bitsclr(AH,TMP)
  if (p0.new) jump:nt .Lfma_ab_tiny
}
{
  TMP = add(clb(BTMP),#-EXPBITS)
}
{
  BTMP = asl(BTMP,TMP)
}
{
  B = insert(BTMP,#63,#0)
  AH -= asl(TMP,#HI_MANTBITS)
}
jump .Lfma_begin

.Lfma_ab_tiny:
ATMP = combine(##0x00100000,#0)
{
  A = insert(ATMP,#63,#0)
  B = insert(ATMP,#63,#0)
}
jump .Lfma_begin

.Lab_inf:
{
  B = lsr(B,#63)
  p0 = dfclass(C,#0x10)
}
{
  A ^= asl(B,#63)
  if (p0) jump .Lnan
}
{
  p1 = dfclass(C,#0x08)
  if (p1.new) jump:nt .Lfma_inf_plus_inf
}
// A*B is +/- inf, C is finite. Return A
{
  jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{ // adding infinities of different signs is invalid
  p0 = dfcmp.eq(A,C)
  if (!p0.new) jump:nt .Linvalid
}
{
  jumpr r31
}

.Lnan:
{
  p0 = dfclass(B,#0x10)
  p1 = dfclass(C,#0x10)
  if (!p0.new) B = A
  if (!p1.new) C = A
}
{ // find sNaNs
  BH = convert_df2sf(B)
  BL = convert_df2sf(C)
}
{
  BH = convert_df2sf(A)
  A = #-1
  jumpr r31
}

.Linvalid:
{
  TMP = ##0x7f800001 // sp snan
}
{
  A = convert_sf2df(TMP)
  jumpr r31
}

.Lab_true_zero:
// B is zero, A is finite number
{
  p0 = dfclass(C,#0x10)
  if (p0.new) jump:nt .Lnan
  if (p0.new) A = C
}
{
  p0 = dfcmp.eq(B,C) // is C also zero?
  AH = lsr(AH,#31) // get sign
}
{
  BH ^= asl(AH,#31) // form correctly signed zero in B
  if (!p0) A = C // If C is not zero, return C
  if (!p0) jumpr r31
}
// B has correctly signed zero, C is also zero
.Lzero_plus_zero:
{
  p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
  if (p0.new) jumpr:t r31
  A = B
}
{
  TMP = USR
}
{
  TMP = extractu(TMP,#2,#SR_ROUND_OFF)
  A = #0
}
{
  p0 = cmp.eq(TMP,#2)
  if (p0.new) AH = ##0x80000000
  jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
// We know that A*B is normal * normal
// C is not normal: zero, subnormal, inf, or NaN.
{
  p0 = dfclass(C,#0x10) // is C NaN?
  if (p0.new) jump:nt .Lnan
  if (p0.new) A = C // move NaN to A
  deallocframe
}
{
  p0 = dfclass(C,#0x08) // is C inf?
  if (p0.new) A = C // return C
  if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If we have a zero, and we know A*B is normal*normal, we can just call normal multiply
{
  p0 = dfclass(C,#0x01) // is C zero?
  if (p0.new) jump:nt __hexagon_muldf3
  TMP = #1
}
// Left with: subnormal
// Adjust C and jump back to restart
{
  allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
  CTMP = #0
  CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
  jump .Lfma_abnormal_c_restart
}
END(__hexagon_fmadf4)