dfdiv.S 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. //===----------------------Hexagon builtin routine ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // Double Precision Divide
  // Operand register pairs. r1:0 holds the first operand (the dividend) and
  // also receives the result; r3:2 holds the second operand (the divisor).
  9. #define A r1:0
  10. #define AH r1
  11. #define AL r0
  12. #define B r3:2
  13. #define BH r3
  14. #define BL r2
  // Q accumulates the integer quotient; PROD holds per-step products.
  15. #define Q r5:4
  16. #define QH r5
  17. #define QL r4
  18. #define PROD r7:6
  19. #define PRODHI r7
  20. #define PRODLO r6
  // Single-precision scratch registers used while refining the reciprocal estimate.
  21. #define SFONE r8
  22. #define SFDEN r9
  23. #define SFERROR r10
  24. #define SFRECIP r11
  // EXPB/EXPA start out as the high words of B and A and are then reduced to
  // their exponent fields.
  25. #define EXPBA r13:12
  26. #define EXPB r13
  27. #define EXPA r12
  // Second partial product subtracted from the remainder inside DIV_ITER1B.
  28. #define REMSUB2 r15:14
  // XOR of the operands' high words; bit 31 is the sign of the quotient.
  29. #define SIGN r28
  // Predicate registers.
  30. #define Q_POSITIVE p3
  31. #define NORMAL p2
  32. #define NO_OVF_UNF p1
  33. #define P_TMP p0
  // RECIPEST_SHIFT: bit offset at which the reciprocal mantissa is inserted into RECIPEST.
  // QADJ: constant subtracted from the exponent difference before that difference
  // is added to the exponent of the converted quotient.
  34. #define RECIPEST_SHIFT 3
  35. #define QADJ 61
  // Class masks passed to dfclass.
  36. #define DFCLASS_NORMAL 0x02
  37. #define DFCLASS_NUMBER 0x0F
  38. #define DFCLASS_INFINITE 0x08
  39. #define DFCLASS_ZERO 0x01
  40. #define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
  41. #define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
  // Field widths and exponent bias of the double (DF) and single (SF) formats.
  42. #define DF_MANTBITS 52
  43. #define DF_EXPBITS 11
  44. #define SF_MANTBITS 23
  45. #define SF_EXPBITS 8
  46. #define DF_BIAS 0x3ff
  // Bit offset of the 2-bit field that .Ldiv_ovf extracts from USR to pick the
  // overflow result (the rounding mode, per the comments there).
  47. #define SR_ROUND_OFF 22
  // Each alias macro exports one more global name bound to __hexagon_<TAG>.
  48. #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
  49. #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
  50. #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
  // END emits the .size directive for TAG.
  51. #define END(TAG) .size TAG,.-TAG
  // __hexagon_divdf3: double-precision divide of A (r1:0) by B (r3:2); the
  // result is returned in r1:0. The same entry point is also exported as
  // __qdsp_divdf3, __hexagon_fast_divdf3 and __hexagon_fast2_divdf3.
  //
  // Fast path (both operands normal):
  //   1. Build a single-precision copy of the divisor mantissa and refine an
  //      sfrecipa reciprocal estimate with two multiply/accumulate steps.
  //   2. Run four DIV_ITER1B steps that accumulate an integer quotient in Q
  //      while maintaining the running remainder in REM.
  //   3. Apply one final correction, fold a sticky bit into Q, apply the sign,
  //      convert Q to double and add the exponent difference to the high word.
  // Non-normal operands go to .Ldiv_abnormal; results whose exponent fails the
  // NO_OVF_UNF range check go to .Ldiv_ovf_unf.
  //
  // NOTE(review): several packets below write a register that another
  // instruction in the same packet reads, or write one predicate twice. The
  // existing comments in this file treat a double predicate write as a logical
  // AND; reads are presumably of the pre-packet value - confirm in the PRM.
  52. .text
  53. .global __hexagon_divdf3
  54. .type __hexagon_divdf3,@function
  55. Q6_ALIAS(divdf3)
  56. FAST_ALIAS(divdf3)
  57. FAST2_ALIAS(divdf3)
  58. .p2align 5
  59. __hexagon_divdf3:
  60. {
  61. NORMAL = dfclass(A,#DFCLASS_NORMAL) // NORMAL: A is normal ...
  62. NORMAL = dfclass(B,#DFCLASS_NORMAL) // ... and B is normal
  63. EXPBA = combine(BH,AH) // EXPB = high word of B, EXPA = high word of A
  64. SIGN = xor(AH,BH) // bit 31 = sign of the quotient
  65. }
  // From here on r1:0 and r3:2 are treated as the integer remainder and
  // denominator rather than as floating-point operands.
  66. #undef A
  67. #undef AH
  68. #undef AL
  69. #undef B
  70. #undef BH
  71. #undef BL
  72. #define REM r1:0
  73. #define REMHI r1
  74. #define REMLO r0
  75. #define DENOM r3:2
  76. #define DENOMHI r3
  77. #define DENOMLO r2
  78. {
  79. if (!NORMAL) jump .Ldiv_abnormal // zero, denormal, infinity or NaN involved
  80. PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) // top SF_MANTBITS bits of the divisor mantissa
  81. SFONE = ##0x3f800001 // single-precision 1.0 with the lowest mantissa bit set
  82. }
  83. {
  84. SFDEN = or(SFONE,PRODLO) // divisor mantissa re-packed as a single-precision value
  85. EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) // biased exponent field of B
  86. EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) // biased exponent field of A
  87. Q_POSITIVE = cmp.gt(SIGN,#-1) // true when bit 31 of SIGN is clear
  88. }
  // r28 is no longer needed as SIGN (its information now lives in Q_POSITIVE).
  89. #undef SIGN
  90. #define ONE r28
  // Re-entry point: .Ldiv_abnormal jumps here for (de)normal/(de)normal inputs
  // after normalizing both operands and rebuilding EXPA, EXPB and SFDEN.
  91. .Ldenorm_continue:
  92. {
  93. SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN) // initial reciprocal estimate for SFDEN
  94. SFERROR = and(SFONE,#-2) // clear the low bit: 0x3f800000 = 1.0f
  95. ONE = #1
  96. EXPA = sub(EXPA,EXPB) // exponent difference A - B
  97. }
  // r13 is reused for the integer reciprocal estimate.
  98. #undef EXPB
  99. #define RECIPEST r13
  100. {
  101. SFERROR -= sfmpy(SFRECIP,SFDEN):lib // error = 1.0 - recip * den
  102. REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // overwrite sign+exponent with 0x001: mantissa with explicit leading 1
  103. RECIPEST = ##0x00800000 << RECIPEST_SHIFT // leading 1 of the integer estimate, just above the inserted mantissa
  104. }
  105. {
  106. SFRECIP += sfmpy(SFRECIP,SFERROR):lib // recip += recip * error (first refinement)
  107. DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // same treatment for the divisor
  108. SFERROR = and(SFONE,#-2) // back to 1.0f for the second refinement
  109. }
  110. {
  111. SFERROR -= sfmpy(SFRECIP,SFDEN):lib // error = 1.0 - recip * den, with the refined recip
  112. QH = #-DF_BIAS+1 // lower bound for the exponent range check
  113. QL = #DF_BIAS-1 // upper bound for the exponent range check
  114. }
  115. {
  116. SFRECIP += sfmpy(SFRECIP,SFERROR):lib // second refinement
  117. NO_OVF_UNF = cmp.gt(EXPA,QH) // EXPA > -DF_BIAS+1 ...
  118. NO_OVF_UNF = !cmp.gt(EXPA,QL) // ... and EXPA <= DF_BIAS-1
  119. }
  120. {
  121. RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT) // drop the refined mantissa bits under the leading 1
  122. Q = #0 // quotient accumulator starts empty
  123. EXPA = add(EXPA,#-QADJ) // pre-adjust; added to the converted quotient's exponent at the end
  124. }
  // r10/r11 are free again and become general temporaries.
  125. #undef SFERROR
  126. #undef SFRECIP
  127. #define TMP r10
  128. #define TMP1 r11
  129. {
  130. RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT)) // lower the estimate by 3 units at the mantissa position; presumably keeps it from exceeding the true reciprocal - TODO confirm
  131. }
  // DIV_ITER1B: one quotient-refinement step, three packets.
  //   PROD  = RECIPEST * REMHI (unsigned 32x32); PRODHI is the quotient chunk
  //   REM   = (REM << REMSHIFT) - PRODHI * DENOM, with the 32x64 product formed
  //           from PRODHI*DENOMLO and (PRODHI*DENOMHI) << 32
  //   Q    += PROD with PRODLO cleared, shifted by QSHIFT using QSHIFTINSN
  //   EXTRA is an optional extra instruction appended to the last packet.
  // NOTE(review): the first packet both reads REMHI and shifts REM; the multiply
  // presumably sees the pre-shift REMHI - confirm.
  132. #define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
  133. { \
  134. PROD = mpyu(RECIPEST,REMHI); \
  135. REM = asl(REM,# ## ( REMSHIFT )); \
  136. }; \
  137. { \
  138. PRODLO = # ## 0; \
  139. REM -= mpyu(PRODHI,DENOMLO); \
  140. REMSUB2 = mpyu(PRODHI,DENOMHI); \
  141. }; \
  142. { \
  143. Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
  144. REM -= asl(REMSUB2, # ## 32); \
  145. EXTRA \
  146. }
  // Four steps. Each chunk is added to Q 15 bits lower than the previous one
  // (shifts of +14, -1, -16, -31), matching the 15-bit shift applied to REM.
  // The last step also zeroes PROD, which the fix-up below relies on.
  147. DIV_ITER1B(ASL,14,15,)
  148. DIV_ITER1B(ASR,1,15,)
  149. DIV_ITER1B(ASR,16,15,)
  150. DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
  // r15:14 becomes a general temporary pair and r13 is EXPB again.
  151. #undef REMSUB2
  152. #define TMPPAIR r15:14
  153. #define TMPPAIRHI r15
  154. #define TMPPAIRLO r14
  155. #undef RECIPEST
  156. #define EXPB r13
  157. {
  158. // compare or sub with carry
  159. TMPPAIR = sub(REM,DENOM) // remainder if one more DENOM is taken out
  160. P_TMP = cmp.gtu(DENOM,REM) // true when REM < DENOM, i.e. no correction needed
  161. // set up amt to add to q
  162. if (!P_TMP.new) PRODLO = #2 // PROD is 0 here, so PROD becomes 2 when REM >= DENOM
  163. }
  164. {
  165. Q = add(Q,PROD) // adds 2 when a correction is needed, otherwise 0
  166. if (!P_TMP) REM = TMPPAIR // commit the reduced remainder
  167. TMPPAIR = #0
  168. }
  169. {
  170. P_TMP = cmp.eq(REM,TMPPAIR) // is the remainder exactly zero?
  171. if (!P_TMP.new) QL = or(QL,ONE) // if not, set bit 0 of Q as a sticky bit
  172. }
  173. {
  174. PROD = neg(Q)
  175. }
  176. {
  177. if (!Q_POSITIVE) Q = PROD // negative result: use the negated quotient
  178. }
  // Switch back to the operand names; the result is assembled in A (r1:0).
  179. #undef REM
  180. #undef REMHI
  181. #undef REMLO
  182. #undef DENOM
  183. #undef DENOMLO
  184. #undef DENOMHI
  185. #define A r1:0
  186. #define AH r1
  187. #define AL r0
  188. #define B r3:2
  189. #define BH r3
  190. #define BL r2
  191. {
  192. A = convert_d2df(Q) // signed 64-bit quotient -> double
  193. if (!NO_OVF_UNF) jump .Ldiv_ovf_unf // exponent failed the range check: slow path
  194. }
  195. {
  196. AH += asl(EXPA,#DF_MANTBITS-32) // add EXPA into the exponent field of the result
  197. jumpr r31 // return
  198. }
  // Slow path taken when EXPA failed the NO_OVF_UNF range check. Forms the
  // full result exponent, then dispatches to .Ldiv_ovf (too large),
  // .Lpossible_unf (still positive) or falls through to the underflow code.
  // On entry A already holds convert_d2df(Q) and Q is the signed quotient.
  199. .Ldiv_ovf_unf:
  200. {
  201. AH += asl(EXPA,#DF_MANTBITS-32) // same exponent adjustment as the fast path
  202. EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) // exponent field of AH; NOTE(review): presumably the value before the += in this packet (the "post-converted exponent" below) - confirm
  203. }
  204. {
  205. PROD = abs(Q) // magnitude of the integer quotient
  206. EXPA = add(EXPA,EXPB) // full result exponent (adjustment + converted exponent)
  207. }
  208. {
  209. P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
  210. if (P_TMP.new) jump:nt .Ldiv_ovf
  211. }
  212. {
  213. P_TMP = cmp.gt(EXPA,#0) // exponent still positive: A may already be correct
  214. if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
  215. }
  216. // Underflow
  217. // We know what the infinite range exponent should be (EXPA)
  218. // Q is 2's complement, PROD is abs(Q)
  219. // Normalize Q, shift right, add a high bit, convert, change exponent
  220. #define FUDGE1 7 // how much to shift right
  221. #define FUDGE2 4 // how many guard/round to keep at lsbs
  222. {
  223. EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
  224. EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
  225. TMP = USR // current USR value, written back below
  226. TMP1 = #63 // upper limit for the right-shift count
  227. }
  228. {
  229. EXPB = min(EXPA,TMP1) // right-shift count, capped at 63
  230. TMP1 = or(TMP,#0x030) // USR value with the 0x30 flag bits set (the mask .Lpossible_unf labels Unf+Inexact)
  231. PROD = asl(PROD,EXPB) // normalize; NOTE(review): presumably uses the clb-based EXPB from the previous packet - confirm
  232. EXPA = #0 // low half of EXPBA: extract offset 0
  233. }
  234. {
  235. TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
  236. PROD = lsr(PROD,EXPB) // shift out bits
  237. B = #1
  238. }
  239. {
  240. P_TMP = cmp.gtu(B,TMPPAIR) // true when every shifted-out bit was zero
  241. if (!P_TMP.new) PRODLO = or(BL,PRODLO) // otherwise record them as a sticky bit 0
  242. PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) // the "high bit": bit DF_MANTBITS+FUDGE2 of PROD
  243. }
  244. {
  245. Q = neg(PROD)
  246. P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1) // are the FUDGE2 guard bits all clear?
  247. if (!P_TMP.new) TMP = TMP1 // no: write back the USR value with 0x30 set
  248. }
  249. {
  250. USR = TMP
  251. if (Q_POSITIVE) Q = PROD // keep the negated value only for negative results
  252. TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2) // exponent correction applied after the conversion
  253. }
  254. {
  255. A = convert_d2df(Q)
  256. }
  257. {
  258. AH += asl(TMP,#DF_MANTBITS-32) // apply the correction to the exponent field
  259. jumpr r31 // return
  260. }
  // Reached from .Ldiv_ovf_unf with A holding the already-computed result and
  // PROD holding abs(Q). Returns A unchanged; only the USR flags may be updated.
  261. .Lpossible_unf:
  262. // If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
  263. // The answer is correct, but we need to raise Underflow
  264. {
  265. B = extractu(A,#63,#0) // A with the sign bit dropped
  266. TMPPAIR = combine(##0x00100000,#0) // min normal
  267. TMP = #0x7FFF // mask for the low 15 bits of PRODHI
  268. }
  269. {
  270. P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
  271. P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
  272. }
  // The v60 build issues the same two instructions outside a packet and in the
  // opposite order. NOTE(review): the reason is not visible in this file.
  273. #if (__HEXAGON_ARCH__ == 60)
  274. TMP = USR // Read USR first; only used if we do not return
  275. if (!P_TMP) jumpr r31 // If not, just return; else we want to set Unf+Inexact
  276. // Note that inexact is already set...
  277. #else
  278. {
  279. if (!P_TMP) jumpr r31 // If not, just return
  280. TMP = USR // Else, we want to set Unf+Inexact
  281. } // Note that inexact is already set...
  282. #endif
  283. {
  284. TMP = or(TMP,#0x30) // Unf+Inexact flag bits
  285. }
  286. {
  287. USR = TMP
  288. }
  289. {
  290. p0 = dfcmp.eq(A,A) // NOTE(review): result unused; .Ldiv_ovf labels its equivalent compare "get exceptions"
  291. jumpr r31 // return
  292. }
  293. .Ldiv_ovf:
  294. // Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
  295. {
  296. TMP = USR
  297. B = combine(##0x7fefffff,#-1) // default result magnitude: largest finite double
  298. AH = mux(Q_POSITIVE,#0,#-1) // 0 for a positive result, all ones (sign bit set) for a negative one
  299. }
  300. {
  301. PROD = combine(##0x7ff00000,#0) // infinity bit pattern
  302. QH = extractu(TMP,#2,#SR_ROUND_OFF) // 2-bit rounding-mode field of USR
  303. TMP = or(TMP,#0x28) // overflow flag bits; 0x20 is shared with the Unf+Inexact mask 0x30, so presumably Ovf+Inexact - confirm against the USR layout
  304. }
  305. {
  306. USR = TMP
  307. QH ^= lsr(AH,#31) // flip bit 0 of the mode when the result is negative
  308. QL = QH // NOTE(review): tested below as the raw mode, so presumably captures QH before the ^= in this packet - confirm
  309. }
  310. {
  311. p0 = !cmp.eq(QL,#1) // if not round-to-zero
  312. p0 = !cmp.eq(QH,#2) // and not rounding the other way
  313. if (p0.new) B = PROD // go to inf
  314. p0 = dfcmp.eq(B,B) // get exceptions
  315. }
  316. {
  317. A = insert(B,#63,#0) // low 63 bits from B; bit 63 keeps the sign placed in AH above
  318. jumpr r31 // return
  319. }
  // r28 is SIGN again (it has not been overwritten with ONE on this path), and
  // p1/p2 are reused as P_INF/P_ZERO.
  320. #undef ONE
  321. #define SIGN r28
  322. #undef NORMAL
  323. #undef NO_OVF_UNF
  324. #define P_INF p1
  325. #define P_ZERO p2
  // Entered from the second packet of __hexagon_divdf3 when A and B are not both
  // normal. Dispatches NaN, invalid (inf/inf, 0/0), zero-result and
  // infinite-result cases; the remaining (de)normal/(de)normal case is
  // normalized here and handed back to .Ldenorm_continue.
  326. .Ldiv_abnormal:
  327. {
  328. P_TMP = dfclass(A,#DFCLASS_NUMBER) // P_TMP: A is in one of the NUMBER classes ...
  329. P_TMP = dfclass(B,#DFCLASS_NUMBER) // ... and so is B
  330. Q_POSITIVE = cmp.gt(SIGN,#-1) // the fast path computes this only after the jump to here
  331. }
  332. {
  333. P_INF = dfclass(A,#DFCLASS_INFINITE) // P_INF: both operands infinite
  334. P_INF = dfclass(B,#DFCLASS_INFINITE)
  335. }
  336. {
  337. P_ZERO = dfclass(A,#DFCLASS_ZERO) // P_ZERO: both operands zero
  338. P_ZERO = dfclass(B,#DFCLASS_ZERO)
  339. }
  340. {
  341. if (!P_TMP) jump .Ldiv_nan // at least one operand is outside the NUMBER classes
  342. if (P_INF) jump .Ldiv_invalid // inf / inf
  343. }
  344. {
  345. if (P_ZERO) jump .Ldiv_invalid // 0 / 0
  346. }
  347. {
  348. P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
  349. P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
  350. }
  351. {
  352. P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
  353. P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
  354. }
  355. {
  356. if (!P_ZERO) jump .Ldiv_zero_result // A is zero or B is infinite
  357. if (!P_INF) jump .Ldiv_inf_result // A is infinite or B is zero
  358. }
  359. // Now we've narrowed it down to (de)normal / (de)normal
  360. // Set up A/EXPA B/EXPB and go back
  361. #undef P_ZERO
  362. #undef P_INF
  363. #define P_TMP2 p1
  364. {
  365. P_TMP = dfclass(A,#DFCLASS_NORMAL) // separate predicates here: P_TMP for A ...
  366. P_TMP2 = dfclass(B,#DFCLASS_NORMAL) // ... P_TMP2 for B
  367. TMP = ##0x00100000 // hidden-bit position within the high word
  368. }
  369. {
  370. EXPBA = combine(BH,AH) // keep the original high words for the exponent extraction below
  371. AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
  372. BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
  373. }
  374. {
  375. if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
  376. if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
  377. }
  378. {
  379. QH = add(clb(A),#-DF_EXPBITS) // left shift that puts A's leading 1 at bit DF_MANTBITS
  380. QL = add(clb(B),#-DF_EXPBITS) // same for B
  381. TMP = #1
  382. }
  383. {
  384. EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32) // biased exponent field of A
  385. EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32) // biased exponent field of B
  386. }
  387. {
  388. A = asl(A,QH) // normalize the mantissas
  389. B = asl(B,QL)
  390. if (!P_TMP) EXPA = sub(TMP,QH) // not normal: use 1 - shift as the exponent
  391. if (!P_TMP2) EXPB = sub(TMP,QL) // not normal: use 1 - shift as the exponent
  392. } // recreate values needed by resume code
  393. {
  394. PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS) // top SF_MANTBITS mantissa bits of the normalized divisor
  395. }
  396. {
  397. SFDEN = or(SFONE,PRODLO) // NOTE(review): relies on SFONE having been loaded in the packet that jumped here - confirm
  398. jump .Ldenorm_continue
  399. }
  // Reached from .Ldiv_abnormal when A is zero or B is infinite (the 0/0 and
  // inf/inf cases were already sent to .Ldiv_invalid). Returns a signed zero.
  400. .Ldiv_zero_result:
  401. {
  402. AH = xor(AH,BH) // bit 31 of AH = sign of the result
  403. B = #0 // zero magnitude
  404. }
  405. {
  406. A = insert(B,#63,#0) // low 63 bits from B (all zero); bit 63 keeps the sign
  407. jumpr r31 // return
  408. }
  // Reached from .Ldiv_abnormal when A is infinite or B is zero. Returns a
  // signed infinity; the DBZ flag is set only when B is zero and A is not infinite.
  409. .Ldiv_inf_result:
  410. {
  411. p2 = dfclass(B,#DFCLASS_ZERO) // p2: divisor is zero ...
  412. p2 = dfclass(A,#DFCLASS_NONINFINITE) // ... and dividend is not infinite
  413. }
  414. {
  415. TMP = USR
  416. if (!p2) jump 1f // not a divide-by-zero: skip the flag update
  417. AH = xor(AH,BH) // bit 31 of AH = sign of the result (same packet as the jump)
  418. }
  419. {
  420. TMP = or(TMP,#0x04) // DBZ
  421. }
  422. {
  423. USR = TMP
  424. }
  425. 1:
  426. {
  427. B = combine(##0x7ff00000,#0) // infinity bit pattern
  428. p0 = dfcmp.uo(B,B) // take possible exception
  429. }
  430. {
  431. A = insert(B,#63,#0) // low 63 bits from B; bit 63 keeps the sign
  432. jumpr r31 // return
  433. }
  // Reached from .Ldiv_abnormal when at least one operand is outside the
  // DFCLASS_NUMBER classes. Returns the all-ones bit pattern in A.
  434. .Ldiv_nan:
  435. {
  436. p0 = dfclass(A,#0x10) // class bit 0x10: the class not covered by DFCLASS_NUMBER (0x0F)
  437. p1 = dfclass(B,#0x10)
  438. if (!p0.new) A = B // A is not in class 0x10: replace it with B
  439. if (!p1.new) B = A // B is not in class 0x10: replace it with A
  440. }
  441. {
  442. QH = convert_df2sf(A) // get possible invalid exceptions
  443. QL = convert_df2sf(B) // results are discarded; only the side effect matters
  444. }
  445. {
  446. A = #-1 // result: all bits set
  447. jumpr r31 // return
  448. }
  // Reached from .Ldiv_abnormal for inf/inf and 0/0. Produces the result by
  // converting a single-precision NaN constant to double.
  449. .Ldiv_invalid:
  450. {
  451. TMP = ##0x7f800001 // single-precision NaN pattern: all-ones exponent, only the lowest mantissa bit set
  452. }
  453. {
  454. A = convert_sf2df(TMP) // get invalid, get DF qNaN
  455. jumpr r31 // return
  456. }
  457. END(__hexagon_divdf3)