// fastmath2_dlib_asm.S — Hexagon fast2 double-precision support routines.
//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators                  */
/* ==================================================================== */
/* c = dadd_asm(a, b)                                                   */
/* ==================================================================== *
fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) {
  fast2_QDOUBLE c;
  lint manta = a & MANTMASK;
  int expa = Q6_R_sxth_R(a) ;
  lint mantb = b & MANTMASK;
  int expb = Q6_R_sxth_R(b) ;
  int exp, expdiff, j, k, hi, lo, cn;
  lint mant;
  expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
  expdiff = Q6_R_sxth_R(expdiff) ;
  if (expdiff > 63) { expdiff = 62;}
  if (expa > expb) {
    exp = expa + 1;
    expa = 1;
    expb = expdiff + 1;
  } else {
    exp = expb + 1;
    expb = 1;
    expa = expdiff + 1;
  }
  mant = (manta>>expa) + (mantb>>expb);
  hi = (int) (mant>>32);
  lo = (int) (mant);
  k = Q6_R_normamt_R(hi);
  if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
  mant = (mant << k);
  cn = (mant == 0x8000000000000000LL);
  exp = exp - k + cn;
  if (mant == 0 || mant == -1) exp = 0x8001;
  c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
  return(c);
}
 * ==================================================================== */
  46. .text
  47. .global fast2_dadd_asm
  48. .type fast2_dadd_asm, @function
  49. fast2_dadd_asm:
  50. #define manta R0
  51. #define mantexpa R1:0
  52. #define lmanta R1:0
  53. #define mantb R2
  54. #define mantexpb R3:2
  55. #define lmantb R3:2
  56. #define expa R4
  57. #define expb R5
  58. #define mantexpd R7:6
  59. #define expd R6
  60. #define exp R8
  61. #define c63 R9
  62. #define lmant R1:0
  63. #define manth R1
  64. #define mantl R0
  65. #define minmin R11:10 // exactly 0x000000000000008001LL
  66. #define minminl R10
  67. #define k R4
  68. #define ce P0
  69. .falign
  70. {
  71. mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
  72. c63 = #62
  73. expa = SXTH(manta)
  74. expb = SXTH(mantb)
  75. } {
  76. expd = SXTH(expd)
  77. ce = CMP.GT(expa, expb);
  78. if ( ce.new) exp = add(expa, #1)
  79. if (!ce.new) exp = add(expb, #1)
  80. } {
  81. if ( ce) expa = #1
  82. if (!ce) expb = #1
  83. manta.L = #0
  84. expd = MIN(expd, c63)
  85. } {
  86. if (!ce) expa = add(expd, #1)
  87. if ( ce) expb = add(expd, #1)
  88. mantb.L = #0
  89. minmin = #0
  90. } {
  91. lmanta = ASR(lmanta, expa)
  92. lmantb = ASR(lmantb, expb)
  93. } {
  94. lmant = add(lmanta, lmantb)
  95. minminl.L = #0x8001
  96. } {
  97. k = clb(lmant)
  98. c63 = #58
  99. } {
  100. k = add(k, #-1)
  101. p0 = cmp.gt(k, c63)
  102. } {
  103. mantexpa = ASL(lmant, k)
  104. exp = SUB(exp, k)
  105. if(p0) jump .Ldenorma
  106. } {
  107. manta = insert(exp, #16, #0)
  108. jumpr r31
  109. }
  110. .Ldenorma:
  111. {
  112. mantexpa = minmin
  113. jumpr r31
  114. }
/* =================================================================== *
fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) {
  fast2_QDOUBLE c;
  lint manta = a & MANTMASK;
  int expa = Q6_R_sxth_R(a) ;
  lint mantb = b & MANTMASK;
  int expb = Q6_R_sxth_R(b) ;
  int exp, expdiff, j, k;
  lint mant;
  expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
  expdiff = Q6_R_sxth_R(expdiff) ;
  if (expdiff > 63) { expdiff = 62;}
  if (expa > expb) {
    exp = expa + 1;
    expa = 1;
    expb = expdiff + 1;
  } else {
    exp = expb + 1;
    expb = 1;
    expa = expdiff + 1;
  }
  mant = (manta>>expa) - (mantb>>expb);
  k = Q6_R_clb_P(mant)-1;
  mant = (mant << k);
  exp = exp - k;
  if (mant == 0 || mant == -1) exp = 0x8001;
  c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
  return(c);
}
 * ==================================================================== */
  145. .text
  146. .global fast2_dsub_asm
  147. .type fast2_dsub_asm, @function
  148. fast2_dsub_asm:
  149. #define manta R0
  150. #define mantexpa R1:0
  151. #define lmanta R1:0
  152. #define mantb R2
  153. #define mantexpb R3:2
  154. #define lmantb R3:2
  155. #define expa R4
  156. #define expb R5
  157. #define mantexpd R7:6
  158. #define expd R6
  159. #define exp R8
  160. #define c63 R9
  161. #define lmant R1:0
  162. #define manth R1
  163. #define mantl R0
  164. #define minmin R11:10 // exactly 0x000000000000008001LL
  165. #define minminl R10
  166. #define k R4
  167. #define ce P0
  168. .falign
  169. {
  170. mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
  171. c63 = #62
  172. expa = SXTH(manta)
  173. expb = SXTH(mantb)
  174. } {
  175. expd = SXTH(expd)
  176. ce = CMP.GT(expa, expb);
  177. if ( ce.new) exp = add(expa, #1)
  178. if (!ce.new) exp = add(expb, #1)
  179. } {
  180. if ( ce) expa = #1
  181. if (!ce) expb = #1
  182. manta.L = #0
  183. expd = MIN(expd, c63)
  184. } {
  185. if (!ce) expa = add(expd, #1)
  186. if ( ce) expb = add(expd, #1)
  187. mantb.L = #0
  188. minmin = #0
  189. } {
  190. lmanta = ASR(lmanta, expa)
  191. lmantb = ASR(lmantb, expb)
  192. } {
  193. lmant = sub(lmanta, lmantb)
  194. minminl.L = #0x8001
  195. } {
  196. k = clb(lmant)
  197. c63 = #58
  198. } {
  199. k = add(k, #-1)
  200. p0 = cmp.gt(k, c63)
  201. } {
  202. mantexpa = ASL(lmant, k)
  203. exp = SUB(exp, k)
  204. if(p0) jump .Ldenorm
  205. } {
  206. manta = insert(exp, #16, #0)
  207. jumpr r31
  208. }
  209. .Ldenorm:
  210. {
  211. mantexpa = minmin
  212. jumpr r31
  213. }
/* ==================================================================== *
fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) {
  fast2_QDOUBLE c;
  lint manta = a & MANTMASK;
  int expa = Q6_R_sxth_R(a) ;
  lint mantb = b & MANTMASK;
  int expb = Q6_R_sxth_R(b) ;
  int exp, k;
  lint mant;
  int hia, hib, hi, lo;
  unsigned int loa, lob;
  hia = (int)(a >> 32);
  loa = Q6_R_extractu_RII((int)manta, 31, 1);
  hib = (int)(b >> 32);
  lob = Q6_R_extractu_RII((int)mantb, 31, 1);
  mant = Q6_P_mpy_RR(hia, lob);
  mant = Q6_P_mpyacc_RR(mant,hib, loa);
  mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
  hi = (int) (mant>>32);
  k = Q6_R_normamt_R(hi);
  mant = mant << k;
  exp = expa + expb - k;
  if (mant == 0 || mant == -1) exp = 0x8001;
  c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
  return(c);
}
 * ==================================================================== */
  241. .text
  242. .global fast2_dmpy_asm
  243. .type fast2_dmpy_asm, @function
  244. fast2_dmpy_asm:
  245. #define mantal R0
  246. #define mantah R1
  247. #define mantexpa R1:0
  248. #define mantbl R2
  249. #define mantbh R3
  250. #define mantexpb R3:2
  251. #define expa R4
  252. #define expb R5
  253. #define c8001 R12
  254. #define mantexpd R7:6
  255. #define mantdh R7
  256. #define exp R8
  257. #define lmantc R11:10
  258. #define kb R9
  259. #define guard R11
  260. #define mantal_ R12
  261. #define mantbl_ R13
  262. #define min R15:14
  263. #define minh R15
  264. .falign
  265. {
  266. mantbl_= lsr(mantbl, #16)
  267. expb = sxth(mantbl)
  268. expa = sxth(mantal)
  269. mantal_= lsr(mantal, #16)
  270. }
  271. {
  272. lmantc = mpy(mantah, mantbh)
  273. mantexpd = mpy(mantah, mantbl_)
  274. mantal.L = #0x0
  275. min = #0
  276. }
  277. {
  278. lmantc = add(lmantc, lmantc)
  279. mantexpd+= mpy(mantbh, mantal_)
  280. mantbl.L = #0x0
  281. minh.H = #0x8000
  282. }
  283. {
  284. mantexpd = asr(mantexpd, #15)
  285. c8001.L = #0x8001
  286. p1 = cmp.eq(mantexpa, mantexpb)
  287. }
  288. {
  289. mantexpd = add(mantexpd, lmantc)
  290. exp = add(expa, expb)
  291. p2 = cmp.eq(mantexpa, min)
  292. }
  293. {
  294. kb = clb(mantexpd)
  295. mantexpb = abs(mantexpd)
  296. guard = #58
  297. }
  298. {
  299. p1 = and(p1, p2)
  300. exp = sub(exp, kb)
  301. kb = add(kb, #-1)
  302. p0 = cmp.gt(kb, guard)
  303. }
  304. {
  305. exp = add(exp, #1)
  306. mantexpa = asl(mantexpd, kb)
  307. if(p1) jump .Lsat //rarely happens
  308. }
  309. {
  310. mantal = insert(exp,#16, #0)
  311. if(!p0) jumpr r31
  312. }
  313. {
  314. mantal = insert(c8001,#16, #0)
  315. jumpr r31
  316. }
  317. .Lsat:
  318. {
  319. mantexpa = #-1
  320. }
  321. {
  322. mantexpa = lsr(mantexpa, #1)
  323. }
  324. {
  325. mantal = insert(exp,#16, #0)
  326. jumpr r31
  327. }
/* ==================================================================== *
int fast2_qd2f(fast2_QDOUBLE a) {
  int exp;
  long long int manta;
  int ic, rnd, mantb;
  manta = a>>32;
  exp = Q6_R_sxth_R(a) ;
  ic = 0x80000000 & manta;
  manta = Q6_R_abs_R_sat(manta);
  rnd = 0x40;
  if((manta & 0xff) == rnd) rnd = 0x00;
  mantb = (manta + rnd)>>7;
  exp = (exp + 126);
  if((manta & 0x7fffffc0) == 0x7fffffc0) {
    manta = 0x0; exp++;
  } else {
    manta= mantb & 0x007fffff;
  }
  exp = (exp << 23) & 0x7fffffc0;
  ic = Q6_R_addacc_RR(ic, exp, manta);
  return (ic);
}
 * ==================================================================== */
  351. .text
  352. .global fast2_qd2f_asm
  353. .type fast2_qd2f_asm, @function
  354. fast2_qd2f_asm:
  355. #define mantah R1
  356. #define mantal R0
  357. #define cff R0
  358. #define mant R3
  359. #define expo R4
  360. #define rnd R5
  361. #define mask R6
  362. #define c07f R7
  363. #define c80 R0
  364. #define mantb R2
  365. #define ic R0
  366. .falign
  367. {
  368. mant = abs(mantah):sat
  369. expo = sxth(mantal)
  370. rnd = #0x40
  371. mask.L = #0xffc0
  372. }
  373. {
  374. cff = extractu(mant, #8, #0)
  375. p2 = cmp.gt(expo, #126)
  376. p3 = cmp.ge(expo, #-126)
  377. mask.H = #0x7fff
  378. }
  379. {
  380. p1 = cmp.eq(cff,#0x40)
  381. if(p1.new) rnd = #0
  382. expo = add(expo, #126)
  383. if(!p3) jump .Lmin
  384. }
  385. {
  386. p0 = bitsset(mant, mask)
  387. c80.L = #0x0000
  388. mantb = add(mant, rnd)
  389. c07f = lsr(mask, #8)
  390. }
  391. {
  392. if(p0) expo = add(expo, #1)
  393. if(p0) mant = #0
  394. mantb = lsr(mantb, #7)
  395. c80.H = #0x8000
  396. }
  397. {
  398. ic = and(c80, mantah)
  399. mask &= asl(expo, #23)
  400. if(!p0) mant = and(mantb, c07f)
  401. if(p2) jump .Lmax
  402. }
  403. {
  404. ic += add(mask, mant)
  405. jumpr r31
  406. }
  407. .Lmax:
  408. {
  409. ic.L = #0xffff;
  410. }
  411. {
  412. ic.H = #0x7f7f;
  413. jumpr r31
  414. }
  415. .Lmin:
  416. {
  417. ic = #0x0
  418. jumpr r31
  419. }
/* ==================================================================== *
fast2_QDOUBLE fast2_f2qd(int ia) {
  lint exp;
  lint mant;
  fast2_QDOUBLE c;
  mant = ((ia << 7) | 0x40000000)&0x7fffff80 ;
  if (ia & 0x80000000) mant = -mant;
  exp = ((ia >> 23) & 0xFFLL) - 126;
  c = (mant<<32) | Q6_R_zxth_R(exp);
  return(c);
}
 * ==================================================================== */
  432. .text
  433. .global fast2_f2qd_asm
  434. .type fast2_f2qd_asm, @function
  435. fast2_f2qd_asm:
  436. #define ia R0
  437. #define mag R3
  438. #define mantr R1
  439. #define expr R0
  440. #define zero R2
  441. #define maxneg R5:4
  442. #define maxnegl R4
  443. .falign
  444. {
  445. mantr = asl(ia, #7)
  446. p0 = tstbit(ia, #31)
  447. maxneg = #0
  448. mag = add(ia,ia)
  449. }
  450. {
  451. mantr = setbit(mantr, #30)
  452. expr= extractu(ia,#8,#23)
  453. maxnegl.L = #0x8001
  454. p1 = cmp.eq(mag, #0)
  455. }
  456. {
  457. mantr= extractu(mantr, #31, #0)
  458. expr= add(expr, #-126)
  459. zero = #0
  460. if(p1) jump .Lminqd
  461. }
  462. {
  463. expr = zxth(expr)
  464. if(p0) mantr= sub(zero, mantr)
  465. jumpr r31
  466. }
  467. .Lminqd:
  468. {
  469. R1:0 = maxneg
  470. jumpr r31
  471. }