addsf3.S 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. //===-- addsf3.S - Adds two single precision floating point numbers-------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements __addsf3 (single-precision floating-point addition
  10. // with the IEEE-754 default rounding: to nearest, ties to even) for the ARM
  11. // Thumb1 ISA.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "../assembly.h"
  // Layout constants for the 32-bit single-precision format handled below.
  // significandBits is the width of the stored fraction field: the code shifts
  // a sign-less value right by this amount to obtain the 8-bit exponent.
  15. #define significandBits 23
  // typeWidth is the total width in bits of the value; (typeWidth - 1) is the
  // position of the sign bit.
  16. #define typeWidth 32
  // Use the unified ARM/Thumb assembler syntax for the rest of the file.
  17. .syntax unified
  // Emit what follows into the code section.
  18. .text
  // Assemble what follows as Thumb instructions.
  19. .thumb
  // Align the next location to a 2^2 = 4-byte boundary.
  20. .p2align 2
  //===----------------------------------------------------------------------===//
  // __addsf3(a, b)  --  also exported as __aeabi_fadd
  //
  // Soft-float single-precision addition with round-to-nearest, ties-to-even.
  //
  // In:    r0 = bit pattern of a (aRep), r1 = bit pattern of b (bRep)
  // Out:   r0 = bit pattern of a + b
  // Saved: r4-r7 and lr are pushed on entry and restored by every exit path.
  // Calls: __clzsi2, only when a subtraction cancels leading significand bits.
  //
  // Register roles on the main (finite, non-zero operands) path:
  //   r0 = aRep, r1 = bRep   (swapped if needed so that |a| >= |b|)
  //   r2 = aAbs, later aExponent (biased)
  //   r3 = bAbs, later bExponent, then scratch
  //   r4 = aSignificand << 3  (low three bits are round, guard and sticky)
  //   r5 = bSignificand << 3
  //   r6, r7 = scratch
  //===----------------------------------------------------------------------===//
  DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)
  DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
    push {r4, r5, r6, r7, lr}

    // Get the absolute value of a and b (shift the sign bit out and back).
    // A zero absolute value goes straight to the special-case code.
    lsls r2, r0, #1
    lsls r3, r1, #1
    lsrs r2, r2, #1 // aAbs
    beq LOCAL_LABEL(a_zero_nan_inf) // a is +/-0; bAbs is finished there.
    lsrs r3, r3, #1 // bAbs
    beq LOCAL_LABEL(zero_nan_inf) // b is +/-0.

    // Detect if a or b is infinity or NaN (exponent field all ones).
    lsrs r6, r2, #(significandBits)
    lsrs r7, r3, #(significandBits)
    cmp r6, #0xFF
    beq LOCAL_LABEL(zero_nan_inf)
    cmp r7, #0xFF
    beq LOCAL_LABEL(zero_nan_inf)

    // Swap Rep and Abs so that a and aAbs have the larger absolute value.
    cmp r2, r3
    bhs LOCAL_LABEL(no_swap)
    movs r4, r0
    movs r5, r2
    movs r0, r1
    movs r2, r3
    movs r1, r4
    movs r3, r5
  LOCAL_LABEL(no_swap):

    // Get the significands and shift them to give us round, guard and sticky.
    lsls r4, r0, #(typeWidth - significandBits)
    lsrs r4, r4, #(typeWidth - significandBits - 3) // aSignificand << 3
    lsls r5, r1, #(typeWidth - significandBits)
    lsrs r5, r5, #(typeWidth - significandBits - 3) // bSignificand << 3

    // Get the implicitBit (already shifted left by 3 like the significands).
    movs r6, #1
    lsls r6, r6, #(significandBits + 3)

    // Get aExponent and set implicit bit if necessary (exponent != 0).
    lsrs r2, r2, #(significandBits)
    beq LOCAL_LABEL(a_done_implicit_bit)
    orrs r4, r6
  LOCAL_LABEL(a_done_implicit_bit):

    // Get bExponent and set implicit bit if necessary (exponent != 0).
    lsrs r3, r3, #(significandBits)
    beq LOCAL_LABEL(b_done_implicit_bit)
    orrs r5, r6
  LOCAL_LABEL(b_done_implicit_bit):

    // Get the difference in exponents: align = aExponent - bExponent.
    subs r6, r2, r3
    beq LOCAL_LABEL(done_align)
    // If b is denormal, then a must be normal as align > 0, and we only need to
    // right shift bSignificand by (align - 1) bits.
    cmp r3, #0
    bne 1f
    subs r6, r6, #1
  1:

    // No longer needs bExponent. r3 is dead here.
    // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
    movs r3, #(typeWidth)
    subs r3, r3, r6
    movs r7, r5
    lsls r7, r3
    beq 1f
    movs r7, #1 // Collapse all shifted-out bits into a single sticky bit.
  1:

    // bSignificand = bSignificand >> align | sticky;
    lsrs r5, r6
    orrs r5, r7
    bne LOCAL_LABEL(done_align)
    movs r5, #1 // sticky; b is known to be non-zero.

  LOCAL_LABEL(done_align):
    // isSubtraction = (aRep ^ bRep) >> 31;
    movs r7, r0
    eors r7, r1
    lsrs r7, #31
    bne LOCAL_LABEL(do_subtraction)

    // Same sign, do Addition.
    // aSignificand += bSignificand;
    adds r4, r4, r5

    // Check carry bit (one position above the implicit bit).
    movs r6, #1
    lsls r6, r6, #(significandBits + 3 + 1)
    movs r7, r4
    ands r7, r6
    bne LOCAL_LABEL(add_carried)

    // No carry out of the implicit-bit position.  Denormal inputs carry no
    // implicit bit, so when aExponent == 0 (b is then denormal too, because
    // |b| <= |a|) the sum can still reach the implicit-bit position.  That bit
    // is masked off when the result is assembled below, so it has to be
    // recorded in the exponent instead: aExponent becomes 1 exactly when the
    // bit is set.  Without this, 0x00400000 + 0x00400000 would yield 0x00000000
    // instead of 0x00800000.
    cmp r2, #0
    bne LOCAL_LABEL(form_result)
    lsrs r2, r4, #(significandBits + 3) // aExponent = implicit bit (0 or 1).
    b LOCAL_LABEL(form_result)

  LOCAL_LABEL(add_carried):
    // The addition carried up: right-shift the result by one, keeping the
    // shifted-out bit as sticky, and adjust the exponent.
    movs r7, r4
    movs r6, #1
    ands r7, r6 // sticky = aSignificand & 1;
    lsrs r4, #1
    orrs r4, r7 // result Significand
    adds r2, #1 // result Exponent
    // If we have overflowed the type, return +/- infinity.
    cmp r2, #0xFF
    beq LOCAL_LABEL(ret_inf)

  LOCAL_LABEL(form_result):
    // Shift the sign, exponent and significand into place.
    lsrs r0, #(typeWidth - 1)
    lsls r0, #(typeWidth - 1) // Get Sign.
    lsls r2, #(significandBits)
    orrs r0, r2
    movs r1, r4 // Keep a copy for the round/guard/sticky bits.
    lsls r4, #(typeWidth - significandBits - 3) // Drop the implicit bit ...
    lsrs r4, #(typeWidth - significandBits) // ... and round/guard/sticky.
    orrs r0, r4

    // Final rounding. The result may overflow to infinity, but that is the
    // correct result in that case.
    // roundGuardSticky = aSignificand & 0x7;
    movs r2, #0x7
    ands r1, r2
    // if (roundGuardSticky > 0x4) result++;
    cmp r1, #0x4
    blt LOCAL_LABEL(done_round)
    beq 1f
    adds r0, #1
    pop {r4, r5, r6, r7, pc}
  1:
    // if (roundGuardSticky == 0x4) result += result & 1;
    movs r1, r0
    lsrs r1, #1 // Carry flag = lowest bit of the result.
    bcc LOCAL_LABEL(done_round)
    adds r0, r0, #1
  LOCAL_LABEL(done_round):
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(do_subtraction):
    subs r4, r4, r5 // aSignificand -= bSignificand;
    beq LOCAL_LABEL(ret_zero)
    movs r6, r4
    cmp r2, #0
    beq LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
    // If partial cancellation occurred, we need to left-shift the result
    // and adjust the exponent:
    lsrs r6, r6, #(significandBits + 3)
    bne LOCAL_LABEL(form_result) // Implicit bit still set: already normal.

    // Call __clzsi2(aSignificand), preserving r0-r3 around the call exactly as
    // before.  r4 is pushed as well purely as padding: together with the five
    // words pushed on entry this puts an even number of words on the stack, so
    // SP is 8-byte aligned at the call as the AAPCS requires at public
    // interfaces (assuming SP was 8-byte aligned on entry).  r4 is callee-saved,
    // so reloading it gives back the same aSignificand.
    push {r0, r1, r2, r3, r4}
    movs r0, r4
    bl SYMBOL_NAME(__clzsi2)
    movs r5, r0
    pop {r0, r1, r2, r3, r4}
    // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
    subs r5, r5, #(typeWidth - significandBits - 3 - 1)
    // aSignificand <<= shift; aExponent -= shift;
    lsls r4, r5
    subs r2, r2, r5
    bgt LOCAL_LABEL(form_result)

    // aExponent <= 0: the result is denormal.  Shift the significand right by
    // (1 - aExponent), folding the shifted-out bits into the sticky bit, and
    // use an exponent of zero.
    movs r6, #1
    subs r6, r6, r2 // 1 - aExponent;
    movs r2, #0 // aExponent = 0;
    movs r3, #(typeWidth) // bExponent is dead.
    subs r3, r3, r6
    movs r7, r4
    lsls r7, r3 // stickyBit = (bool)(aSignificand << (typeWidth - align))
    beq 1f
    movs r7, #1
  1:
    lsrs r4, r6 // aSignificand >> shift
    orrs r4, r7
    b LOCAL_LABEL(form_result)

  LOCAL_LABEL(ret_zero):
    // Exact cancellation: return +0.
    movs r0, #0
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(a_zero_nan_inf):
    // Reached before bAbs was finished: complete it here.
    lsrs r3, r3, #1
  LOCAL_LABEL(zero_nan_inf):
    // Here r2 has aAbs, r3 has bAbs
    movs r4, #0xFF
    lsls r4, r4, #(significandBits) // Make +inf.
    cmp r2, r4
    bhi LOCAL_LABEL(a_is_nan) // aAbs > inf: a is NaN.
    cmp r3, r4
    bhi LOCAL_LABEL(b_is_nan) // bAbs > inf: b is NaN.
    cmp r2, r4
    bne LOCAL_LABEL(a_is_rational)
    // aAbs is INF.
    eors r1, r0 // aRep ^ bRep.
    movs r6, #1
    lsls r6, r6, #(typeWidth - 1) // get sign mask.
    cmp r1, r6 // if they only differ on sign bit, it's -INF + INF
    beq LOCAL_LABEL(a_is_nan) // Quieting the infinity in r0 makes it a NaN.
    pop {r4, r5, r6, r7, pc} // Otherwise the result is a (infinity).

  LOCAL_LABEL(a_is_rational):
    // a is finite.  If b is infinity the result is b.
    cmp r3, r4
    bne LOCAL_LABEL(b_is_rational)
    movs r0, r1
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(b_is_rational):
    // either a or b or both are zero.
    adds r4, r2, r3
    beq LOCAL_LABEL(both_zero)
    cmp r2, #0 // is absA 0 ?
    beq LOCAL_LABEL(ret_b)
    pop {r4, r5, r6, r7, pc} // b is zero: the result is a.

  LOCAL_LABEL(both_zero):
    // The sign bit survives the AND only when both operands are -0.
    ands r0, r1 // +0 + -0 = +0
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(ret_b):
    movs r0, r1
  LOCAL_LABEL(ret):
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(b_is_nan):
    movs r0, r1
  LOCAL_LABEL(a_is_nan):
    // Return the value in r0 with the quiet bit set.
    movs r1, #1
    lsls r1, r1, #(significandBits - 1) // r1 is quiet bit.
    orrs r0, r1
    pop {r4, r5, r6, r7, pc}

  LOCAL_LABEL(ret_inf):
    // Keep the sign of a, force the exponent to all ones and clear the
    // significand field.
    movs r4, #0xFF
    lsls r4, r4, #(significandBits)
    orrs r0, r4
    lsrs r0, r0, #(significandBits)
    lsls r0, r0, #(significandBits)
    pop {r4, r5, r6, r7, pc}
  END_COMPILERRT_FUNCTION(__addsf3)
  NO_EXEC_STACK_DIRECTIVE