// floatundixf.S
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  4. #include "../assembly.h"
  5. // xf_float __floatundixf(du_int a);
  6. #ifdef __x86_64__
  7. CONST_SECTION
  8. .balign 16
  9. twop64:
  10. .quad 0x43f0000000000000
  11. #define REL_ADDR(_a) (_a)(%rip)
  12. .text
  13. .balign 4
  14. DEFINE_COMPILERRT_FUNCTION(__floatundixf)
  15. movq %rdi, -8(%rsp)
  16. fildq -8(%rsp)
  17. test %rdi, %rdi
  18. js 1f
  19. ret
  20. 1: faddl REL_ADDR(twop64)
  21. ret
  22. END_COMPILERRT_FUNCTION(__floatundixf)
  23. #endif // __x86_64__
/* Branch-free implementation is ever so slightly slower, but more beautiful.
   It is likely superior for inlining, so I kept it around for future reference.

#ifdef __x86_64__

CONST_SECTION

        .balign 4
twop52:
        .quad   0x4330000000000000
twop84_plus_twop52_neg:
        .quad   0xc530000000100000
twop84:
        .quad   0x4530000000000000

#define REL_ADDR(_a)    (_a)(%rip)

        .text

        .balign 4
DEFINE_COMPILERRT_FUNCTION(__floatundixf)
        movl    %edi, %esi                      // low 32 bits of input
        shrq    $32, %rdi                       // hi 32 bits of input
        orq     REL_ADDR(twop84), %rdi          // 2^84 + hi (as a double)
        orq     REL_ADDR(twop52), %rsi          // 2^52 + lo (as a double)
        movq    %rdi, -8(%rsp)
        movq    %rsi, -16(%rsp)
        fldl    REL_ADDR(twop84_plus_twop52_neg)
        faddl   -8(%rsp)        // hi - 2^52 (as double extended, no rounding occurs)
        faddl   -16(%rsp)       // hi + lo (as double extended)
        ret
END_COMPILERRT_FUNCTION(__floatundixf)

#endif // __x86_64__

*/
// NOTE(review): this macro is not defined in this file (presumably it comes
// from ../assembly.h).  Judging by its name it marks the object as not needing
// an executable stack - confirm against the macro definition.
NO_EXEC_STACK_DIRECTIVE