sfsqrt_opt.S 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081
  1. //===----------------------Hexagon builtin routine ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  // FUNCTION_BEGIN name
  // Opens a function: switches to .text, aligns to a 2^5 = 32-byte boundary,
  // marks \name as a global symbol of type @function and emits its entry label.
  8. .macro FUNCTION_BEGIN name
  9. .text
  10. .p2align 5
  11. .globl \name
  12. .type \name, @function
  13. \name:
  14. .endm
  // FUNCTION_END name
  // Closes a function: sets the symbol size of \name to the distance between
  // its label and the current location, so it must directly follow the last
  // instruction of the function body.
  15. .macro FUNCTION_END name
  16. .size \name, . - \name
  17. .endm
  // Symbolic register names used by __hexagon_sqrtf.
  // RIN and S intentionally name the same register (r0): r0 is read as the
  // input in the first packet and is then reused as the running result S.
  //   RIN  - input operand, read by sfinvsqrta / sffixupr
  //   S    - running result, cleared and then accumulated in place
  18. #define RIN r0
  19. #define S r0
  //   H    - companion accumulator, seeded with E*HALF and refined alongside S
  20. #define H r1
  //   D    - correction term, formed as HALF - S*H
  21. #define D r2
  //   E    - first the sfinvsqrta result, later the residual R - S*S
  22. #define E r3
  //   HALF - constant 0x3f000000 (0.5)
  23. #define HALF r4
  //   R    - result of sffixupr(RIN); the value the residual is taken against
  24. #define R r5
  // Symbol-alias helpers. Each expands to two assembler statements separated
  // by ';': declare the alias as a global symbol, then .set it equal to
  // __hexagon_<TAG>.
  //   Q6_ALIAS(TAG)    defines __qdsp_<TAG>
  //   FAST_ALIAS(TAG)  defines __hexagon_fast_<TAG>
  //   FAST2_ALIAS(TAG) defines __hexagon_fast2_<TAG>
  25. #define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
  26. #define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
  27. #define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
  // __hexagon_sqrtf: single-precision floating-point square root.
  //
  // In:     r0 (RIN) = argument
  // Out:    r0 (S)   = result
  // Writes: r0-r5, p0, p1. No loads, stores or stack use. Returns with
  //         jumpr r31.
  //
  // Computation, exactly as coded below (all multiplies are sfmpy accumulate
  // forms):
  //   E  = sfinvsqrta(RIN) (also yields p0),  R = sffixupr(RIN)
  //   S0 = E*R                 H0 = E*0.5
  //   d0 = 0.5 - S0*H0
  //   S1 = S0 + S0*d0          H1 = H0 + H0*d0
  //   d1 = 0.5 - S1*H1         e1 = R - S1*S1
  //   S2 = S1 + H1*e1          H2 = H1 + H1*d1
  //   e2 = R - S2*S2
  //   S3 = S2 + H2*e2          (:scale form, controlled by p0)
  // NOTE(review): presumably S converges to sqrt(R) and H to 0.5/sqrt(R);
  // confirm the exact seed/fixup semantics against the Hexagon PRM.
  //
  // Every {...} group is one packet. The code depends on each instruction in a
  // packet reading the register values from BEFORE the packet, e.g. r0 is read
  // as RIN while being cleared, and E/D are read while being reloaded.
  28. FUNCTION_BEGIN __hexagon_sqrtf
  // Packet 1: seed. Reads the argument from r0 and, in the same packet,
  // zeroes r1:r0 so S and H can be built with accumulate instructions.
  29. {
  30. E,p0 = sfinvsqrta(RIN)
  31. R = sffixupr(RIN)
  32. HALF = ##0x3f000000 // 0.5
  33. r1:0 = combine(#0,#0) // clear S/H
  34. }
  // Packet 2: S0 = 0 + E*R, H0 = 0 + E*0.5. D and E are preloaded with the
  // minuends (0.5 and R) for the subtract-accumulates that follow.
  35. {
  36. S += sfmpy(E,R):lib // S0
  37. H += sfmpy(E,HALF):lib // H0
  38. D = HALF
  39. E = R
  40. }
  // Packet 3: d0 = 0.5 - S0*H0. p1 records whether R is in the zero class; it
  // is consumed later by the signed-zero fix-up. The e0 residual is not needed
  // in the first step and is left commented out.
  41. {
  42. D -= sfmpy(S,H):lib // d0
  43. p1 = sfclass(R,#1) // is zero?
  44. //E -= sfmpy(S,S):lib // e0
  45. }
  // Packet 4: first refinement of S and H by d0; reload D and E (E still holds
  // R from packet 2, so "E = R" simply keeps the pattern uniform).
  46. {
  47. S += sfmpy(S,D):lib // S1
  48. H += sfmpy(H,D):lib // H1
  49. D = HALF
  50. E = R
  51. }
  // Packet 5: second-step correction terms, computed from S1/H1.
  52. {
  53. D -= sfmpy(S,H):lib // d1
  54. E -= sfmpy(S,S):lib // e1
  55. }
  // Packet 6: second refinement. S is corrected with the residual (H*e1), H
  // with the product error (H*d1). Both read the pre-packet H1.
  56. {
  57. S += sfmpy(H,E):lib // S2
  58. H += sfmpy(H,D):lib // H2
  59. D = HALF
  60. E = R
  61. }
  // Packet 7: final residual e2 = R - S2*S2 (reads S before the conditional
  // write below). If R was classified as zero, OR R's bits into r0 so that the
  // sign of a -0.0 input is carried into the result.
  62. {
  63. //D -= sfmpy(S,H):lib // d2
  64. E -= sfmpy(S,S):lib // e2
  65. if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0
  66. }
  // Packet 8: last correction, using the :scale form of sfmpy with the
  // predicate produced by sfinvsqrta, and return to the caller in the same
  // packet. NOTE(review): the precise effect of p0 on scaling is defined by the
  // ISA, not visible here; confirm in the PRM.
  67. {
  68. S += sfmpy(H,E,p0):scale // S3
  69. jumpr r31
  70. }
  71. FUNCTION_END __hexagon_sqrtf
  // Export the routine under its alternative names as well: __qdsp_sqrtf,
  // __hexagon_fast_sqrtf and __hexagon_fast2_sqrtf all resolve to
  // __hexagon_sqrtf.
  72. Q6_ALIAS(sqrtf)
  73. FAST_ALIAS(sqrtf)
  74. FAST2_ALIAS(sqrtf)