// common_entry_exit_legacy.S
//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Functions that implement common sequences in function prologues and
// epilogues, used to save code size.
  10. .macro FUNCTION_BEGIN name
  11. .text
  12. .globl \name
  13. .type \name, @function
  14. .falign
  15. \name:
  16. .endm
  17. .macro FUNCTION_END name
  18. .size \name, . - \name
  19. .endm
  20. .macro FALLTHROUGH_TAIL_CALL name0 name1
  21. .size \name0, . - \name0
  22. .globl \name1
  23. .type \name1, @function
  24. .falign
  25. \name1:
  26. .endm
// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.
// The compiler knows that the __save_* functions clobber LR.  No other
// registers should be used without informing the compiler.
// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.
  33. FUNCTION_BEGIN __save_r27_through_r16
  34. memd(fp+#-48) = r17:16
  35. FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
  36. memd(fp+#-40) = r19:18
  37. FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
  38. memd(fp+#-32) = r21:20
  39. FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
  40. memd(fp+#-24) = r23:22
  41. FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
  42. memd(fp+#-16) = r25:24
  43. {
  44. memd(fp+#-8) = r27:26
  45. jumpr lr
  46. }
  47. FUNCTION_END __save_r27_through_r24
// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
// with deallocframe.  That way, the return gets the old value of lr, which is
// where these functions need to return, and at the same time, lr gets the
// value it needs going into the sibcall.
  52. FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
  53. {
  54. r21:20 = memd(fp+#-32)
  55. r23:22 = memd(fp+#-24)
  56. }
  57. FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
  58. {
  59. r25:24 = memd(fp+#-16)
  60. jump __restore_r27_through_r26_and_deallocframe_before_sibcall
  61. }
  62. FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
  63. FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
  64. r17:16 = memd(fp+#-48)
  65. FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
  66. {
  67. r19:18 = memd(fp+#-40)
  68. r21:20 = memd(fp+#-32)
  69. }
  70. FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
  71. {
  72. r23:22 = memd(fp+#-24)
  73. r25:24 = memd(fp+#-16)
  74. }
  75. FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
  76. {
  77. r27:26 = memd(fp+#-8)
  78. deallocframe
  79. jumpr lr
  80. }
  81. FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
// Here we use the extra load bandwidth to restore LR early, allowing the
// return to occur in parallel with the deallocframe.
  84. FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
  85. {
  86. r17:16 = memd(fp+#-48)
  87. r19:18 = memd(fp+#-40)
  88. }
  89. FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
  90. {
  91. r21:20 = memd(fp+#-32)
  92. r23:22 = memd(fp+#-24)
  93. }
  94. FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
  95. {
  96. lr = memw(fp+#4)
  97. r25:24 = memd(fp+#-16)
  98. }
  99. {
  100. r27:26 = memd(fp+#-8)
  101. deallocframe
  102. jumpr lr
  103. }
  104. FUNCTION_END __restore_r27_through_r24_and_deallocframe
// Here the load bandwidth is maximized for all three functions.
  106. FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
  107. {
  108. r19:18 = memd(fp+#-40)
  109. r21:20 = memd(fp+#-32)
  110. }
  111. FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
  112. {
  113. r23:22 = memd(fp+#-24)
  114. r25:24 = memd(fp+#-16)
  115. }
  116. FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
  117. {
  118. r27:26 = memd(fp+#-8)
  119. deallocframe
  120. }
  121. jumpr lr
  122. FUNCTION_END __restore_r27_through_r26_and_deallocframe