// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MIPS common macros
  11. #ifndef WEBP_DSP_MIPS_MACRO_H_
  12. #define WEBP_DSP_MIPS_MACRO_H_
  13. #if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
  14. #define WORK_AROUND_GCC
  15. #endif
  16. #define STR(s) #s
  17. #define XSTR(s) STR(s)
  18. // O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
  19. // O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
  20. // O - output
  21. // I - input (macro doesn't change it)
  22. #define ADD_SUB_HALVES(O0, O1, \
  23. I0, I1) \
  24. "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
  25. "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t"
  26. // O - output
  27. // I - input (macro doesn't change it)
  28. // I[0/1] - offset in bytes
  29. #define LOAD_IN_X2(O0, O1, \
  30. I0, I1) \
  31. "lh %[" #O0 "], " #I0 "(%[in]) \n\t" \
  32. "lh %[" #O1 "], " #I1 "(%[in]) \n\t"
  33. // I0 - location
  34. // I1..I9 - offsets in bytes
  35. #define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \
  36. I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \
  37. "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \
  38. "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \
  39. "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \
  40. "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t"
  41. // O - output
  42. // IO - input/output
  43. // I - input (macro doesn't change it)
  44. #define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, \
  45. IO0, IO1, IO2, IO3, \
  46. I0, I1, I2, I3, I4, I5, I6, I7) \
  47. "mul %[" #O0 "], %[" #I0 "], %[kC2] \n\t" \
  48. "mul %[" #O1 "], %[" #I0 "], %[kC1] \n\t" \
  49. "mul %[" #O2 "], %[" #I1 "], %[kC2] \n\t" \
  50. "mul %[" #O3 "], %[" #I1 "], %[kC1] \n\t" \
  51. "mul %[" #O4 "], %[" #I2 "], %[kC2] \n\t" \
  52. "mul %[" #O5 "], %[" #I2 "], %[kC1] \n\t" \
  53. "mul %[" #O6 "], %[" #I3 "], %[kC2] \n\t" \
  54. "mul %[" #O7 "], %[" #I3 "], %[kC1] \n\t" \
  55. "sra %[" #O0 "], %[" #O0 "], 16 \n\t" \
  56. "sra %[" #O1 "], %[" #O1 "], 16 \n\t" \
  57. "sra %[" #O2 "], %[" #O2 "], 16 \n\t" \
  58. "sra %[" #O3 "], %[" #O3 "], 16 \n\t" \
  59. "sra %[" #O4 "], %[" #O4 "], 16 \n\t" \
  60. "sra %[" #O5 "], %[" #O5 "], 16 \n\t" \
  61. "sra %[" #O6 "], %[" #O6 "], 16 \n\t" \
  62. "sra %[" #O7 "], %[" #O7 "], 16 \n\t" \
  63. "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t" \
  64. "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t" \
  65. "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t" \
  66. "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t"
  67. // O - output
  68. // I - input (macro doesn't change it)
  69. #define INSERT_HALF_X2(O0, O1, \
  70. I0, I1) \
  71. "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t" \
  72. "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t"
  73. // O - output
  74. // I - input (macro doesn't change it)
  75. #define SRA_16(O0, O1, O2, O3, \
  76. I0, I1, I2, I3) \
  77. "sra %[" #O0 "], %[" #I0 "], 16 \n\t" \
  78. "sra %[" #O1 "], %[" #I1 "], 16 \n\t" \
  79. "sra %[" #O2 "], %[" #I2 "], 16 \n\t" \
  80. "sra %[" #O3 "], %[" #I3 "], 16 \n\t"
  81. // temp0[31..16 | 15..0] = temp8[31..16 | 15..0] + temp12[31..16 | 15..0]
  82. // temp1[31..16 | 15..0] = temp8[31..16 | 15..0] - temp12[31..16 | 15..0]
  83. // temp0[31..16 | 15..0] = temp0[31..16 >> 3 | 15..0 >> 3]
  84. // temp1[31..16 | 15..0] = temp1[31..16 >> 3 | 15..0 >> 3]
  85. // O - output
  86. // I - input (macro doesn't change it)
  87. #define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, \
  88. I0, I1, I2, I3, I4, I5, I6, I7) \
  89. "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t" \
  90. "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t" \
  91. "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t" \
  92. "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t" \
  93. "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t" \
  94. "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t" \
  95. "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t" \
  96. "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t" \
  97. "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t" \
  98. "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t" \
  99. "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t" \
  100. "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t" \
  101. "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t" \
  102. "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t" \
  103. "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t" \
  104. "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t"
  105. // precrq.ph.w temp0, temp8, temp2
  106. // temp0 = temp8[31..16] | temp2[31..16]
  107. // ins temp2, temp8, 16, 16
  108. // temp2 = temp8[31..16] | temp2[15..0]
  109. // O - output
  110. // IO - input/output
  111. // I - input (macro doesn't change it)
  112. #define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, \
  113. IO0, IO1, IO2, IO3, \
  114. I0, I1, I2, I3) \
  115. "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t" \
  116. "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t" \
  117. "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t" \
  118. "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t" \
  119. "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t" \
  120. "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t" \
  121. "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t" \
  122. "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t"
  123. // preceu.ph.qbr temp0, temp8
  124. // temp0 = 0 | 0 | temp8[23..16] | temp8[7..0]
  125. // preceu.ph.qbl temp1, temp8
  126. // temp1 = temp8[23..16] | temp8[7..0] | 0 | 0
  127. // O - output
  128. // I - input (macro doesn't change it)
  129. #define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, \
  130. I0, I1, I2, I3) \
  131. "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t" \
  132. "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t" \
  133. "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t" \
  134. "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t" \
  135. "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t" \
  136. "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t" \
  137. "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t" \
  138. "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t"
  139. // temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
  140. // temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
  141. // temp1..temp7 same as temp0
  142. // precrqu_s.qb.ph temp0, temp1, temp0:
  143. // temp0 = temp1[31..24] | temp1[15..8] | temp0[31..24] | temp0[15..8]
  144. // store temp0 to dst
  145. // IO - input/output
  146. // I - input (macro doesn't change it)
  147. #define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \
  148. I0, I1, I2, I3, I4, I5, I6, I7, \
  149. I8, I9, I10, I11, I12, I13) \
  150. "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t" \
  151. "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t" \
  152. "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t" \
  153. "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t" \
  154. "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t" \
  155. "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t" \
  156. "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t" \
  157. "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t" \
  158. "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t" \
  159. "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t" \
  160. "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t" \
  161. "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t" \
  162. "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t" \
  163. "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t" \
  164. "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t" \
  165. "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t" \
  166. "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t" \
  167. "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \
  168. "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \
  169. "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \
  170. "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \
  171. "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \
  172. "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \
  173. "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t"
  174. #define OUTPUT_EARLY_CLOBBER_REGS_10() \
  175. : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \
  176. [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), \
  177. [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), \
  178. [temp10]"=&r"(temp10)
  179. #define OUTPUT_EARLY_CLOBBER_REGS_18() \
  180. OUTPUT_EARLY_CLOBBER_REGS_10(), \
  181. [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), \
  182. [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), \
  183. [temp17]"=&r"(temp17), [temp18]"=&r"(temp18)
  184. #endif // WEBP_DSP_MIPS_MACRO_H_