alpha_processing_mips_dsp_r2.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic (branimir.vasic@imgtec.com)
//            Djordje Pesut  (djordje.pesut@imgtec.com)
  14. #include "./dsp.h"
  15. #if defined(WEBP_USE_MIPS_DSP_R2)
  16. static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
  17. int width, int height,
  18. uint8_t* dst, int dst_stride) {
  19. uint32_t alpha_mask = 0xffffffff;
  20. int i, j, temp0;
  21. for (j = 0; j < height; ++j) {
  22. uint8_t* pdst = dst;
  23. const uint8_t* palpha = alpha;
  24. for (i = 0; i < (width >> 2); ++i) {
  25. int temp1, temp2, temp3;
  26. __asm__ volatile (
  27. "ulw %[temp0], 0(%[palpha]) \n\t"
  28. "addiu %[palpha], %[palpha], 4 \n\t"
  29. "addiu %[pdst], %[pdst], 16 \n\t"
  30. "srl %[temp1], %[temp0], 8 \n\t"
  31. "srl %[temp2], %[temp0], 16 \n\t"
  32. "srl %[temp3], %[temp0], 24 \n\t"
  33. "and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
  34. "sb %[temp0], -16(%[pdst]) \n\t"
  35. "sb %[temp1], -12(%[pdst]) \n\t"
  36. "sb %[temp2], -8(%[pdst]) \n\t"
  37. "sb %[temp3], -4(%[pdst]) \n\t"
  38. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  39. [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
  40. [alpha_mask]"+r"(alpha_mask)
  41. :
  42. : "memory"
  43. );
  44. }
  45. for (i = 0; i < (width & 3); ++i) {
  46. __asm__ volatile (
  47. "lbu %[temp0], 0(%[palpha]) \n\t"
  48. "addiu %[palpha], %[palpha], 1 \n\t"
  49. "sb %[temp0], 0(%[pdst]) \n\t"
  50. "and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
  51. "addiu %[pdst], %[pdst], 4 \n\t"
  52. : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
  53. [alpha_mask]"+r"(alpha_mask)
  54. :
  55. : "memory"
  56. );
  57. }
  58. alpha += alpha_stride;
  59. dst += dst_stride;
  60. }
  61. __asm__ volatile (
  62. "ext %[temp0], %[alpha_mask], 0, 16 \n\t"
  63. "srl %[alpha_mask], %[alpha_mask], 16 \n\t"
  64. "and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
  65. "ext %[temp0], %[alpha_mask], 0, 8 \n\t"
  66. "srl %[alpha_mask], %[alpha_mask], 8 \n\t"
  67. "and %[alpha_mask], %[alpha_mask], %[temp0] \n\t"
  68. : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
  69. :
  70. );
  71. return (alpha_mask != 0xff);
  72. }
  73. static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
  74. int inverse) {
  75. int x;
  76. const uint32_t c_00ffffff = 0x00ffffffu;
  77. const uint32_t c_ff000000 = 0xff000000u;
  78. const uint32_t c_8000000 = 0x00800000u;
  79. const uint32_t c_8000080 = 0x00800080u;
  80. for (x = 0; x < width; ++x) {
  81. const uint32_t argb = ptr[x];
  82. if (argb < 0xff000000u) { // alpha < 255
  83. if (argb <= 0x00ffffffu) { // alpha == 0
  84. ptr[x] = 0;
  85. } else {
  86. int temp0, temp1, temp2, temp3, alpha;
  87. __asm__ volatile (
  88. "srl %[alpha], %[argb], 24 \n\t"
  89. "replv.qb %[temp0], %[alpha] \n\t"
  90. "and %[temp0], %[temp0], %[c_00ffffff] \n\t"
  91. "beqz %[inverse], 0f \n\t"
  92. "divu $zero, %[c_ff000000], %[alpha] \n\t"
  93. "mflo %[temp0] \n\t"
  94. "0: \n\t"
  95. "andi %[temp1], %[argb], 0xff \n\t"
  96. "ext %[temp2], %[argb], 8, 8 \n\t"
  97. "ext %[temp3], %[argb], 16, 8 \n\t"
  98. "mul %[temp1], %[temp1], %[temp0] \n\t"
  99. "mul %[temp2], %[temp2], %[temp0] \n\t"
  100. "mul %[temp3], %[temp3], %[temp0] \n\t"
  101. "precrq.ph.w %[temp1], %[temp2], %[temp1] \n\t"
  102. "addu %[temp3], %[temp3], %[c_8000000] \n\t"
  103. "addu %[temp1], %[temp1], %[c_8000080] \n\t"
  104. "precrq.ph.w %[temp3], %[argb], %[temp3] \n\t"
  105. "precrq.qb.ph %[temp1], %[temp3], %[temp1] \n\t"
  106. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  107. [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
  108. : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
  109. [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
  110. [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
  111. : "memory", "hi", "lo"
  112. );
  113. ptr[x] = temp1;
  114. }
  115. }
  116. }
  117. }
  118. #ifdef WORDS_BIGENDIAN
  119. static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
  120. const uint8_t* g, const uint8_t* b, int len,
  121. uint32_t* out) {
  122. int temp0, temp1, temp2, temp3, offset;
  123. const int rest = len & 1;
  124. const uint32_t* const loop_end = out + len - rest;
  125. const int step = 4;
  126. __asm__ volatile (
  127. "xor %[offset], %[offset], %[offset] \n\t"
  128. "beq %[loop_end], %[out], 0f \n\t"
  129. "2: \n\t"
  130. "lbux %[temp0], %[offset](%[a]) \n\t"
  131. "lbux %[temp1], %[offset](%[r]) \n\t"
  132. "lbux %[temp2], %[offset](%[g]) \n\t"
  133. "lbux %[temp3], %[offset](%[b]) \n\t"
  134. "ins %[temp1], %[temp0], 16, 16 \n\t"
  135. "ins %[temp3], %[temp2], 16, 16 \n\t"
  136. "addiu %[out], %[out], 4 \n\t"
  137. "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
  138. "sw %[temp0], -4(%[out]) \n\t"
  139. "addu %[offset], %[offset], %[step] \n\t"
  140. "bne %[loop_end], %[out], 2b \n\t"
  141. "0: \n\t"
  142. "beq %[rest], $zero, 1f \n\t"
  143. "lbux %[temp0], %[offset](%[a]) \n\t"
  144. "lbux %[temp1], %[offset](%[r]) \n\t"
  145. "lbux %[temp2], %[offset](%[g]) \n\t"
  146. "lbux %[temp3], %[offset](%[b]) \n\t"
  147. "ins %[temp1], %[temp0], 16, 16 \n\t"
  148. "ins %[temp3], %[temp2], 16, 16 \n\t"
  149. "precr.qb.ph %[temp0], %[temp1], %[temp3] \n\t"
  150. "sw %[temp0], 0(%[out]) \n\t"
  151. "1: \n\t"
  152. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  153. [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
  154. : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
  155. [loop_end]"r"(loop_end), [rest]"r"(rest)
  156. : "memory"
  157. );
  158. }
  159. #endif // WORDS_BIGENDIAN
  160. static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
  161. const uint8_t* b, int len, int step,
  162. uint32_t* out) {
  163. int temp0, temp1, temp2, offset;
  164. const int rest = len & 1;
  165. const int a = 0xff;
  166. const uint32_t* const loop_end = out + len - rest;
  167. __asm__ volatile (
  168. "xor %[offset], %[offset], %[offset] \n\t"
  169. "beq %[loop_end], %[out], 0f \n\t"
  170. "2: \n\t"
  171. "lbux %[temp0], %[offset](%[r]) \n\t"
  172. "lbux %[temp1], %[offset](%[g]) \n\t"
  173. "lbux %[temp2], %[offset](%[b]) \n\t"
  174. "ins %[temp0], %[a], 16, 16 \n\t"
  175. "ins %[temp2], %[temp1], 16, 16 \n\t"
  176. "addiu %[out], %[out], 4 \n\t"
  177. "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
  178. "sw %[temp0], -4(%[out]) \n\t"
  179. "addu %[offset], %[offset], %[step] \n\t"
  180. "bne %[loop_end], %[out], 2b \n\t"
  181. "0: \n\t"
  182. "beq %[rest], $zero, 1f \n\t"
  183. "lbux %[temp0], %[offset](%[r]) \n\t"
  184. "lbux %[temp1], %[offset](%[g]) \n\t"
  185. "lbux %[temp2], %[offset](%[b]) \n\t"
  186. "ins %[temp0], %[a], 16, 16 \n\t"
  187. "ins %[temp2], %[temp1], 16, 16 \n\t"
  188. "precr.qb.ph %[temp0], %[temp0], %[temp2] \n\t"
  189. "sw %[temp0], 0(%[out]) \n\t"
  190. "1: \n\t"
  191. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  192. [offset]"=&r"(offset), [out]"+&r"(out)
  193. : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
  194. [loop_end]"r"(loop_end), [rest]"r"(rest)
  195. : "memory"
  196. );
  197. }
  198. //------------------------------------------------------------------------------
  199. // Entry point
  200. extern void WebPInitAlphaProcessingMIPSdspR2(void);
  201. WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
  202. WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
  203. WebPMultARGBRow = MultARGBRow_MIPSdspR2;
  204. #ifdef WORDS_BIGENDIAN
  205. WebPPackARGB = PackARGB_MIPSdspR2;
  206. #endif
  207. WebPPackRGB = PackRGB_MIPSdspR2;
  208. }
  209. #else // !WEBP_USE_MIPS_DSP_R2
  210. WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
  211. #endif // WEBP_USE_MIPS_DSP_R2