// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s):  Djordje Pesut    (djordje.pesut@imgtec.com)
//             Jovan Zelincevic (jovan.zelincevic@imgtec.com)
  14. #include "./dsp.h"
  15. #if defined(WEBP_USE_MIPS_DSP_R2)
  16. #include "./lossless.h"
  17. #include "./lossless_common.h"
  18. #define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
  19. static void FUNC_NAME(const TYPE* src, \
  20. const uint32_t* const color_map, \
  21. TYPE* dst, int y_start, int y_end, \
  22. int width) { \
  23. int y; \
  24. for (y = y_start; y < y_end; ++y) { \
  25. int x; \
  26. for (x = 0; x < (width >> 2); ++x) { \
  27. int tmp1, tmp2, tmp3, tmp4; \
  28. __asm__ volatile ( \
  29. ".ifc " #TYPE ", uint8_t \n\t" \
  30. "lbu %[tmp1], 0(%[src]) \n\t" \
  31. "lbu %[tmp2], 1(%[src]) \n\t" \
  32. "lbu %[tmp3], 2(%[src]) \n\t" \
  33. "lbu %[tmp4], 3(%[src]) \n\t" \
  34. "addiu %[src], %[src], 4 \n\t" \
  35. ".endif \n\t" \
  36. ".ifc " #TYPE ", uint32_t \n\t" \
  37. "lw %[tmp1], 0(%[src]) \n\t" \
  38. "lw %[tmp2], 4(%[src]) \n\t" \
  39. "lw %[tmp3], 8(%[src]) \n\t" \
  40. "lw %[tmp4], 12(%[src]) \n\t" \
  41. "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
  42. "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
  43. "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
  44. "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
  45. "addiu %[src], %[src], 16 \n\t" \
  46. ".endif \n\t" \
  47. "sll %[tmp1], %[tmp1], 2 \n\t" \
  48. "sll %[tmp2], %[tmp2], 2 \n\t" \
  49. "sll %[tmp3], %[tmp3], 2 \n\t" \
  50. "sll %[tmp4], %[tmp4], 2 \n\t" \
  51. "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
  52. "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
  53. "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
  54. "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
  55. ".ifc " #TYPE ", uint8_t \n\t" \
  56. "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
  57. "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
  58. "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
  59. "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
  60. "sb %[tmp1], 0(%[dst]) \n\t" \
  61. "sb %[tmp2], 1(%[dst]) \n\t" \
  62. "sb %[tmp3], 2(%[dst]) \n\t" \
  63. "sb %[tmp4], 3(%[dst]) \n\t" \
  64. "addiu %[dst], %[dst], 4 \n\t" \
  65. ".endif \n\t" \
  66. ".ifc " #TYPE ", uint32_t \n\t" \
  67. "sw %[tmp1], 0(%[dst]) \n\t" \
  68. "sw %[tmp2], 4(%[dst]) \n\t" \
  69. "sw %[tmp3], 8(%[dst]) \n\t" \
  70. "sw %[tmp4], 12(%[dst]) \n\t" \
  71. "addiu %[dst], %[dst], 16 \n\t" \
  72. ".endif \n\t" \
  73. : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \
  74. [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \
  75. : [color_map]"r"(color_map) \
  76. : "memory" \
  77. ); \
  78. } \
  79. for (x = 0; x < (width & 3); ++x) { \
  80. *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
  81. } \
  82. } \
  83. }
  84. MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
  85. MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)
  86. #undef MAP_COLOR_FUNCS
  87. static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
  88. uint32_t c2) {
  89. int temp0, temp1, temp2, temp3, temp4, temp5;
  90. __asm__ volatile (
  91. "preceu.ph.qbr %[temp1], %[c0] \n\t"
  92. "preceu.ph.qbl %[temp2], %[c0] \n\t"
  93. "preceu.ph.qbr %[temp3], %[c1] \n\t"
  94. "preceu.ph.qbl %[temp4], %[c1] \n\t"
  95. "preceu.ph.qbr %[temp5], %[c2] \n\t"
  96. "preceu.ph.qbl %[temp0], %[c2] \n\t"
  97. "subq.ph %[temp3], %[temp3], %[temp5] \n\t"
  98. "subq.ph %[temp4], %[temp4], %[temp0] \n\t"
  99. "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
  100. "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
  101. "shll_s.ph %[temp1], %[temp1], 7 \n\t"
  102. "shll_s.ph %[temp2], %[temp2], 7 \n\t"
  103. "precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
  104. : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  105. [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
  106. : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
  107. : "memory"
  108. );
  109. return temp2;
  110. }
  111. static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
  112. uint32_t c2) {
  113. int temp0, temp1, temp2, temp3, temp4, temp5;
  114. __asm__ volatile (
  115. "adduh.qb %[temp5], %[c0], %[c1] \n\t"
  116. "preceu.ph.qbr %[temp3], %[c2] \n\t"
  117. "preceu.ph.qbr %[temp1], %[temp5] \n\t"
  118. "preceu.ph.qbl %[temp2], %[temp5] \n\t"
  119. "preceu.ph.qbl %[temp4], %[c2] \n\t"
  120. "subq.ph %[temp3], %[temp1], %[temp3] \n\t"
  121. "subq.ph %[temp4], %[temp2], %[temp4] \n\t"
  122. "shrl.ph %[temp5], %[temp3], 15 \n\t"
  123. "shrl.ph %[temp0], %[temp4], 15 \n\t"
  124. "addq.ph %[temp3], %[temp3], %[temp5] \n\t"
  125. "addq.ph %[temp4], %[temp0], %[temp4] \n\t"
  126. "shra.ph %[temp3], %[temp3], 1 \n\t"
  127. "shra.ph %[temp4], %[temp4], 1 \n\t"
  128. "addq.ph %[temp1], %[temp1], %[temp3] \n\t"
  129. "addq.ph %[temp2], %[temp2], %[temp4] \n\t"
  130. "shll_s.ph %[temp1], %[temp1], 7 \n\t"
  131. "shll_s.ph %[temp2], %[temp2], 7 \n\t"
  132. "precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
  133. : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  134. [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
  135. : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
  136. : "memory"
  137. );
  138. return temp1;
  139. }
  140. static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  141. int temp0, temp1, temp2, temp3, temp4, temp5;
  142. __asm__ volatile (
  143. "cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
  144. "pick.qb %[temp1], %[b], %[c] \n\t"
  145. "pick.qb %[temp2], %[c], %[b] \n\t"
  146. "cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
  147. "pick.qb %[temp4], %[a], %[c] \n\t"
  148. "pick.qb %[temp5], %[c], %[a] \n\t"
  149. "subu.qb %[temp3], %[temp1], %[temp2] \n\t"
  150. "subu.qb %[temp0], %[temp4], %[temp5] \n\t"
  151. "raddu.w.qb %[temp3], %[temp3] \n\t"
  152. "raddu.w.qb %[temp0], %[temp0] \n\t"
  153. "subu %[temp3], %[temp3], %[temp0] \n\t"
  154. "slti %[temp0], %[temp3], 0x1 \n\t"
  155. "movz %[a], %[b], %[temp0] \n\t"
  156. : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
  157. [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
  158. [a]"+&r"(a)
  159. : [b]"r"(b), [c]"r"(c)
  160. );
  161. return a;
  162. }
  163. static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  164. __asm__ volatile (
  165. "adduh.qb %[a0], %[a0], %[a1] \n\t"
  166. : [a0]"+r"(a0)
  167. : [a1]"r"(a1)
  168. );
  169. return a0;
  170. }
  171. static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
  172. return Average2(Average2(a0, a2), a1);
  173. }
  174. static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
  175. uint32_t a2, uint32_t a3) {
  176. return Average2(Average2(a0, a1), Average2(a2, a3));
  177. }
  178. static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
  179. const uint32_t* const top) {
  180. return Average3(*left, top[0], top[1]);
  181. }
  182. static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
  183. const uint32_t* const top) {
  184. return Average2(*left, top[-1]);
  185. }
  186. static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
  187. const uint32_t* const top) {
  188. return Average2(*left, top[0]);
  189. }
  190. static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
  191. const uint32_t* const top) {
  192. (void)left;
  193. return Average2(top[-1], top[0]);
  194. }
  195. static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
  196. const uint32_t* const top) {
  197. (void)left;
  198. return Average2(top[0], top[1]);
  199. }
  200. static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
  201. const uint32_t* const top) {
  202. return Average4(*left, top[-1], top[0], top[1]);
  203. }
  204. static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
  205. const uint32_t* const top) {
  206. return Select(top[0], *left, top[-1]);
  207. }
  208. static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
  209. const uint32_t* const top) {
  210. return ClampedAddSubtractFull(*left, top[0], top[-1]);
  211. }
  212. static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
  213. const uint32_t* const top) {
  214. return ClampedAddSubtractHalf(*left, top[0], top[-1]);
  215. }
  216. // Add green to blue and red channels (i.e. perform the inverse transform of
  217. // 'subtract green').
  218. static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
  219. uint32_t* dst) {
  220. uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  221. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  222. const uint32_t* const p_loop2_end = src + num_pixels;
  223. __asm__ volatile (
  224. ".set push \n\t"
  225. ".set noreorder \n\t"
  226. "beq %[src], %[p_loop1_end], 3f \n\t"
  227. " nop \n\t"
  228. "0: \n\t"
  229. "lw %[temp0], 0(%[src]) \n\t"
  230. "lw %[temp1], 4(%[src]) \n\t"
  231. "lw %[temp2], 8(%[src]) \n\t"
  232. "lw %[temp3], 12(%[src]) \n\t"
  233. "ext %[temp4], %[temp0], 8, 8 \n\t"
  234. "ext %[temp5], %[temp1], 8, 8 \n\t"
  235. "ext %[temp6], %[temp2], 8, 8 \n\t"
  236. "ext %[temp7], %[temp3], 8, 8 \n\t"
  237. "addiu %[src], %[src], 16 \n\t"
  238. "addiu %[dst], %[dst], 16 \n\t"
  239. "replv.ph %[temp4], %[temp4] \n\t"
  240. "replv.ph %[temp5], %[temp5] \n\t"
  241. "replv.ph %[temp6], %[temp6] \n\t"
  242. "replv.ph %[temp7], %[temp7] \n\t"
  243. "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
  244. "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
  245. "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
  246. "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
  247. "sw %[temp0], -16(%[dst]) \n\t"
  248. "sw %[temp1], -12(%[dst]) \n\t"
  249. "sw %[temp2], -8(%[dst]) \n\t"
  250. "bne %[src], %[p_loop1_end], 0b \n\t"
  251. " sw %[temp3], -4(%[dst]) \n\t"
  252. "3: \n\t"
  253. "beq %[src], %[p_loop2_end], 2f \n\t"
  254. " nop \n\t"
  255. "1: \n\t"
  256. "lw %[temp0], 0(%[src]) \n\t"
  257. "addiu %[src], %[src], 4 \n\t"
  258. "addiu %[dst], %[dst], 4 \n\t"
  259. "ext %[temp4], %[temp0], 8, 8 \n\t"
  260. "replv.ph %[temp4], %[temp4] \n\t"
  261. "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
  262. "bne %[src], %[p_loop2_end], 1b \n\t"
  263. " sw %[temp0], -4(%[dst]) \n\t"
  264. "2: \n\t"
  265. ".set pop \n\t"
  266. : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
  267. [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
  268. [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
  269. [temp7]"=&r"(temp7)
  270. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  271. : "memory"
  272. );
  273. }
  274. static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
  275. const uint32_t* src, int num_pixels,
  276. uint32_t* dst) {
  277. int temp0, temp1, temp2, temp3, temp4, temp5;
  278. uint32_t argb, argb1, new_red;
  279. const uint32_t G_to_R = m->green_to_red_;
  280. const uint32_t G_to_B = m->green_to_blue_;
  281. const uint32_t R_to_B = m->red_to_blue_;
  282. const uint32_t* const p_loop_end = src + (num_pixels & ~1);
  283. __asm__ volatile (
  284. ".set push \n\t"
  285. ".set noreorder \n\t"
  286. "beq %[src], %[p_loop_end], 1f \n\t"
  287. " nop \n\t"
  288. "replv.ph %[temp0], %[G_to_R] \n\t"
  289. "replv.ph %[temp1], %[G_to_B] \n\t"
  290. "replv.ph %[temp2], %[R_to_B] \n\t"
  291. "shll.ph %[temp0], %[temp0], 8 \n\t"
  292. "shll.ph %[temp1], %[temp1], 8 \n\t"
  293. "shll.ph %[temp2], %[temp2], 8 \n\t"
  294. "shra.ph %[temp0], %[temp0], 8 \n\t"
  295. "shra.ph %[temp1], %[temp1], 8 \n\t"
  296. "shra.ph %[temp2], %[temp2], 8 \n\t"
  297. "0: \n\t"
  298. "lw %[argb], 0(%[src]) \n\t"
  299. "lw %[argb1], 4(%[src]) \n\t"
  300. "sw %[argb], 0(%[dst]) \n\t"
  301. "sw %[argb1], 4(%[dst]) \n\t"
  302. "addiu %[src], %[src], 8 \n\t"
  303. "addiu %[dst], %[dst], 8 \n\t"
  304. "precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
  305. "preceu.ph.qbra %[temp3], %[temp3] \n\t"
  306. "shll.ph %[temp3], %[temp3], 8 \n\t"
  307. "shra.ph %[temp3], %[temp3], 8 \n\t"
  308. "mul.ph %[temp5], %[temp3], %[temp0] \n\t"
  309. "mul.ph %[temp3], %[temp3], %[temp1] \n\t"
  310. "precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
  311. "ins %[argb1], %[argb], 16, 16 \n\t"
  312. "shra.ph %[temp5], %[temp5], 5 \n\t"
  313. "shra.ph %[temp3], %[temp3], 5 \n\t"
  314. "addu.ph %[new_red], %[new_red], %[temp5] \n\t"
  315. "addu.ph %[argb1], %[argb1], %[temp3] \n\t"
  316. "preceu.ph.qbra %[temp5], %[new_red] \n\t"
  317. "shll.ph %[temp4], %[temp5], 8 \n\t"
  318. "shra.ph %[temp4], %[temp4], 8 \n\t"
  319. "mul.ph %[temp4], %[temp4], %[temp2] \n\t"
  320. "sb %[temp5], -2(%[dst]) \n\t"
  321. "sra %[temp5], %[temp5], 16 \n\t"
  322. "shra.ph %[temp4], %[temp4], 5 \n\t"
  323. "addu.ph %[argb1], %[argb1], %[temp4] \n\t"
  324. "preceu.ph.qbra %[temp3], %[argb1] \n\t"
  325. "sb %[temp5], -6(%[dst]) \n\t"
  326. "sb %[temp3], -4(%[dst]) \n\t"
  327. "sra %[temp3], %[temp3], 16 \n\t"
  328. "bne %[src], %[p_loop_end], 0b \n\t"
  329. " sb %[temp3], -8(%[dst]) \n\t"
  330. "1: \n\t"
  331. ".set pop \n\t"
  332. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  333. [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
  334. [new_red]"=&r"(new_red), [argb]"=&r"(argb),
  335. [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
  336. : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
  337. [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
  338. : "memory", "hi", "lo"
  339. );
  340. // Fall-back to C-version for left-overs.
  341. if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
  342. }
  343. static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
  344. int num_pixels, uint8_t* dst) {
  345. int temp0, temp1, temp2, temp3;
  346. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  347. const uint32_t* const p_loop2_end = src + num_pixels;
  348. __asm__ volatile (
  349. ".set push \n\t"
  350. ".set noreorder \n\t"
  351. "beq %[src], %[p_loop1_end], 3f \n\t"
  352. " nop \n\t"
  353. "0: \n\t"
  354. "lw %[temp3], 12(%[src]) \n\t"
  355. "lw %[temp2], 8(%[src]) \n\t"
  356. "lw %[temp1], 4(%[src]) \n\t"
  357. "lw %[temp0], 0(%[src]) \n\t"
  358. "ins %[temp3], %[temp2], 24, 8 \n\t"
  359. "sll %[temp2], %[temp2], 8 \n\t"
  360. "rotr %[temp3], %[temp3], 16 \n\t"
  361. "ins %[temp2], %[temp1], 0, 16 \n\t"
  362. "sll %[temp1], %[temp1], 8 \n\t"
  363. "wsbh %[temp3], %[temp3] \n\t"
  364. "balign %[temp0], %[temp1], 1 \n\t"
  365. "wsbh %[temp2], %[temp2] \n\t"
  366. "wsbh %[temp0], %[temp0] \n\t"
  367. "usw %[temp3], 8(%[dst]) \n\t"
  368. "rotr %[temp0], %[temp0], 16 \n\t"
  369. "usw %[temp2], 4(%[dst]) \n\t"
  370. "addiu %[src], %[src], 16 \n\t"
  371. "usw %[temp0], 0(%[dst]) \n\t"
  372. "bne %[src], %[p_loop1_end], 0b \n\t"
  373. " addiu %[dst], %[dst], 12 \n\t"
  374. "3: \n\t"
  375. "beq %[src], %[p_loop2_end], 2f \n\t"
  376. " nop \n\t"
  377. "1: \n\t"
  378. "lw %[temp0], 0(%[src]) \n\t"
  379. "addiu %[src], %[src], 4 \n\t"
  380. "wsbh %[temp1], %[temp0] \n\t"
  381. "addiu %[dst], %[dst], 3 \n\t"
  382. "ush %[temp1], -2(%[dst]) \n\t"
  383. "sra %[temp0], %[temp0], 16 \n\t"
  384. "bne %[src], %[p_loop2_end], 1b \n\t"
  385. " sb %[temp0], -3(%[dst]) \n\t"
  386. "2: \n\t"
  387. ".set pop \n\t"
  388. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  389. [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
  390. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  391. : "memory"
  392. );
  393. }
  394. static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
  395. int num_pixels, uint8_t* dst) {
  396. int temp0, temp1, temp2, temp3;
  397. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  398. const uint32_t* const p_loop2_end = src + num_pixels;
  399. __asm__ volatile (
  400. ".set push \n\t"
  401. ".set noreorder \n\t"
  402. "beq %[src], %[p_loop1_end], 3f \n\t"
  403. " nop \n\t"
  404. "0: \n\t"
  405. "lw %[temp0], 0(%[src]) \n\t"
  406. "lw %[temp1], 4(%[src]) \n\t"
  407. "lw %[temp2], 8(%[src]) \n\t"
  408. "lw %[temp3], 12(%[src]) \n\t"
  409. "wsbh %[temp0], %[temp0] \n\t"
  410. "wsbh %[temp1], %[temp1] \n\t"
  411. "wsbh %[temp2], %[temp2] \n\t"
  412. "wsbh %[temp3], %[temp3] \n\t"
  413. "addiu %[src], %[src], 16 \n\t"
  414. "balign %[temp0], %[temp0], 1 \n\t"
  415. "balign %[temp1], %[temp1], 1 \n\t"
  416. "balign %[temp2], %[temp2], 1 \n\t"
  417. "balign %[temp3], %[temp3], 1 \n\t"
  418. "usw %[temp0], 0(%[dst]) \n\t"
  419. "usw %[temp1], 4(%[dst]) \n\t"
  420. "usw %[temp2], 8(%[dst]) \n\t"
  421. "usw %[temp3], 12(%[dst]) \n\t"
  422. "bne %[src], %[p_loop1_end], 0b \n\t"
  423. " addiu %[dst], %[dst], 16 \n\t"
  424. "3: \n\t"
  425. "beq %[src], %[p_loop2_end], 2f \n\t"
  426. " nop \n\t"
  427. "1: \n\t"
  428. "lw %[temp0], 0(%[src]) \n\t"
  429. "wsbh %[temp0], %[temp0] \n\t"
  430. "addiu %[src], %[src], 4 \n\t"
  431. "balign %[temp0], %[temp0], 1 \n\t"
  432. "usw %[temp0], 0(%[dst]) \n\t"
  433. "bne %[src], %[p_loop2_end], 1b \n\t"
  434. " addiu %[dst], %[dst], 4 \n\t"
  435. "2: \n\t"
  436. ".set pop \n\t"
  437. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  438. [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
  439. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  440. : "memory"
  441. );
  442. }
  443. static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
  444. int num_pixels, uint8_t* dst) {
  445. int temp0, temp1, temp2, temp3, temp4, temp5;
  446. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  447. const uint32_t* const p_loop2_end = src + num_pixels;
  448. __asm__ volatile (
  449. ".set push \n\t"
  450. ".set noreorder \n\t"
  451. "beq %[src], %[p_loop1_end], 3f \n\t"
  452. " nop \n\t"
  453. "0: \n\t"
  454. "lw %[temp0], 0(%[src]) \n\t"
  455. "lw %[temp1], 4(%[src]) \n\t"
  456. "lw %[temp2], 8(%[src]) \n\t"
  457. "lw %[temp3], 12(%[src]) \n\t"
  458. "ext %[temp4], %[temp0], 28, 4 \n\t"
  459. "ext %[temp5], %[temp0], 12, 4 \n\t"
  460. "ins %[temp0], %[temp4], 0, 4 \n\t"
  461. "ext %[temp4], %[temp1], 28, 4 \n\t"
  462. "ins %[temp0], %[temp5], 16, 4 \n\t"
  463. "ext %[temp5], %[temp1], 12, 4 \n\t"
  464. "ins %[temp1], %[temp4], 0, 4 \n\t"
  465. "ext %[temp4], %[temp2], 28, 4 \n\t"
  466. "ins %[temp1], %[temp5], 16, 4 \n\t"
  467. "ext %[temp5], %[temp2], 12, 4 \n\t"
  468. "ins %[temp2], %[temp4], 0, 4 \n\t"
  469. "ext %[temp4], %[temp3], 28, 4 \n\t"
  470. "ins %[temp2], %[temp5], 16, 4 \n\t"
  471. "ext %[temp5], %[temp3], 12, 4 \n\t"
  472. "ins %[temp3], %[temp4], 0, 4 \n\t"
  473. "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
  474. "ins %[temp3], %[temp5], 16, 4 \n\t"
  475. "addiu %[src], %[src], 16 \n\t"
  476. "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
  477. #if (WEBP_SWAP_16BIT_CSP == 1)
  478. "usw %[temp1], 0(%[dst]) \n\t"
  479. "usw %[temp3], 4(%[dst]) \n\t"
  480. #else
  481. "wsbh %[temp1], %[temp1] \n\t"
  482. "wsbh %[temp3], %[temp3] \n\t"
  483. "usw %[temp1], 0(%[dst]) \n\t"
  484. "usw %[temp3], 4(%[dst]) \n\t"
  485. #endif
  486. "bne %[src], %[p_loop1_end], 0b \n\t"
  487. " addiu %[dst], %[dst], 8 \n\t"
  488. "3: \n\t"
  489. "beq %[src], %[p_loop2_end], 2f \n\t"
  490. " nop \n\t"
  491. "1: \n\t"
  492. "lw %[temp0], 0(%[src]) \n\t"
  493. "ext %[temp4], %[temp0], 28, 4 \n\t"
  494. "ext %[temp5], %[temp0], 12, 4 \n\t"
  495. "ins %[temp0], %[temp4], 0, 4 \n\t"
  496. "ins %[temp0], %[temp5], 16, 4 \n\t"
  497. "addiu %[src], %[src], 4 \n\t"
  498. "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
  499. #if (WEBP_SWAP_16BIT_CSP == 1)
  500. "ush %[temp0], 0(%[dst]) \n\t"
  501. #else
  502. "wsbh %[temp0], %[temp0] \n\t"
  503. "ush %[temp0], 0(%[dst]) \n\t"
  504. #endif
  505. "bne %[src], %[p_loop2_end], 1b \n\t"
  506. " addiu %[dst], %[dst], 2 \n\t"
  507. "2: \n\t"
  508. ".set pop \n\t"
  509. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  510. [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
  511. [dst]"+&r"(dst), [src]"+&r"(src)
  512. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  513. : "memory"
  514. );
  515. }
  516. static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
  517. int num_pixels, uint8_t* dst) {
  518. int temp0, temp1, temp2, temp3, temp4, temp5;
  519. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  520. const uint32_t* const p_loop2_end = src + num_pixels;
  521. __asm__ volatile (
  522. ".set push \n\t"
  523. ".set noreorder \n\t"
  524. "beq %[src], %[p_loop1_end], 3f \n\t"
  525. " nop \n\t"
  526. "0: \n\t"
  527. "lw %[temp0], 0(%[src]) \n\t"
  528. "lw %[temp1], 4(%[src]) \n\t"
  529. "lw %[temp2], 8(%[src]) \n\t"
  530. "lw %[temp3], 12(%[src]) \n\t"
  531. "ext %[temp4], %[temp0], 8, 16 \n\t"
  532. "ext %[temp5], %[temp0], 5, 11 \n\t"
  533. "ext %[temp0], %[temp0], 3, 5 \n\t"
  534. "ins %[temp4], %[temp5], 0, 11 \n\t"
  535. "ext %[temp5], %[temp1], 5, 11 \n\t"
  536. "ins %[temp4], %[temp0], 0, 5 \n\t"
  537. "ext %[temp0], %[temp1], 8, 16 \n\t"
  538. "ext %[temp1], %[temp1], 3, 5 \n\t"
  539. "ins %[temp0], %[temp5], 0, 11 \n\t"
  540. "ext %[temp5], %[temp2], 5, 11 \n\t"
  541. "ins %[temp0], %[temp1], 0, 5 \n\t"
  542. "ext %[temp1], %[temp2], 8, 16 \n\t"
  543. "ext %[temp2], %[temp2], 3, 5 \n\t"
  544. "ins %[temp1], %[temp5], 0, 11 \n\t"
  545. "ext %[temp5], %[temp3], 5, 11 \n\t"
  546. "ins %[temp1], %[temp2], 0, 5 \n\t"
  547. "ext %[temp2], %[temp3], 8, 16 \n\t"
  548. "ext %[temp3], %[temp3], 3, 5 \n\t"
  549. "ins %[temp2], %[temp5], 0, 11 \n\t"
  550. "append %[temp0], %[temp4], 16 \n\t"
  551. "ins %[temp2], %[temp3], 0, 5 \n\t"
  552. "addiu %[src], %[src], 16 \n\t"
  553. "append %[temp2], %[temp1], 16 \n\t"
  554. #if (WEBP_SWAP_16BIT_CSP == 1)
  555. "usw %[temp0], 0(%[dst]) \n\t"
  556. "usw %[temp2], 4(%[dst]) \n\t"
  557. #else
  558. "wsbh %[temp0], %[temp0] \n\t"
  559. "wsbh %[temp2], %[temp2] \n\t"
  560. "usw %[temp0], 0(%[dst]) \n\t"
  561. "usw %[temp2], 4(%[dst]) \n\t"
  562. #endif
  563. "bne %[src], %[p_loop1_end], 0b \n\t"
  564. " addiu %[dst], %[dst], 8 \n\t"
  565. "3: \n\t"
  566. "beq %[src], %[p_loop2_end], 2f \n\t"
  567. " nop \n\t"
  568. "1: \n\t"
  569. "lw %[temp0], 0(%[src]) \n\t"
  570. "ext %[temp4], %[temp0], 8, 16 \n\t"
  571. "ext %[temp5], %[temp0], 5, 11 \n\t"
  572. "ext %[temp0], %[temp0], 3, 5 \n\t"
  573. "ins %[temp4], %[temp5], 0, 11 \n\t"
  574. "addiu %[src], %[src], 4 \n\t"
  575. "ins %[temp4], %[temp0], 0, 5 \n\t"
  576. #if (WEBP_SWAP_16BIT_CSP == 1)
  577. "ush %[temp4], 0(%[dst]) \n\t"
  578. #else
  579. "wsbh %[temp4], %[temp4] \n\t"
  580. "ush %[temp4], 0(%[dst]) \n\t"
  581. #endif
  582. "bne %[src], %[p_loop2_end], 1b \n\t"
  583. " addiu %[dst], %[dst], 2 \n\t"
  584. "2: \n\t"
  585. ".set pop \n\t"
  586. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  587. [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
  588. [dst]"+&r"(dst), [src]"+&r"(src)
  589. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  590. : "memory"
  591. );
  592. }
  593. static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
  594. int num_pixels, uint8_t* dst) {
  595. int temp0, temp1, temp2, temp3;
  596. const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  597. const uint32_t* const p_loop2_end = src + num_pixels;
  598. __asm__ volatile (
  599. ".set push \n\t"
  600. ".set noreorder \n\t"
  601. "beq %[src], %[p_loop1_end], 3f \n\t"
  602. " nop \n\t"
  603. "0: \n\t"
  604. "lw %[temp0], 0(%[src]) \n\t"
  605. "lw %[temp1], 4(%[src]) \n\t"
  606. "lw %[temp2], 8(%[src]) \n\t"
  607. "lw %[temp3], 12(%[src]) \n\t"
  608. "ins %[temp0], %[temp1], 24, 8 \n\t"
  609. "sra %[temp1], %[temp1], 8 \n\t"
  610. "ins %[temp1], %[temp2], 16, 16 \n\t"
  611. "sll %[temp2], %[temp2], 8 \n\t"
  612. "balign %[temp3], %[temp2], 1 \n\t"
  613. "addiu %[src], %[src], 16 \n\t"
  614. "usw %[temp0], 0(%[dst]) \n\t"
  615. "usw %[temp1], 4(%[dst]) \n\t"
  616. "usw %[temp3], 8(%[dst]) \n\t"
  617. "bne %[src], %[p_loop1_end], 0b \n\t"
  618. " addiu %[dst], %[dst], 12 \n\t"
  619. "3: \n\t"
  620. "beq %[src], %[p_loop2_end], 2f \n\t"
  621. " nop \n\t"
  622. "1: \n\t"
  623. "lw %[temp0], 0(%[src]) \n\t"
  624. "addiu %[src], %[src], 4 \n\t"
  625. "addiu %[dst], %[dst], 3 \n\t"
  626. "ush %[temp0], -3(%[dst]) \n\t"
  627. "sra %[temp0], %[temp0], 16 \n\t"
  628. "bne %[src], %[p_loop2_end], 1b \n\t"
  629. " sb %[temp0], -1(%[dst]) \n\t"
  630. "2: \n\t"
  631. ".set pop \n\t"
  632. : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
  633. [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
  634. : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
  635. : "memory"
  636. );
  637. }
  638. //------------------------------------------------------------------------------
  639. // Entry point
  640. extern void VP8LDspInitMIPSdspR2(void);
  641. WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
  642. VP8LMapColor32b = MapARGB_MIPSdspR2;
  643. VP8LMapColor8b = MapAlpha_MIPSdspR2;
  644. VP8LPredictors[5] = Predictor5_MIPSdspR2;
  645. VP8LPredictors[6] = Predictor6_MIPSdspR2;
  646. VP8LPredictors[7] = Predictor7_MIPSdspR2;
  647. VP8LPredictors[8] = Predictor8_MIPSdspR2;
  648. VP8LPredictors[9] = Predictor9_MIPSdspR2;
  649. VP8LPredictors[10] = Predictor10_MIPSdspR2;
  650. VP8LPredictors[11] = Predictor11_MIPSdspR2;
  651. VP8LPredictors[12] = Predictor12_MIPSdspR2;
  652. VP8LPredictors[13] = Predictor13_MIPSdspR2;
  653. VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
  654. VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
  655. VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
  656. VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
  657. VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
  658. VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
  659. VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
  660. }
  661. #else // !WEBP_USE_MIPS_DSP_R2
  662. WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
  663. #endif // WEBP_USE_MIPS_DSP_R2