// Copyright 2016 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// MSA variant of Image transform methods for lossless encoder.
//
// Authors: Prashant Patil (Prashant.Patil@imgtec.com)
  13. #include "./dsp.h"
  14. #if defined(WEBP_USE_MSA)
  15. #include "./lossless.h"
  16. #include "./msa_macro.h"
  17. #define TRANSFORM_COLOR_8(src0, src1, dst0, dst1, c0, c1, mask0, mask1) do { \
  18. v8i16 g0, g1, t0, t1, t2, t3; \
  19. v4i32 t4, t5; \
  20. VSHF_B2_SH(src0, src0, src1, src1, mask0, mask0, g0, g1); \
  21. DOTP_SB2_SH(g0, g1, c0, c0, t0, t1); \
  22. SRAI_H2_SH(t0, t1, 5); \
  23. t0 = __msa_subv_h((v8i16)src0, t0); \
  24. t1 = __msa_subv_h((v8i16)src1, t1); \
  25. t4 = __msa_srli_w((v4i32)src0, 16); \
  26. t5 = __msa_srli_w((v4i32)src1, 16); \
  27. DOTP_SB2_SH(t4, t5, c1, c1, t2, t3); \
  28. SRAI_H2_SH(t2, t3, 5); \
  29. SUB2(t0, t2, t1, t3, t0, t1); \
  30. VSHF_B2_UB(src0, t0, src1, t1, mask1, mask1, dst0, dst1); \
  31. } while (0)
  32. #define TRANSFORM_COLOR_4(src, dst, c0, c1, mask0, mask1) do { \
  33. const v16i8 g0 = VSHF_SB(src, src, mask0); \
  34. v8i16 t0 = __msa_dotp_s_h(c0, g0); \
  35. v8i16 t1; \
  36. v4i32 t2; \
  37. t0 = SRAI_H(t0, 5); \
  38. t0 = __msa_subv_h((v8i16)src, t0); \
  39. t2 = __msa_srli_w((v4i32)src, 16); \
  40. t1 = __msa_dotp_s_h(c1, (v16i8)t2); \
  41. t1 = SRAI_H(t1, 5); \
  42. t0 = t0 - t1; \
  43. dst = VSHF_UB(src, t0, mask1); \
  44. } while (0)
  45. static void TransformColor_MSA(const VP8LMultipliers* const m, uint32_t* data,
  46. int num_pixels) {
  47. v16u8 src0, dst0;
  48. const v16i8 g2br = (v16i8)__msa_fill_w(m->green_to_blue_ |
  49. (m->green_to_red_ << 16));
  50. const v16i8 r2b = (v16i8)__msa_fill_w(m->red_to_blue_);
  51. const v16u8 mask0 = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
  52. 13, 255, 13, 255 };
  53. const v16u8 mask1 = { 16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11,
  54. 28, 13, 30, 15 };
  55. while (num_pixels >= 8) {
  56. v16u8 src1, dst1;
  57. LD_UB2(data, 4, src0, src1);
  58. TRANSFORM_COLOR_8(src0, src1, dst0, dst1, g2br, r2b, mask0, mask1);
  59. ST_UB2(dst0, dst1, data, 4);
  60. data += 8;
  61. num_pixels -= 8;
  62. }
  63. if (num_pixels > 0) {
  64. if (num_pixels >= 4) {
  65. src0 = LD_UB(data);
  66. TRANSFORM_COLOR_4(src0, dst0, g2br, r2b, mask0, mask1);
  67. ST_UB(dst0, data);
  68. data += 4;
  69. num_pixels -= 4;
  70. }
  71. if (num_pixels > 0) {
  72. src0 = LD_UB(data);
  73. TRANSFORM_COLOR_4(src0, dst0, g2br, r2b, mask0, mask1);
  74. if (num_pixels == 3) {
  75. const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
  76. const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 2);
  77. SD(pix_d, data + 0);
  78. SW(pix_w, data + 2);
  79. } else if (num_pixels == 2) {
  80. const uint64_t pix_d = __msa_copy_s_d((v2i64)dst0, 0);
  81. SD(pix_d, data);
  82. } else {
  83. const uint32_t pix_w = __msa_copy_s_w((v4i32)dst0, 0);
  84. SW(pix_w, data);
  85. }
  86. }
  87. }
  88. }
  89. static void SubtractGreenFromBlueAndRed_MSA(uint32_t* argb_data,
  90. int num_pixels) {
  91. int i;
  92. uint8_t* ptemp_data = (uint8_t*)argb_data;
  93. v16u8 src0, dst0, tmp0;
  94. const v16u8 mask = { 1, 255, 1, 255, 5, 255, 5, 255, 9, 255, 9, 255,
  95. 13, 255, 13, 255 };
  96. while (num_pixels >= 8) {
  97. v16u8 src1, dst1, tmp1;
  98. LD_UB2(ptemp_data, 16, src0, src1);
  99. VSHF_B2_UB(src0, src1, src1, src0, mask, mask, tmp0, tmp1);
  100. SUB2(src0, tmp0, src1, tmp1, dst0, dst1);
  101. ST_UB2(dst0, dst1, ptemp_data, 16);
  102. ptemp_data += 8 * 4;
  103. num_pixels -= 8;
  104. }
  105. if (num_pixels > 0) {
  106. if (num_pixels >= 4) {
  107. src0 = LD_UB(ptemp_data);
  108. tmp0 = VSHF_UB(src0, src0, mask);
  109. dst0 = src0 - tmp0;
  110. ST_UB(dst0, ptemp_data);
  111. ptemp_data += 4 * 4;
  112. num_pixels -= 4;
  113. }
  114. for (i = 0; i < num_pixels; i++) {
  115. const uint8_t b = ptemp_data[0];
  116. const uint8_t g = ptemp_data[1];
  117. const uint8_t r = ptemp_data[2];
  118. ptemp_data[0] = (b - g) & 0xff;
  119. ptemp_data[2] = (r - g) & 0xff;
  120. ptemp_data += 4;
  121. }
  122. }
  123. }
  124. //------------------------------------------------------------------------------
  125. // Entry point
  126. extern void VP8LEncDspInitMSA(void);
  127. WEBP_TSAN_IGNORE_FUNCTION void VP8LEncDspInitMSA(void) {
  128. VP8LSubtractGreenFromBlueAndRed = SubtractGreenFromBlueAndRed_MSA;
  129. VP8LTransformColor = TransformColor_MSA;
  130. }
  131. #else // !WEBP_USE_MSA
  132. WEBP_DSP_INIT_STUB(VP8LEncDspInitMSA)
  133. #endif // WEBP_USE_MSA