dwt.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  /*
   * MMX optimized discrete wavelet transform
   * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   * Copyright (c) 2010 David Conrad
   *
   * This file is part of FFmpeg.
   *
   * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
   * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
   * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  22. #include "libavutil/x86_cpu.h"
  23. #include "dsputil_mmx.h"
  24. #include "dwt.h"
  /*
   * COMPOSE_VERTICAL(ext, align)
   *
   * For a given name suffix `ext`, this macro
   *   - declares the five external routines ff_vertical_compose*<ext>, and
   *   - defines five static wrappers vertical_compose*<ext> around them.
   *
   * Every wrapper follows the same pattern:
   *   1. width_align = width rounded down to a multiple of `align`
   *      (the mask `~(align - 1)` requires `align` to be a power of two);
   *   2. the trailing elements [width_align, width) are computed in plain C
   *      with the matching COMPOSE_* macro;
   *   3. the external routine is called for the first width_align elements.
   *
   * The external routines are presumably the assembly implementations
   * (NOTE(review): they are not defined in this file - confirm).
   *
   * The definition deliberately ends on the closing brace of the last wrapper
   * with no trailing line continuation, so the line that follows the macro is
   * never spliced into its body.
   */
  #define COMPOSE_VERTICAL(ext, align) \
  void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
  void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
  void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
  void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
  void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
  \
  static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
  { \
      int i, width_align = width & ~((align) - 1); \
  \
      /* scalar tail: elements the aligned routine does not cover */ \
      for (i = width_align; i < width; i++) \
          b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
  \
      ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
  } \
  \
  static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
  { \
      int i, width_align = width & ~((align) - 1); \
  \
      for (i = width_align; i < width; i++) \
          b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
  \
      ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
  } \
  \
  static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                             IDWTELEM *b3, IDWTELEM *b4, int width) \
  { \
      int i, width_align = width & ~((align) - 1); \
  \
      for (i = width_align; i < width; i++) \
          b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
  \
      ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
  } \
  \
  static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                            IDWTELEM *b3, IDWTELEM *b4, int width) \
  { \
      int i, width_align = width & ~((align) - 1); \
  \
      for (i = width_align; i < width; i++) \
          b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
  \
      ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
  } \
  \
  static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
  { \
      int i, width_align = width & ~((align) - 1); \
  \
      for (i = width_align; i < width; i++) { \
          /* order matters: b1 is computed from the already updated b0 */ \
          b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
          b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
      } \
  \
      ff_vertical_compose_haar##ext(b0, b1, width_align); \
  }
  85. #if HAVE_YASM
  86. #if !ARCH_X86_64
  87. COMPOSE_VERTICAL(_mmx, 4)
  88. #endif
  89. COMPOSE_VERTICAL(_sse2, 8)
  90. #endif
  91. void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
  92. void ff_horizontal_compose_haar0i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
  93. void ff_horizontal_compose_haar1i_mmx(IDWTELEM *b, IDWTELEM *tmp, int w);
  94. void ff_horizontal_compose_haar0i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
  95. void ff_horizontal_compose_haar1i_sse2(IDWTELEM *b, IDWTELEM *tmp, int w);
  96. void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
  97. {
  98. for (; x < w2; x++) {
  99. b[2*x ] = (tmp[x] + 1)>>1;
  100. b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
  101. }
  102. }
  103. void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
  104. {
  105. for (; x < w2; x++) {
  106. b[2*x ] = tmp[x];
  107. b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);
  108. }
  109. }
  110. void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x)
  111. {
  112. for (; x < w2; x++) {
  113. b[2*x ] = (tmp[x] + 1)>>1;
  114. b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;
  115. }
  116. }
  117. void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
  118. {
  119. #if HAVE_YASM
  120. int mm_flags = av_get_cpu_flags();;
  121. #if !ARCH_X86_64
  122. if (!(mm_flags & AV_CPU_FLAG_MMX))
  123. return;
  124. switch (type) {
  125. case DWT_DIRAC_DD9_7:
  126. d->vertical_compose_l0 = vertical_compose53iL0_mmx;
  127. d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
  128. break;
  129. case DWT_DIRAC_LEGALL5_3:
  130. d->vertical_compose_l0 = vertical_compose53iL0_mmx;
  131. d->vertical_compose_h0 = vertical_compose_dirac53iH0_mmx;
  132. break;
  133. case DWT_DIRAC_DD13_7:
  134. d->vertical_compose_l0 = vertical_compose_dd137iL0_mmx;
  135. d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
  136. break;
  137. case DWT_DIRAC_HAAR0:
  138. d->vertical_compose = vertical_compose_haar_mmx;
  139. d->horizontal_compose = ff_horizontal_compose_haar0i_mmx;
  140. break;
  141. case DWT_DIRAC_HAAR1:
  142. d->vertical_compose = vertical_compose_haar_mmx;
  143. d->horizontal_compose = ff_horizontal_compose_haar1i_mmx;
  144. break;
  145. }
  146. #endif
  147. if (!(mm_flags & AV_CPU_FLAG_SSE2))
  148. return;
  149. switch (type) {
  150. case DWT_DIRAC_DD9_7:
  151. d->vertical_compose_l0 = vertical_compose53iL0_sse2;
  152. d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
  153. break;
  154. case DWT_DIRAC_LEGALL5_3:
  155. d->vertical_compose_l0 = vertical_compose53iL0_sse2;
  156. d->vertical_compose_h0 = vertical_compose_dirac53iH0_sse2;
  157. break;
  158. case DWT_DIRAC_DD13_7:
  159. d->vertical_compose_l0 = vertical_compose_dd137iL0_sse2;
  160. d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
  161. break;
  162. case DWT_DIRAC_HAAR0:
  163. d->vertical_compose = vertical_compose_haar_sse2;
  164. //MMXDISABLED d->horizontal_compose = ff_horizontal_compose_haar0i_sse2;
  165. break;
  166. case DWT_DIRAC_HAAR1:
  167. d->vertical_compose = vertical_compose_haar_sse2;
  168. d->horizontal_compose = ff_horizontal_compose_haar1i_sse2;
  169. break;
  170. }
  171. if (!(mm_flags & AV_CPU_FLAG_SSSE3))
  172. return;
  173. switch (type) {
  174. case DWT_DIRAC_DD9_7:
  175. //MMXDISABLED d->horizontal_compose = ff_horizontal_compose_dd97i_ssse3;
  176. break;
  177. }
  178. #endif // HAVE_YASM
  179. }