fpel_mmx.c 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /*
  2. * MMX-optimized avg/put pixel routines
  3. *
  4. * Copyright (c) 2000, 2001 Fabrice Bellard
  5. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  6. *
  7. * This file is part of FFmpeg.
  8. *
  9. * FFmpeg is free software; you can redistribute it and/or
  10. * modify it under the terms of the GNU Lesser General Public
  11. * License as published by the Free Software Foundation; either
  12. * version 2.1 of the License, or (at your option) any later version.
  13. *
  14. * FFmpeg is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. * Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public
  20. * License along with FFmpeg; if not, write to the Free Software
  21. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22. */
  23. #include <stddef.h>
  24. #include <stdint.h>
  25. #include "config.h"
  26. #include "dsputil_x86.h"
  27. #if HAVE_MMX_INLINE
  28. // in case more speed is needed - unrolling would certainly help
  29. void ff_avg_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  30. ptrdiff_t line_size, int h)
  31. {
  32. MOVQ_BFE(mm6);
  33. JUMPALIGN();
  34. do {
  35. __asm__ volatile(
  36. "movq %0, %%mm0 \n\t"
  37. "movq %1, %%mm1 \n\t"
  38. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  39. "movq %%mm2, %0 \n\t"
  40. :"+m"(*block)
  41. :"m"(*pixels)
  42. :"memory");
  43. pixels += line_size;
  44. block += line_size;
  45. }
  46. while (--h);
  47. }
  48. void ff_avg_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  49. ptrdiff_t line_size, int h)
  50. {
  51. MOVQ_BFE(mm6);
  52. JUMPALIGN();
  53. do {
  54. __asm__ volatile(
  55. "movq %0, %%mm0 \n\t"
  56. "movq %1, %%mm1 \n\t"
  57. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  58. "movq %%mm2, %0 \n\t"
  59. "movq 8%0, %%mm0 \n\t"
  60. "movq 8%1, %%mm1 \n\t"
  61. PAVGB_MMX(%%mm0, %%mm1, %%mm2, %%mm6)
  62. "movq %%mm2, 8%0 \n\t"
  63. :"+m"(*block)
  64. :"m"(*pixels)
  65. :"memory");
  66. pixels += line_size;
  67. block += line_size;
  68. }
  69. while (--h);
  70. }
  71. void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
  72. ptrdiff_t line_size, int h)
  73. {
  74. __asm__ volatile (
  75. "lea (%3, %3), %%"REG_a" \n\t"
  76. ".p2align 3 \n\t"
  77. "1: \n\t"
  78. "movq (%1 ), %%mm0 \n\t"
  79. "movq (%1, %3), %%mm1 \n\t"
  80. "movq %%mm0, (%2) \n\t"
  81. "movq %%mm1, (%2, %3) \n\t"
  82. "add %%"REG_a", %1 \n\t"
  83. "add %%"REG_a", %2 \n\t"
  84. "movq (%1 ), %%mm0 \n\t"
  85. "movq (%1, %3), %%mm1 \n\t"
  86. "movq %%mm0, (%2) \n\t"
  87. "movq %%mm1, (%2, %3) \n\t"
  88. "add %%"REG_a", %1 \n\t"
  89. "add %%"REG_a", %2 \n\t"
  90. "subl $4, %0 \n\t"
  91. "jnz 1b \n\t"
  92. : "+g"(h), "+r"(pixels), "+r"(block)
  93. : "r"((x86_reg)line_size)
  94. : "%"REG_a, "memory"
  95. );
  96. }
  97. void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
  98. ptrdiff_t line_size, int h)
  99. {
  100. __asm__ volatile (
  101. "lea (%3, %3), %%"REG_a" \n\t"
  102. ".p2align 3 \n\t"
  103. "1: \n\t"
  104. "movq (%1 ), %%mm0 \n\t"
  105. "movq 8(%1 ), %%mm4 \n\t"
  106. "movq (%1, %3), %%mm1 \n\t"
  107. "movq 8(%1, %3), %%mm5 \n\t"
  108. "movq %%mm0, (%2) \n\t"
  109. "movq %%mm4, 8(%2) \n\t"
  110. "movq %%mm1, (%2, %3) \n\t"
  111. "movq %%mm5, 8(%2, %3) \n\t"
  112. "add %%"REG_a", %1 \n\t"
  113. "add %%"REG_a", %2 \n\t"
  114. "movq (%1 ), %%mm0 \n\t"
  115. "movq 8(%1 ), %%mm4 \n\t"
  116. "movq (%1, %3), %%mm1 \n\t"
  117. "movq 8(%1, %3), %%mm5 \n\t"
  118. "movq %%mm0, (%2) \n\t"
  119. "movq %%mm4, 8(%2) \n\t"
  120. "movq %%mm1, (%2, %3) \n\t"
  121. "movq %%mm5, 8(%2, %3) \n\t"
  122. "add %%"REG_a", %1 \n\t"
  123. "add %%"REG_a", %2 \n\t"
  124. "subl $4, %0 \n\t"
  125. "jnz 1b \n\t"
  126. : "+g"(h), "+r"(pixels), "+r"(block)
  127. : "r"((x86_reg)line_size)
  128. : "%"REG_a, "memory"
  129. );
  130. }
  131. #endif /* HAVE_MMX_INLINE */