dsputil_mmi.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
/*
 * MMI optimized DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 *
 * MMI optimization by Leon van Stuivenberg
 * clear_blocks_mmi() by BroadQ
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
  24. #include "libavcodec/dsputil.h"
  25. #include "mmi.h"
  26. void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
  27. void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
  28. void ff_mmi_idct(DCTELEM *block);
  29. static void clear_blocks_mmi(DCTELEM * blocks)
  30. {
  31. __asm__ volatile(
  32. ".set noreorder \n"
  33. "addiu $9, %0, 768 \n"
  34. "nop \n"
  35. "1: \n"
  36. "sq $0, 0(%0) \n"
  37. "move $8, %0 \n"
  38. "addi %0, %0, 64 \n"
  39. "sq $0, 16($8) \n"
  40. "slt $10, %0, $9 \n"
  41. "sq $0, 32($8) \n"
  42. "bnez $10, 1b \n"
  43. "sq $0, 48($8) \n"
  44. ".set reorder \n"
  45. : "+r" (blocks) :: "$8", "$9", "memory" );
  46. }
  47. static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
  48. {
  49. __asm__ volatile(
  50. ".set push \n\t"
  51. ".set mips3 \n\t"
  52. "ld $8, 0(%0) \n\t"
  53. "add %0, %0, %2 \n\t"
  54. "ld $9, 0(%0) \n\t"
  55. "add %0, %0, %2 \n\t"
  56. "ld $10, 0(%0) \n\t"
  57. "pextlb $8, $0, $8 \n\t"
  58. "sq $8, 0(%1) \n\t"
  59. "add %0, %0, %2 \n\t"
  60. "ld $8, 0(%0) \n\t"
  61. "pextlb $9, $0, $9 \n\t"
  62. "sq $9, 16(%1) \n\t"
  63. "add %0, %0, %2 \n\t"
  64. "ld $9, 0(%0) \n\t"
  65. "pextlb $10, $0, $10 \n\t"
  66. "sq $10, 32(%1) \n\t"
  67. "add %0, %0, %2 \n\t"
  68. "ld $10, 0(%0) \n\t"
  69. "pextlb $8, $0, $8 \n\t"
  70. "sq $8, 48(%1) \n\t"
  71. "add %0, %0, %2 \n\t"
  72. "ld $8, 0(%0) \n\t"
  73. "pextlb $9, $0, $9 \n\t"
  74. "sq $9, 64(%1) \n\t"
  75. "add %0, %0, %2 \n\t"
  76. "ld $9, 0(%0) \n\t"
  77. "pextlb $10, $0, $10 \n\t"
  78. "sq $10, 80(%1) \n\t"
  79. "pextlb $8, $0, $8 \n\t"
  80. "sq $8, 96(%1) \n\t"
  81. "pextlb $9, $0, $9 \n\t"
  82. "sq $9, 112(%1) \n\t"
  83. ".set pop \n\t"
  84. : "+r" (pixels) : "r" (block), "r" (line_size) : "$8", "$9", "$10", "memory" );
  85. }
  86. static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  87. {
  88. __asm__ volatile(
  89. ".set push \n\t"
  90. ".set mips3 \n\t"
  91. "1: \n\t"
  92. "ldr $8, 0(%1) \n\t"
  93. "addiu %2, %2, -1 \n\t"
  94. "ldl $8, 7(%1) \n\t"
  95. "add %1, %1, %3 \n\t"
  96. "sd $8, 0(%0) \n\t"
  97. "add %0, %0, %3 \n\t"
  98. "bgtz %2, 1b \n\t"
  99. ".set pop \n\t"
  100. : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
  101. : "$8", "memory" );
  102. }
  103. static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  104. {
  105. __asm__ volatile (
  106. ".set push \n\t"
  107. ".set mips3 \n\t"
  108. "1: \n\t"
  109. "ldr $8, 0(%1) \n\t"
  110. "add $11, %1, %3 \n\t"
  111. "ldl $8, 7(%1) \n\t"
  112. "add $10, %0, %3 \n\t"
  113. "ldr $9, 8(%1) \n\t"
  114. "ldl $9, 15(%1) \n\t"
  115. "ldr $12, 0($11) \n\t"
  116. "add %1, $11, %3 \n\t"
  117. "ldl $12, 7($11) \n\t"
  118. "pcpyld $8, $9, $8 \n\t"
  119. "sq $8, 0(%0) \n\t"
  120. "ldr $13, 8($11) \n\t"
  121. "addiu %2, %2, -2 \n\t"
  122. "ldl $13, 15($11) \n\t"
  123. "add %0, $10, %3 \n\t"
  124. "pcpyld $12, $13, $12 \n\t"
  125. "sq $12, 0($10) \n\t"
  126. "bgtz %2, 1b \n\t"
  127. ".set pop \n\t"
  128. : "+r" (block), "+r" (pixels), "+r" (h) : "r" (line_size)
  129. : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
  130. }
  131. void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
  132. {
  133. const int idct_algo= avctx->idct_algo;
  134. c->clear_blocks = clear_blocks_mmi;
  135. c->put_pixels_tab[1][0] = put_pixels8_mmi;
  136. c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;
  137. c->put_pixels_tab[0][0] = put_pixels16_mmi;
  138. c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
  139. c->get_pixels = get_pixels_mmi;
  140. if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
  141. c->idct_put= ff_mmi_idct_put;
  142. c->idct_add= ff_mmi_idct_add;
  143. c->idct = ff_mmi_idct;
  144. c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
  145. }
  146. }