dsptest.c 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /*
  2. * MMX optimized DSP utils
  3. * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. */
  19. #define TESTCPU_MAIN
  20. #include "avcodec.h"
  21. #include "dsputil.h"
  22. #include "mpegvideo.h"
  23. #include "mpeg12data.h"
  24. #include "mpeg4data.h"
  25. #include "../libavcodec/i386/cputest.c"
  26. #include "../libavcodec/i386/dsputil_mmx.c"
  27. #include "../libavcodec/i386/fdct_mmx.c"
  28. #include "../libavcodec/i386/idct_mmx.c"
  29. #include "../libavcodec/i386/motion_est_mmx.c"
  30. #include "../libavcodec/i386/simple_idct_mmx.c"
  31. #include "../libavcodec/dsputil.c"
  32. #include "../libavcodec/simple_idct.c"
  33. #include "../libavcodec/jfdctfst.c"
  34. #undef TESTCPU_MAIN
  35. #define PAD 0x10000
  /*
   * Speed test for various DSP routines; it should probably be extended
   * into a general-purpose regression test later.
   *
   * FIXME: currently i386 only.
   */
  /*
   * Initializer generators for the benchmark table.  Each macro expands to
   * one or more { "label", function, flags } entries for the routine family
   * `fn`, built by appending a variant suffix to both the printed label and
   * the function identifier.  PAD is OR-ed into the last entry of a group.
   */

  /* Variants: _c (flags 0), _mmx, _mmx2. */
  #define PIX_FUNC_C(fn)                                  \
      { #fn "_c",     fn ## _c,     0                  }, \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX             }, \
      { #fn "_mmx2",  fn ## _mmx2,  MM_MMXEXT | PAD    }

  /* Variants: _mmx, _3dnow, _mmx2. */
  #define PIX_FUNC(fn)                                    \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX             }, \
      { #fn "_3dnow", fn ## _3dnow, MM_3DNOW           }, \
      { #fn "_mmx2",  fn ## _mmx2,  MM_MMXEXT | PAD    }

  /* Single _mmx variant, which also closes its group. */
  #define PIX_FUNC_MMX(fn)                                \
      { #fn "_mmx",   fn ## _mmx,   MM_MMX | PAD       }
  52. /*
  53. PIX_FUNC_C(pix_abs16x16),
  54. PIX_FUNC_C(pix_abs16x16_x2),
  55. PIX_FUNC_C(pix_abs16x16_y2),
  56. PIX_FUNC_C(pix_abs16x16_xy2),
  57. PIX_FUNC_C(pix_abs8x8),
  58. PIX_FUNC_C(pix_abs8x8_x2),
  59. PIX_FUNC_C(pix_abs8x8_y2),
  60. PIX_FUNC_C(pix_abs8x8_xy2),
  61. */
  62. static const struct pix_func {
  63. char* name;
  64. op_pixels_func func;
  65. int mm_flags;
  66. } pix_func[] = {
  67. PIX_FUNC_MMX(put_pixels),
  68. //PIX_FUNC_MMX(get_pixels),
  69. //PIX_FUNC_MMX(put_pixels_clamped),
  70. #if 1
  71. PIX_FUNC(put_pixels_x2),
  72. PIX_FUNC(put_pixels_y2),
  73. PIX_FUNC_MMX(put_pixels_xy2),
  74. PIX_FUNC(put_no_rnd_pixels_x2),
  75. PIX_FUNC(put_no_rnd_pixels_y2),
  76. PIX_FUNC_MMX(put_no_rnd_pixels_xy2),
  77. PIX_FUNC(avg_pixels),
  78. PIX_FUNC(avg_pixels_x2),
  79. PIX_FUNC(avg_pixels_y2),
  80. PIX_FUNC(avg_pixels_xy2),
  81. PIX_FUNC_MMX(avg_no_rnd_pixels),
  82. PIX_FUNC_MMX(avg_no_rnd_pixels_x2),
  83. PIX_FUNC_MMX(avg_no_rnd_pixels_y2),
  84. PIX_FUNC_MMX(avg_no_rnd_pixels_xy2),
  85. #endif
  86. { 0, 0 }
  87. };
  /*
   * Read the x86 time-stamp counter.
   *
   * The RDTSC instruction leaves the low 32 bits of the counter in EAX and
   * the high 32 bits in EDX.  The two halves are fetched through separate
   * "=a" / "=d" outputs and combined by hand: the "=A" constraint only
   * denotes the EDX:EAX pair on 32-bit x86, whereas on x86-64 it selects a
   * single register and half of the counter is lost.
   *
   * __asm__ / __volatile__ are used instead of the plain keywords so the
   * function also builds in strict ISO modes (-std=c99, -std=c11).
   *
   * Returns the full 64-bit counter value.
   */
  static inline long long rdtsc(void)
  {
      unsigned int lo, hi;

      __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

      return (long long)(((unsigned long long)hi << 32) | lo);
  }
  96. static test_speed(int step)
  97. {
  98. const struct pix_func* pix = pix_func;
  99. const int linesize = 720;
  100. char empty[32768];
  101. char* bu =(char*)(((long)empty + 32) & ~0xf);
  102. int sum = 0;
  103. while (pix->name)
  104. {
  105. int i;
  106. uint64_t te, ts;
  107. op_pixels_func func = pix->func;
  108. char* im = bu;
  109. if (pix->mm_flags & mm_flags)
  110. {
  111. printf("%30s... ", pix->name);
  112. fflush(stdout);
  113. ts = rdtsc();
  114. for(i=0; i<100000; i++){
  115. func(im, im + 1000, linesize, 16);
  116. im += step;
  117. if (im > bu + 20000)
  118. im = bu;
  119. }
  120. te = rdtsc();
  121. emms();
  122. printf("% 9d\n", (int)(te - ts));
  123. sum += (te - ts) / 100000;
  124. if (pix->mm_flags & PAD)
  125. puts("");
  126. }
  127. pix++;
  128. }
  129. printf("Total sum: %d\n", sum);
  130. }
  131. int main(int argc, char* argv[])
  132. {
  133. int step = 16;
  134. if (argc > 1)
  135. {
  136. // something simple for now
  137. if (argc > 2 && (strcmp("-s", argv[1]) == 0
  138. || strcmp("-step", argv[1]) == 0))
  139. step = atoi(argv[2]);
  140. }
  141. mm_flags = mm_support();
  142. printf("%s: detected CPU flags:", argv[0]);
  143. if (mm_flags & MM_MMX)
  144. printf(" mmx");
  145. if (mm_flags & MM_MMXEXT)
  146. printf(" mmxext");
  147. if (mm_flags & MM_3DNOW)
  148. printf(" 3dnow");
  149. if (mm_flags & MM_SSE)
  150. printf(" sse");
  151. if (mm_flags & MM_SSE2)
  152. printf(" sse2");
  153. printf("\n");
  154. printf("Using step: %d\n", step);
  155. test_speed(step);
  156. }