intreadwrite.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. /*
  2. * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #ifndef AVUTIL_AVR32_INTREADWRITE_H
  21. #define AVUTIL_AVR32_INTREADWRITE_H
  22. #include <stdint.h>
  23. #include "config.h"
  24. #include "libavutil/bswap.h"
  25. /*
  26. * AVR32 does not support unaligned memory accesses, except for the AP
  27. * series which supports unaligned 32-bit loads and stores. 16-bit
  28. * and 64-bit accesses must be aligned to 16 and 32 bits, respectively.
  29. * This means we cannot use the byte-swapping load/store instructions
  30. * here.
  31. *
  32. * For 16-bit, 24-bit, and (on UC series) 32-bit loads, we instead use
  33. * the LDINS.B instruction, which gcc fails to utilise with the
  34. * generic code. GCC also fails to use plain LD.W and ST.W even for
  35. * AP processors, so we override the generic code. The 64-bit
  36. * versions are improved by using our optimised 32-bit functions.
  37. */
  38. #define AV_RL16 AV_RL16
  39. static av_always_inline uint16_t AV_RL16(const void *p)
  40. {
  41. uint16_t v;
  42. __asm__ ("ld.ub %0, %1 \n\t"
  43. "ldins.b %0:l, %2 \n\t"
  44. : "=&r"(v)
  45. : "m"(*(const uint8_t*)p), "RKs12"(*((const uint8_t*)p+1)));
  46. return v;
  47. }
  48. #define AV_RB16 AV_RB16
  49. static av_always_inline uint16_t AV_RB16(const void *p)
  50. {
  51. uint16_t v;
  52. __asm__ ("ld.ub %0, %2 \n\t"
  53. "ldins.b %0:l, %1 \n\t"
  54. : "=&r"(v)
  55. : "RKs12"(*(const uint8_t*)p), "m"(*((const uint8_t*)p+1)));
  56. return v;
  57. }
  58. #define AV_RB24 AV_RB24
  59. static av_always_inline uint32_t AV_RB24(const void *p)
  60. {
  61. uint32_t v;
  62. __asm__ ("ld.ub %0, %3 \n\t"
  63. "ldins.b %0:l, %2 \n\t"
  64. "ldins.b %0:u, %1 \n\t"
  65. : "=&r"(v)
  66. : "RKs12"(* (const uint8_t*)p),
  67. "RKs12"(*((const uint8_t*)p+1)),
  68. "m" (*((const uint8_t*)p+2)));
  69. return v;
  70. }
  71. #define AV_RL24 AV_RL24
  72. static av_always_inline uint32_t AV_RL24(const void *p)
  73. {
  74. uint32_t v;
  75. __asm__ ("ld.ub %0, %1 \n\t"
  76. "ldins.b %0:l, %2 \n\t"
  77. "ldins.b %0:u, %3 \n\t"
  78. : "=&r"(v)
  79. : "m" (* (const uint8_t*)p),
  80. "RKs12"(*((const uint8_t*)p+1)),
  81. "RKs12"(*((const uint8_t*)p+2)));
  82. return v;
  83. }
  84. #if ARCH_AVR32_AP
  85. #define AV_RB32 AV_RB32
  86. static av_always_inline uint32_t AV_RB32(const void *p)
  87. {
  88. uint32_t v;
  89. __asm__ ("ld.w %0, %1" : "=r"(v) : "m"(*(const uint32_t*)p));
  90. return v;
  91. }
  92. #define AV_WB32 AV_WB32
  93. static av_always_inline void AV_WB32(void *p, uint32_t v)
  94. {
  95. __asm__ ("st.w %0, %1" : "=m"(*(uint32_t*)p) : "r"(v));
  96. }
  /*
   * Little-endian 32-bit accessors, built by byte-swapping the big-endian
   * ones. The generic code would provide the same definitions, but they are
   * needed earlier: the 64-bit helpers later in this file already use them.
   */
  #define AV_RL32(ptr) av_bswap32(AV_RB32(ptr))
  #define AV_WL32(ptr, val) AV_WB32(ptr, av_bswap32(val))
  100. #define AV_WB64 AV_WB64
  101. static av_always_inline void AV_WB64(void *p, uint64_t v)
  102. {
  103. union { uint64_t v; uint32_t hl[2]; } vv = { v };
  104. AV_WB32(p, vv.hl[0]);
  105. AV_WB32((uint32_t*)p+1, vv.hl[1]);
  106. }
  107. #define AV_WL64 AV_WL64
  108. static av_always_inline void AV_WL64(void *p, uint64_t v)
  109. {
  110. union { uint64_t v; uint32_t hl[2]; } vv = { v };
  111. AV_WL32(p, vv.hl[1]);
  112. AV_WL32((uint32_t*)p+1, vv.hl[0]);
  113. }
  114. #else /* ARCH_AVR32_AP */
  115. #define AV_RB32 AV_RB32
  116. static av_always_inline uint32_t AV_RB32(const void *p)
  117. {
  118. uint32_t v;
  119. __asm__ ("ld.ub %0, %4 \n\t"
  120. "ldins.b %0:l, %3 \n\t"
  121. "ldins.b %0:u, %2 \n\t"
  122. "ldins.b %0:t, %1 \n\t"
  123. : "=&r"(v)
  124. : "RKs12"(* (const uint8_t*)p),
  125. "RKs12"(*((const uint8_t*)p+1)),
  126. "RKs12"(*((const uint8_t*)p+2)),
  127. "m" (*((const uint8_t*)p+3)));
  128. return v;
  129. }
  130. #define AV_RL32 AV_RL32
  131. static av_always_inline uint32_t AV_RL32(const void *p)
  132. {
  133. uint32_t v;
  134. __asm__ ("ld.ub %0, %1 \n\t"
  135. "ldins.b %0:l, %2 \n\t"
  136. "ldins.b %0:u, %3 \n\t"
  137. "ldins.b %0:t, %4 \n\t"
  138. : "=&r"(v)
  139. : "m" (* (const uint8_t*)p),
  140. "RKs12"(*((const uint8_t*)p+1)),
  141. "RKs12"(*((const uint8_t*)p+2)),
  142. "RKs12"(*((const uint8_t*)p+3)));
  143. return v;
  144. }
  145. #endif /* ARCH_AVR32_AP */
  146. #define AV_RB64 AV_RB64
  147. static av_always_inline uint64_t AV_RB64(const void *p)
  148. {
  149. union { uint64_t v; uint32_t hl[2]; } v;
  150. v.hl[0] = AV_RB32(p);
  151. v.hl[1] = AV_RB32((const uint32_t*)p+1);
  152. return v.v;
  153. }
  154. #define AV_RL64 AV_RL64
  155. static av_always_inline uint64_t AV_RL64(const void *p)
  156. {
  157. union { uint64_t v; uint32_t hl[2]; } v;
  158. v.hl[1] = AV_RL32(p);
  159. v.hl[0] = AV_RL32((const uint32_t*)p+1);
  160. return v.v;
  161. }
  162. #endif /* AVUTIL_AVR32_INTREADWRITE_H */