sme-libc-routines.c 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #include <stdlib.h>
  2. // WARNING: When building the scalar versions of these functions you need to
  3. // use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
  4. // from recognising a loop idiom and planting calls to memcpy!
  // Copies n bytes from src to dest one byte at a time, starting at the lowest
  // address and working upwards. Returns dest.
  //
  // The ascending order matters: it is what makes this helper usable for
  // overlapping regions where src sits at a higher address than dest.
  static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                   size_t n) __arm_streaming_compatible {
    unsigned char *out = (unsigned char *)dest;
    const unsigned char *in = (const unsigned char *)src;

    for (size_t remaining = n; remaining != 0; --remaining)
      *out++ = *in++;

    return dest;
  }
  // Streaming-compatible memcpy: copies n bytes from src to dest and returns
  // dest.
  //
  // If dest and src overlap the behaviour is undefined, which is what allows
  // the restrict qualifiers here. This also matches the definition of the libc
  // memcpy according to the man page.
  void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                        size_t n) __arm_streaming_compatible {
    unsigned char *out = (unsigned char *)dest;
    const unsigned char *in = (const unsigned char *)src;

    // Ascending byte-by-byte copy.
    for (size_t remaining = n; remaining != 0; --remaining)
      *out++ = *in++;

    return dest;
  }
  // Streaming-compatible memset: stores the value c, converted to unsigned
  // char, into each of the first n bytes at dest. Returns dest.
  void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
    const unsigned char fill = (unsigned char)c;
    unsigned char *out = (unsigned char *)dest;
    size_t remaining = n;

    while (remaining != 0) {
      *out++ = fill;
      --remaining;
    }

    return dest;
  }
  // Copies n bytes from src to dest one byte at a time, starting at the highest
  // address and working downwards. Returns dest.
  static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                   size_t n) __arm_streaming_compatible {
    unsigned char *out = (unsigned char *)dest;
    const unsigned char *in = (const unsigned char *)src;

    // TODO: Improve performance by copying larger chunks in reverse, or by
    // using SVE.
    for (size_t i = n; i != 0; --i)
      out[i - 1] = in[i - 1];

    return dest;
  }
  // Streaming-compatible memmove: copies n bytes from src to dest, where the
  // two regions are allowed to overlap. Returns dest.
  //
  // Semantically a memmove is equivalent to the following:
  //   1. Copy the entire contents of src to a temporary array that does not
  //      overlap with src or dest.
  //   2. Copy the contents of the temporary array into dest.
  //
  // No temporary is used here; the copy direction is chosen from the relative
  // positions of src and dest.
  void *__arm_sc_memmove(void *dest, const void *src,
                         size_t n) __arm_streaming_compatible {
    unsigned char *to = (unsigned char *)dest;
    const unsigned char *from = (const unsigned char *)src;

    // If src and dest don't overlap then a plain forward copy is enough.
    const int disjoint = (from > (to + n)) || (to > (from + n));

    // Overlap case 1:
    //   src:  Low     |   ->   |     High
    //   dest: Low  |   ->   |        High
    // Here src is always ahead of dest at a higher address. If we first read a
    // chunk of data from src we can safely write the same chunk to dest without
    // corrupting future reads of src, so a forward copy is still correct.
    const int copy_forward = disjoint || (from > to);

    // Overlap case 2:
    //   src:  Low  |   ->   |        High
    //   dest: Low     |   ->   |     High
    // While we're in the overlap region a forward copy would corrupt future
    // reads of src when writing to dest. An efficient way to handle this is to
    // copy the data in reverse, starting at the highest address.
    return copy_forward ? __arm_sc_memcpy_fwd(dest, src, n)
                        : __arm_sc_memcpy_rev(dest, src, n);
  }
  // Streaming-compatible memchr: scans the first n bytes at src, from the
  // lowest address upwards, for the value c converted to unsigned char.
  // Returns a pointer to the first matching byte, or NULL if none of the n
  // bytes match.
  const void *__arm_sc_memchr(const void *src, int c,
                              size_t n) __arm_streaming_compatible {
    const unsigned char needle = (unsigned char)c;
    const unsigned char *cursor = (const unsigned char *)src;

    for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
      if (*cursor == needle)
        return cursor;
    }

    return NULL;
  }