codec_neon64.c 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. #if (defined(__ARM_NEON) && !defined(__ARM_NEON__))
  2. #define __ARM_NEON__
  3. #endif
  4. #include <stdint.h>
  5. #include <stddef.h>
  6. #include <stdlib.h>
  7. #ifdef __ARM_NEON__
  8. #include <arm_neon.h>
  9. #endif
  10. #include "libbase64.h"
  11. #include "codecs.h"
  12. #if (defined(__aarch64__) && defined(__ARM_NEON__))
  // Lane-wise helpers over 16-byte unsigned vectors. In each macro the
  // scalar argument is broadcast to all lanes with vdupq_n_u8.
  //
  // CMPGT:   compare mask for lanes where vec >  k.
  // CMPEQ:   compare mask for lanes where vec == k.
  // REPLACE: bitwise AND of mask with k; with a compare mask as input this
  //          leaves k in the selected lanes and 0 elsewhere.
  // RANGE:   compare mask for lanes where lo <= vec <= hi (vec is
  //          evaluated twice).
  #define CMPGT(vec, k)      vcgtq_u8((vec), vdupq_n_u8(k))
  #define CMPEQ(vec, k)      vceqq_u8((vec), vdupq_n_u8(k))
  #define REPLACE(mask, k)   vandq_u8((mask), vdupq_n_u8(k))
  #define RANGE(vec, lo, hi) \
      vandq_u8(vcgeq_u8((vec), vdupq_n_u8(lo)), vcleq_u8((vec), vdupq_n_u8(hi)))
  // Base64 alphabet stored transposed, so that the encoder can use a
  // 64-byte lookup. Each 4-character row below holds the characters for
  // alphabet indices r, r + 16, r + 32 and r + 48; reading the rows top to
  // bottom, one column at a time, yields the standard alphabet in order.
  //
  // Declared as a real const array rather than a pointer to a string
  // literal: the data stays read-only, there is no separate writable
  // pointer object, and sizeof reports the table size (64 characters plus
  // the terminating NUL, which the lookup never reads).
  static const char neon64_base64_table_enc_transposed[] =
      "AQgw"
      "BRhx"
      "CSiy"
      "DTjz"
      "EUk0"
      "FVl1"
      "GWm2"
      "HXn3"
      "IYo4"
      "JZp5"
      "Kaq6"
      "Lbr7"
      "Mcs8"
      "Ndt9"
      "Oeu+"
      "Pfv/";
  39. #endif
  // The stride of these 64-bit NEON routines is large (48 bytes when
  // encoding, 64 bytes when decoding), so the uint64 codec fragment is
  // included as well to stay performant on smaller inputs.

  // Streaming encode entry point for the AArch64 NEON codec.
  //
  // When built for AArch64 with NEON, the body is assembled from the
  // enc_head.c, enc_neon.c, enc_uint64.c and enc_tail.c fragments, in that
  // order, after loading the transposed alphabet into `tbl_enc`. On any
  // other target the arguments are ignored and the call aborts.
  void
  neon64_base64_stream_encode(struct neon64_base64_state *state,
                              const char *src,
                              size_t srclen,
                              char *out,
                              size_t *outlen)
  {
  #if !(defined(__aarch64__) && defined(__ARM_NEON__))
      // Not compiled for AArch64 NEON: silence unused-parameter warnings
      // and terminate.
      (void) state;
      (void) src;
      (void) srclen;
      (void) out;
      (void) outlen;
      abort();
  #else
      // Load the 64-byte transposed table into four 16-byte vectors.
      uint8x16x4_t tbl_enc = vld4q_u8((const uint8_t *) neon64_base64_table_enc_transposed);

      #include "enc_head.c"
      #include "enc_neon.c"
      #include "enc_uint64.c"
      #include "enc_tail.c"
  #endif
  }
  // Streaming decode entry point for the AArch64 NEON codec.
  //
  // When built for AArch64 with NEON, the body (including the int return
  // value) is assembled from the dec_head.c, dec_neon.c, dec_uint64.c and
  // dec_tail.c fragments, in that order. On any other target the arguments
  // are ignored and the call aborts without returning.
  int
  neon64_base64_stream_decode(struct neon64_base64_state *state,
                              const char *src,
                              size_t srclen,
                              char *out,
                              size_t *outlen)
  {
  #if !(defined(__aarch64__) && defined(__ARM_NEON__))
      // Not compiled for AArch64 NEON: silence unused-parameter warnings
      // and terminate.
      (void) state;
      (void) src;
      (void) srclen;
      (void) out;
      (void) outlen;
      abort();
  #else
      #include "dec_head.c"
      #include "dec_neon.c"
      #include "dec_uint64.c"
      #include "dec_tail.c"
  #endif
  }