base64.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #include "base64.h"
  2. #include <contrib/libs/base64/avx2/libbase64.h>
  3. #include <contrib/libs/base64/ssse3/libbase64.h>
  4. #include <contrib/libs/base64/neon32/libbase64.h>
  5. #include <contrib/libs/base64/neon64/libbase64.h>
  6. #include <contrib/libs/base64/plain32/libbase64.h>
  7. #include <contrib/libs/base64/plain64/libbase64.h>
  8. #include <util/generic/yexception.h>
  9. #include <util/system/cpu_id.h>
  10. #include <util/system/platform.h>
  11. #include <cstdlib>
  12. namespace {
  13. struct TImpl {
  14. void (*Encode)(const char* src, size_t srclen, char* out, size_t* outlen);
  15. int (*Decode)(const char* src, size_t srclen, char* out, size_t* outlen);
  16. TImpl() {
  17. #if defined(_arm32_)
  18. const bool haveNEON32 = true;
  19. #else
  20. const bool haveNEON32 = false;
  21. #endif
  22. #if defined(_arm64_)
  23. const bool haveNEON64 = true;
  24. #else
  25. const bool haveNEON64 = false;
  26. #endif
  27. # ifdef _windows_
  28. // msvc does something wrong in release-build, so we temprorary disable this branch on windows
  29. // https://developercommunity.visualstudio.com/content/problem/334085/release-build-has-made-wrong-optimizaion-in-base64.html
  30. const bool isWin = true;
  31. # else
  32. const bool isWin = false;
  33. # endif
  34. if (!isWin && NX86::HaveAVX() && NX86::HaveAVX2()) {
  35. Encode = avx2_base64_encode;
  36. Decode = avx2_base64_decode;
  37. } else if (NX86::HaveSSSE3()) {
  38. Encode = ssse3_base64_encode;
  39. Decode = ssse3_base64_decode;
  40. } else if (haveNEON64) {
  41. Encode = neon64_base64_encode;
  42. Decode = neon64_base64_decode;
  43. } else if (haveNEON32) {
  44. Encode = neon32_base64_encode;
  45. Decode = neon32_base64_decode;
  46. } else if (sizeof(void*) == 8) {
  47. // running on a 64 bit platform
  48. Encode = plain64_base64_encode;
  49. Decode = plain64_base64_decode;
  50. } else if (sizeof(void*) == 4) {
  51. // running on a 32 bit platform (actually impossible in Arcadia)
  52. Encode = plain32_base64_encode;
  53. Decode = plain32_base64_decode;
  54. } else {
  55. // failed to find appropriate implementation
  56. std::abort();
  57. }
  58. }
  59. };
  60. const TImpl GetImpl() {
  61. static const TImpl IMPL;
  62. return IMPL;
  63. }
  64. }
// Standard alphabet: index 0..63 -> output character ('+' is 62, '/' is 63).
static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Reverse table for the non-strict decoder: input byte -> 6-bit value.
// Both alphabets are accepted: '+' and '-' map to 62, '/' and '_' map to 63.
// Every other byte, including the padding characters '=' and ',', maps to 0,
// i.e. it is indistinguishable from 'A' after lookup.
static const char base64_bkw[] = {
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 0..15
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 16..31
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\76', '\0', '\76', '\0', '\77', // 32..47
'\64', '\65', '\66', '\67', '\70', '\71', '\72', '\73', '\74', '\75', '\0', '\0', '\0', '\0', '\0', '\0', // 48..63
'\0', '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\10', '\11', '\12', '\13', '\14', '\15', '\16', // 64..79
'\17', '\20', '\21', '\22', '\23', '\24', '\25', '\26', '\27', '\30', '\31', '\0', '\0', '\0', '\0', '\77', // 80..95
'\0', '\32', '\33', '\34', '\35', '\36', '\37', '\40', '\41', '\42', '\43', '\44', '\45', '\46', '\47', '\50', // 96..111
'\51', '\52', '\53', '\54', '\55', '\56', '\57', '\60', '\61', '\62', '\63', '\0', '\0', '\0', '\0', '\0', // 112..127
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 128..143
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 144..159
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 160..175
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 176..191
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 192..207
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 208..223
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 224..239
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'}; // 240..255
// One entry per possible byte value, so indexing with an unsigned char never goes out of bounds.
static_assert(Y_ARRAY_SIZE(base64_bkw) == 256, "wrong size");
// Base64 for url encoding, RFC3548: same as the standard alphabet except '-' is 62 and '_' is 63.
static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
  86. static inline unsigned char GetBase64EncodedIndex0(unsigned char octet0) {
  87. return (octet0 >> 2);
  88. }
  89. static inline unsigned char GetBase64EncodedIndex1(unsigned char octet0, unsigned char octet1) {
  90. return (((octet0 << 4) & 0x30) | ((octet1 >> 4) & 0x0f));
  91. }
  92. static inline unsigned char GetBase64EncodedIndex2(unsigned char octet1, unsigned char octet2) {
  93. return (((octet1 << 2) & 0x3c) | ((octet2 >> 6) & 0x03));
  94. }
  95. static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) {
  96. return (octet2 & 0x3f);
  97. }
  98. template <bool urlVersion, bool usePadding = true>
  99. static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) {
  100. const char* const base64_etab = (urlVersion ? base64_etab_url : base64_etab_std);
  101. const char pad = (urlVersion ? ',' : '=');
  102. size_t idx = 0;
  103. while (idx + 2 < len) {
  104. *outstr++ = base64_etab[GetBase64EncodedIndex0(instr[idx])];
  105. *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], instr[idx + 1])];
  106. *outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], instr[idx + 2])];
  107. *outstr++ = base64_etab[GetBase64EncodedIndex3(instr[idx + 2])];
  108. idx += 3;
  109. }
  110. if (idx < len) {
  111. *outstr++ = base64_etab[GetBase64EncodedIndex0(instr[idx])];
  112. if (idx + 1 < len) {
  113. *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], instr[idx + 1])];
  114. *outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], '\0')];
  115. } else {
  116. *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], '\0')];
  117. if (usePadding) {
  118. *outstr++ = pad;
  119. }
  120. }
  121. if (usePadding) {
  122. *outstr++ = pad;
  123. }
  124. }
  125. *outstr = 0;
  126. return outstr;
  127. }
  128. static char* Base64EncodePlain(char* outstr, const unsigned char* instr, size_t len) {
  129. return Base64EncodeImpl<false>(outstr, instr, len);
  130. }
  131. char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) {
  132. return Base64EncodeImpl<true>(outstr, instr, len);
  133. }
  134. char* Base64EncodeNoPadding(char* outstr, const unsigned char* instr, size_t len) {
  135. return Base64EncodeImpl<false, false>(outstr, instr, len);
  136. }
  137. char* Base64EncodeUrlNoPadding(char* outstr, const unsigned char* instr, size_t len) {
  138. return Base64EncodeImpl<true, false>(outstr, instr, len);
  139. }
  140. inline void uudecode_1(char* dst, unsigned char* src) {
  141. dst[0] = char((base64_bkw[src[0]] << 2) | (base64_bkw[src[1]] >> 4));
  142. dst[1] = char((base64_bkw[src[1]] << 4) | (base64_bkw[src[2]] >> 2));
  143. dst[2] = char((base64_bkw[src[2]] << 6) | base64_bkw[src[3]]);
  144. }
  145. static size_t Base64DecodePlain(void* dst, const char* b, const char* e) {
  146. size_t n = 0;
  147. while (b < e) {
  148. uudecode_1((char*)dst + n, (unsigned char*)b);
  149. b += 4;
  150. n += 3;
  151. }
  152. if (n > 0) {
  153. if (b[-1] == ',' || b[-1] == '=') {
  154. n--;
  155. if (b[-2] == ',' || b[-2] == '=') {
  156. n--;
  157. }
  158. }
  159. }
  160. return n;
  161. }
// Table for Base64StrictDecode: input byte -> 6-bit value, with two sentinels.
//   \0..\77 (0..63) - value of a character from the standard or the URL alphabet
//                     ('+' and '-' are 62, '/' and '_' are 63);
//   \100 (64)       - byte that belongs to neither alphabet;
//   \101 (65)       - padding character (',' or '=').
// Each string literal below covers 32 consecutive byte values.
static const char base64_bkw_strict[] =
"\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" // 0..31
"\100\100\100\100\100\100\100\100\100\100\100\76\101\76\100\77\64\65\66\67\70\71\72\73\74\75\100\100\100\101\100\100" // 32..63
"\100\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17\20\21\22\23\24\25\26\27\30\31\100\100\100\100\77" // 64..95
"\100\32\33\34\35\36\37\40\41\42\43\44\45\46\47\50\51\52\53\54\55\56\57\60\61\62\63\100\100\100\100\100" // 96..127
"\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" // 128..159
"\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" // 160..191
"\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" // 192..223
"\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"; // 224..255
  172. size_t Base64StrictDecode(void* out, const char* b, const char* e) {
  173. char* dst = (char*)out;
  174. const unsigned char* src = (unsigned char*)b;
  175. const unsigned char* const end = (unsigned char*)e;
  176. Y_ENSURE(!((e - b) % 4), "incorrect input length for base64 decode");
  177. while (src < end) {
  178. const char zeroth = base64_bkw_strict[src[0]];
  179. const char first = base64_bkw_strict[src[1]];
  180. const char second = base64_bkw_strict[src[2]];
  181. const char third = base64_bkw_strict[src[3]];
  182. constexpr char invalid = 64;
  183. constexpr char padding = 65;
  184. if (Y_UNLIKELY(zeroth == invalid || first == invalid ||
  185. second == invalid || third == invalid ||
  186. zeroth == padding || first == padding))
  187. {
  188. ythrow yexception() << "invalid character in input";
  189. }
  190. dst[0] = char((zeroth << 2) | (first >> 4));
  191. dst[1] = char((first << 4) | (second >> 2));
  192. dst[2] = char((second << 6) | third);
  193. src += 4;
  194. dst += 3;
  195. if (src[-1] == ',' || src[-1] == '=') {
  196. --dst;
  197. if (src[-2] == ',' || src[-2] == '=') {
  198. --dst;
  199. }
  200. } else if (Y_UNLIKELY(src[-2] == ',' || src[-2] == '=')) {
  201. ythrow yexception() << "incorrect padding";
  202. }
  203. }
  204. return dst - (char*)out;
  205. }
  206. size_t Base64Decode(void* dst, const char* b, const char* e) {
  207. static const TImpl IMPL = GetImpl();
  208. const auto size = e - b;
  209. Y_ENSURE(!(size % 4), "incorrect input length for base64 decode");
  210. if (Y_LIKELY(size < 8)) {
  211. return Base64DecodePlain(dst, b, e);
  212. }
  213. size_t outLen;
  214. IMPL.Decode(b, size, (char*)dst, &outLen);
  215. return outLen;
  216. }
  217. size_t Base64DecodeUneven(void* dst, const TStringBuf s) {
  218. const size_t tailSize = s.length() % 4;
  219. if (tailSize == 0) {
  220. return Base64Decode(dst, s.begin(), s.end());
  221. }
  222. // divide s into even part and tail and decode in two step, to avoid memory allocation
  223. char tail[4] = {'=', '=', '=', '='};
  224. memcpy(tail, s.end() - tailSize, tailSize);
  225. size_t decodedEven = s.length() > 4 ? Base64Decode(dst, s.begin(), s.end() - tailSize) : 0;
  226. // there should not be tail of size 1 it's incorrect for 8-bit bytes
  227. size_t decodedTail = tailSize != 1 ? Base64Decode(static_cast<char*>(dst) + decodedEven, tail, tail + 4) : 0;
  228. return decodedEven + decodedTail;
  229. }
  230. TString Base64DecodeUneven(const TStringBuf s) {
  231. TString ret;
  232. ret.ReserveAndResize(Base64DecodeBufSize(s.size()));
  233. size_t size = Base64DecodeUneven(const_cast<char*>(ret.data()), s);
  234. ret.resize(size);
  235. return ret;
  236. }
  237. char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) {
  238. static const TImpl IMPL = GetImpl();
  239. if (Y_LIKELY(len < 8)) {
  240. return Base64EncodePlain(outstr, instr, len);
  241. }
  242. size_t outLen;
  243. IMPL.Encode((char*)instr, len, outstr, &outLen);
  244. *(outstr + outLen) = '\0';
  245. return outstr + outLen;
  246. }