base32.cpp 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. #include "base32.h"
  2. #include <util/generic/yexception.h>
  3. #include <algorithm>
  4. #include <array>
  5. #include <limits>
  6. namespace {
// RFC 4648 Base32 alphabet (value -> character)
//
//  0 A    9 J   18 S   27 3
//  1 B   10 K   19 T   28 4
//  2 C   11 L   20 U   29 5
//  3 D   12 M   21 V   30 6
//  4 E   13 N   22 W   31 7
//  5 F   14 O   23 X
//  6 G   15 P   24 Y
//  7 H   16 Q   25 Z
//  8 I   17 R   26 2   (pad) =
  18. constexpr std::string_view BASE32_TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  19. "234567";
  20. constexpr uint8_t BAD = 0xff;
  21. // clang-format off
  22. constexpr std::array<uint8_t, 256> BASE32_DECODE_TABLE = {{
  23. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  24. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  25. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  26. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  27. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  28. 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, BAD, BAD, BAD, BAD,
  29. BAD, BAD, BAD, BAD, BAD, 0x0, 0x1, 0x2, 0x3, 0x4,
  30. 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe,
  31. 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
  32. 0x19, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  33. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  34. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  35. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  36. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  37. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  38. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  39. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  40. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  41. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  42. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  43. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  44. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  45. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  46. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  47. BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD, BAD,
  48. BAD, BAD, BAD, BAD, BAD, BAD,
  49. }};
  50. // clang-format on
  51. char encodeBits(unsigned char sz) {
  52. static_assert(static_cast<size_t>(std::numeric_limits<decltype(sz)>::max()) < BASE32_DECODE_TABLE.size());
  53. return BASE32_TABLE[sz];
  54. }
  55. uint8_t decodeChar(unsigned char ch, bool isStrict) {
  56. if (uint8_t val = BASE32_DECODE_TABLE[ch]; val != BAD) {
  57. return val;
  58. }
  59. if (isStrict) {
  60. ythrow yexception() << "Error during decode symbol from Base32: character is not in Base32 set";
  61. }
  62. return 0;
  63. }
  64. void shiftBitsFrom(unsigned char& dst, const unsigned char& src, size_t i, size_t n) {
  65. unsigned char m = ((src << i) & 0xFF) >> (8 - n);
  66. dst = dst << n;
  67. dst |= m;
  68. }
  69. size_t Base32DecodeImpl(std::string_view src, char* dst, bool isStrict) {
  70. if (src.empty()) {
  71. return 0;
  72. }
  73. size_t dstSize = 0;
  74. size_t bitIndex = 0;
  75. unsigned char byte = 0;
  76. for (auto c = src.cbegin(); c != src.cend(); ++c) {
  77. if (*c == '=') {
  78. Y_ENSURE(
  79. !isStrict || std::all_of(c, src.cend(), [](char pad) { return pad == '='; }),
  80. "Unexpected character after padding");
  81. break;
  82. }
  83. uint8_t octet = decodeChar(*c, isStrict) << 3;
  84. byte = byte | (octet >> bitIndex);
  85. size_t bitsWritten = std::min<size_t>(5, 8 - bitIndex);
  86. if (bitsWritten < 5 || (bitIndex + bitsWritten) == 8) {
  87. dst[dstSize++] = byte;
  88. byte = (octet << bitsWritten);
  89. }
  90. bitIndex = (bitIndex + 5) % 8;
  91. }
  92. // For example, correct encoding of \x00 is
  93. // AA====== (\b0000'0000\b00xx'xxxx), not a
  94. // AAA= (\b0000'0000\b0000'000x)
  95. size_t lastOctetBitsCount = (dstSize * 8) % 5;
  96. size_t expectedBitIndex = (lastOctetBitsCount == 0) ? 0 : 5 - lastOctetBitsCount;
  97. Y_ENSURE(!isStrict || (byte == 0 && bitIndex == expectedBitIndex), "Invalid Base32 string format");
  98. return dstSize;
  99. }
  100. } // namespace
  101. size_t Base32Encode(std::string_view src, char* dst) {
  102. if (src.size() == 0) {
  103. return 0;
  104. }
  105. size_t dstSize = 0;
  106. size_t curInd = 0;
  107. unsigned char c = src[curInd];
  108. unsigned char bitInd = 0;
  109. unsigned char n = 0;
  110. unsigned char ind = 0;
  111. while (curInd < src.size()) {
  112. ind = 0;
  113. unsigned char bitProcessed = 0;
  114. while (bitProcessed < 5) {
  115. n = std::min<unsigned char>(8 - bitInd, 5 - bitProcessed);
  116. shiftBitsFrom(ind, c, bitInd, n);
  117. bitProcessed += n;
  118. if (bitInd + n >= 8) {
  119. ++curInd;
  120. if (curInd == src.size()) {
  121. c = 0;
  122. } else {
  123. c = src[curInd];
  124. }
  125. }
  126. bitInd = (bitInd + n) % 8;
  127. }
  128. dst[dstSize++] = encodeBits(ind);
  129. }
  130. const size_t paddingSize = ((8 - (dstSize & 7)) & 7);
  131. for (size_t i = 0; i < paddingSize; ++i) {
  132. dst[dstSize++] = '=';
  133. }
  134. return dstSize;
  135. }
  136. size_t Base32Decode(std::string_view src, char* dst) {
  137. return Base32DecodeImpl(src, dst, /*isStrict*/ false);
  138. }
  139. size_t Base32StrictDecode(std::string_view src, char* dst) {
  140. return Base32DecodeImpl(src, dst, /*isStrict*/ true);
  141. }