helpers.cpp 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. #include "helpers.h"
  2. #include <util/string/ascii.h>
  3. namespace NUnifiedAgent::NPrivate {
  4. bool IsUtf8(const THashMap<TString, TString>& meta) {
  5. for (const auto& p : meta) {
  6. if (!IsUtf(p.first) || !IsUtf(p.second)) {
  7. return false;
  8. }
  9. }
  10. return true;
  11. }
  12. ResultReplacingNonUTF ReplaceNonUTF(TStringBuf message, char signBrokenSymbol, size_t maxSize) {
  13. ResultReplacingNonUTF result;
  14. if (maxSize == 0) {
  15. result.IsTruncated = !message.empty();
  16. return result;
  17. }
  18. if (message.empty()) {
  19. return result;
  20. }
  21. auto currentPoint = reinterpret_cast<const unsigned char*>(&message[0]);
  22. auto endPoint = currentPoint + message.size();
  23. auto pushSignBroken = [&result, signBrokenSymbol]() {
  24. if (result.Data.empty() || result.Data.back() != signBrokenSymbol) {
  25. result.Data.push_back(signBrokenSymbol);
  26. }
  27. ++result.BrokenCount;
  28. };
  29. while (currentPoint < endPoint) {
  30. wchar32 rune = 0;
  31. size_t rune_len = 0;
  32. auto statusRead = SafeReadUTF8Char(rune, rune_len, currentPoint, endPoint);
  33. if (statusRead == RECODE_OK) {
  34. if (rune_len == 1 && !IsAsciiAlnum(*currentPoint) && !IsAsciiPunct(*currentPoint) && !IsAsciiSpace(*currentPoint)) {
  35. ++currentPoint;
  36. pushSignBroken();
  37. } else {
  38. while (rune_len != 0) {
  39. result.Data.push_back(*currentPoint);
  40. ++currentPoint;
  41. --rune_len;
  42. }
  43. }
  44. } else if (statusRead == RECODE_BROKENSYMBOL) {
  45. ++currentPoint;
  46. pushSignBroken();
  47. } else {
  48. pushSignBroken();
  49. break;
  50. }
  51. if (result.Data.size() >= maxSize && currentPoint < endPoint) {
  52. result.IsTruncated = true;
  53. break;
  54. }
  55. }
  56. return result;
  57. }
  58. }