csv.cpp 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. #include "csv.h"
  2. TStringBuf NCsvFormat::CsvSplitter::Consume() {
  3. if (Begin == End) {
  4. return nullptr;
  5. }
  6. TString::const_iterator TokenStart = Begin;
  7. TString::const_iterator TokenEnd = Begin;
  8. if (Quote == '\0') {
  9. while (1) {
  10. if (TokenEnd == End || *TokenEnd == Delimeter) {
  11. Begin = TokenEnd;
  12. return TStringBuf(TokenStart, TokenEnd);
  13. }
  14. ++TokenEnd;
  15. }
  16. } else {
  17. bool Escape = false;
  18. if (*Begin == Quote) {
  19. Escape = true;
  20. ++TokenStart;
  21. ++TokenEnd;
  22. Y_ENSURE(TokenStart != End, TStringBuf("RFC4180 violation: quotation mark must be followed by something"));
  23. }
  24. while (1) {
  25. if (TokenEnd == End || (!Escape && *TokenEnd == Delimeter)) {
  26. Begin = TokenEnd;
  27. return TStringBuf(TokenStart, TokenEnd);
  28. } else if (*TokenEnd == Quote) {
  29. Y_ENSURE(Escape, TStringBuf("RFC4180 violation: quotation mark must be in the escaped string only"));
  30. if (TokenEnd + 1 == End) {
  31. Begin = TokenEnd + 1;
  32. } else if (*(TokenEnd + 1) == Delimeter) {
  33. Begin = TokenEnd + 1;
  34. } else if (*(TokenEnd + 1) == Quote) {
  35. TempResultParts.push_back(TStringBuf(TokenStart, (TokenEnd + 1)));
  36. TokenEnd += 2;
  37. TokenStart = TokenEnd;
  38. continue;
  39. } else {
  40. Y_ENSURE(false, TStringBuf("RFC4180 violation: in escaped string quotation mark must be followed by a delimiter, EOL or another quotation mark"));
  41. }
  42. if (TempResultParts.size()) {
  43. auto newEscapedStringPtr = std::make_unique<TString>();
  44. size_t newStringSize = 0;
  45. for (auto tempResultPart : TempResultParts) {
  46. newStringSize += tempResultPart.size();
  47. }
  48. newStringSize += TokenEnd - TokenStart;
  49. newEscapedStringPtr->reserve(newStringSize);
  50. for (auto tempResultPart : TempResultParts) {
  51. *newEscapedStringPtr += TString{ tempResultPart };
  52. }
  53. *newEscapedStringPtr += TString{ TStringBuf(TokenStart, TokenEnd) };
  54. TempResultParts.clear();
  55. // Storing built string so that returned TStringBuf won't change until this splitter is destroyed
  56. TempResults.push_back(std::move(newEscapedStringPtr));
  57. return TStringBuf(*TempResults.back());
  58. } else {
  59. return TStringBuf(TokenStart, TokenEnd);
  60. }
  61. }
  62. ++TokenEnd;
  63. }
  64. }
  65. }
  66. TString NCsvFormat::TLinesSplitter::ConsumeLine() {
  67. bool Escape = false;
  68. TString result;
  69. TString line;
  70. while (Input.ReadLine(line)) {
  71. for (auto it = line.begin(); it != line.end(); ++it) {
  72. if (*it == Quote) {
  73. Escape = !Escape;
  74. }
  75. }
  76. if (!result) {
  77. result = line;
  78. } else {
  79. result += line;
  80. }
  81. if (!Escape) {
  82. break;
  83. } else {
  84. result += "\n";
  85. }
  86. }
  87. return result;
  88. }