csv.h 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. #pragma once
  2. #include <util/generic/yexception.h>
  3. #include <util/generic/strbuf.h>
  4. #include <util/generic/vector.h>
  5. #include <util/stream/input.h>
  6. #include <vector>
/*
 * Splits strings according to RFC 4180 (the CSV format).
 */
  10. namespace NCsvFormat {
  11. class TLinesSplitter {
  12. private:
  13. IInputStream& Input;
  14. const char Quote;
  15. public:
  16. TLinesSplitter(IInputStream& input, const char quote = '"')
  17. : Input(input)
  18. , Quote(quote) {
  19. }
  20. TString ConsumeLine();
  21. };
  22. class CsvSplitter {
  23. public:
  24. CsvSplitter(const TString& data, const char delimeter = ',', const char quote = '"')
  25. // quote = '\0' ignores quoting in values and words like simple split
  26. : Delimeter(delimeter)
  27. , Quote(quote)
  28. , Begin(data.begin())
  29. , End(data.end())
  30. {
  31. }
  32. bool Step() {
  33. if (Begin == End) {
  34. return false;
  35. }
  36. ++Begin;
  37. return true;
  38. }
  39. TStringBuf Consume();
  40. explicit operator TVector<TString>() {
  41. TVector<TString> ret;
  42. do {
  43. TStringBuf buf = Consume();
  44. ret.push_back(TString{buf});
  45. } while (Step());
  46. return ret;
  47. }
  48. private:
  49. const char Delimeter;
  50. const char Quote;
  51. TString::const_iterator Begin;
  52. const TString::const_iterator End;
  53. std::vector<std::unique_ptr<TString>> TempResults; // CsvSplitter lifetime
  54. std::vector<TStringBuf> TempResultParts; // Single Consume() method call lifetime
  55. };
  56. }