xml-textreader_ut.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. #include "xml-textreader.h"
  2. #include <library/cpp/testing/unittest/registar.h>
  3. #include <util/generic/hash.h>
  4. #include <util/generic/vector.h>
  5. #include <util/string/join.h>
  6. namespace {
  7. /**
  8. * Simple wrapper around the xmlTextReader wrapper
  9. */
  10. void ParseXml(const TString& xmlData,
  11. std::function<void(NXml::TConstNode)> nodeHandlerFunc,
  12. const TString& localName,
  13. const TString& namespaceUri = TString()) {
  14. TStringInput in(xmlData);
  15. NXml::TTextReader reader(in);
  16. while (reader.Read()) {
  17. if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element &&
  18. reader.GetLocalName() == localName &&
  19. reader.GetNamespaceUri() == namespaceUri)
  20. {
  21. const NXml::TConstNode node = reader.Expand();
  22. nodeHandlerFunc(node);
  23. }
  24. }
  25. }
  26. }
  27. Y_UNIT_TEST_SUITE(TestXmlTextReader) {
  28. Y_UNIT_TEST(BasicExample) {
  29. const TString xml = "<?xml version=\"1.0\"?>\n"
  30. "<example toto=\"1\">\n"
  31. " <examplechild id=\"1\">\n"
  32. " <child_of_child/>\n"
  33. " </examplechild>\n"
  34. " <examplechild id=\"2\" toto=\"3\">\n"
  35. " <child_of_child>Some content : -)</child_of_child>\n"
  36. " </examplechild>\n"
  37. "</example>\n";
  38. TStringInput input(xml);
  39. NXml::TTextReader reader(input);
  40. using ENT = NXml::TTextReader::ENodeType;
  41. struct TItem {
  42. int Depth;
  43. ENT Type;
  44. TString Name;
  45. TString Attrs;
  46. TString Value;
  47. };
  48. TVector<TItem> found;
  49. TVector<TString> msgs;
  50. while (reader.Read()) {
  51. // dump attributes as "k1: v1, k2: v2, ..."
  52. TVector<TString> kv;
  53. if (reader.HasAttributes()) {
  54. reader.MoveToFirstAttribute();
  55. do {
  56. kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue()));
  57. } while (reader.MoveToNextAttribute());
  58. reader.MoveToElement();
  59. }
  60. found.push_back(TItem{
  61. reader.GetDepth(),
  62. reader.GetNodeType(),
  63. TString(reader.GetName()),
  64. JoinSeq(", ", kv),
  65. reader.HasValue() ? TString(reader.GetValue()) : TString(),
  66. });
  67. }
  68. const TVector<TItem> expected = {
  69. TItem{0, ENT::Element, "example", "toto: 1", ""},
  70. TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
  71. TItem{1, ENT::Element, "examplechild", "id: 1", ""},
  72. TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
  73. TItem{2, ENT::Element, "child_of_child", "", ""},
  74. TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
  75. TItem{1, ENT::EndElement, "examplechild", "id: 1", ""},
  76. TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
  77. TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""},
  78. TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
  79. TItem{2, ENT::Element, "child_of_child", "", ""},
  80. TItem{3, ENT::Text, "#text", "", "Some content : -)"},
  81. TItem{2, ENT::EndElement, "child_of_child", "", ""},
  82. TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
  83. TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""},
  84. TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"},
  85. TItem{0, ENT::EndElement, "example", "toto: 1", ""}};
  86. UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size());
  87. for (size_t i = 0; i < expected.size(); ++i) {
  88. UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i);
  89. UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i);
  90. UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i);
  91. UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i);
  92. UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i);
  93. }
  94. }
  // Shared XML fixture for the ParseXml* tests: a <root> with three <country>
  // elements (ids 225, 149 and 187). Each country has a <name> and a <cities>
  // list of <city> entries; 225 has two cities, the other two have one each.
  // The prolog declares utf-8 and the text nodes contain Cyrillic.
  const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
                          "<root>"
                          ""
                          " <country id=\"225\">"
                          " <name>Россия</name>"
                          " <cities>"
                          " <city>Москва</city>"
                          " <city>Санкт-Петербург</city>"
                          " </cities>"
                          " </country>"
                          ""
                          " <country id=\"149\">"
                          " <name>Беларусь</name>"
                          " <cities>"
                          " <city>Минск</city>"
                          " </cities>"
                          " </country>"
                          ""
                          " <country id=\"187\">"
                          " <name>Украина</name>"
                          " <cities>"
                          " <city>Киев</city>"
                          " </cities>"
                          " </country>"
                          ""
                          "</root>";
  121. Y_UNIT_TEST(ParseXmlSimple) {
  122. struct TCountry {
  123. TString Name;
  124. TVector<TString> Cities;
  125. };
  126. THashMap<int, TCountry> data;
  127. auto handler = [&data](NXml::TConstNode node) {
  128. const int id = node.Attr<int>("id");
  129. TCountry& c = data[id];
  130. c.Name = node.FirstChild("name").Value<TString>();
  131. const NXml::TConstNodes cityNodes = node.Nodes("cities/city");
  132. for (auto cityNode : cityNodes) {
  133. c.Cities.push_back(cityNode.Value<TString>());
  134. }
  135. };
  136. ParseXml(GEODATA, handler, "country");
  137. UNIT_ASSERT_EQUAL(data.size(), 3);
  138. UNIT_ASSERT(data.contains(225));
  139. const TCountry& russia = data.at(225);
  140. UNIT_ASSERT_EQUAL(russia.Name, "Россия");
  141. UNIT_ASSERT_EQUAL(russia.Cities.size(), 2);
  142. UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва");
  143. UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург");
  144. UNIT_ASSERT(data.contains(149));
  145. const TCountry& belarus = data.at(149);
  146. UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь");
  147. UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1);
  148. UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск");
  149. UNIT_ASSERT(data.contains(187));
  150. const TCountry& ukraine = data.at(187);
  151. UNIT_ASSERT_EQUAL(ukraine.Name, "Украина");
  152. UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1);
  153. UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев");
  154. }
  155. Y_UNIT_TEST(ParseXmlDeepLevel) {
  156. TVector<TString> cities;
  157. auto handler = [&cities](NXml::TConstNode node) {
  158. cities.push_back(node.Value<TString>());
  159. };
  160. ParseXml(GEODATA, handler, "city");
  161. UNIT_ASSERT_EQUAL(cities.size(), 4);
  162. UNIT_ASSERT_EQUAL(cities[0], "Москва");
  163. UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург");
  164. UNIT_ASSERT_EQUAL(cities[2], "Минск");
  165. UNIT_ASSERT_EQUAL(cities[3], "Киев");
  166. }
  167. Y_UNIT_TEST(ParseXmlException) {
  168. // Check that exception properly passes through plain C code of libxml,
  169. // no leaks are detected by valgrind.
  170. auto handler = [](NXml::TConstNode node) {
  171. const int id = node.Attr<int>("id");
  172. if (id != 225) {
  173. ythrow yexception() << "unsupported id: " << id;
  174. }
  175. };
  176. UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception);
  177. UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception);
  178. UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception);
  179. UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception);
  180. }
  // XML fixture for the NamespaceHell test. The root <Companies> declares
  // http://maps.yandex.ru/backa/1.x as the default namespace plus several
  // prefixed ones (xsi, atom, biz, xal, gml). The two <Company> elements spell
  // the same namespaces in different ways:
  //   - 0001 uses the gml: prefix for <pos> and redeclares the default
  //     namespace on <AddressDetails>;
  //   - 0002 redeclares the default namespace on <pos> and uses the xal:
  //     prefix for the address subtree.
  const TString BACKA = // UTF-8 encoding is used implicitly
      "<Companies"
      " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
      " xmlns=\"http://maps.yandex.ru/backa/1.x\""
      " xmlns:atom=\"http://www.w3.org/2005/Atom\""
      " xmlns:biz=\"http://maps.yandex.ru/business/1.x\""
      " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\""
      " xmlns:gml=\"http://www.opengis.net/gml\""
      ">"
      ""
      " <Company id=\"0001\">"
      " <Geo>"
      " <Location>"
      " <gml:pos>37.62669 55.664827</gml:pos>"
      " <kind>house</kind>"
      " </Location>"
      " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">"
      " <Country>"
      " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>"
      " </Country>"
      " </AddressDetails>"
      " </Geo>"
      " </Company>"
      ""
      " <Company id=\"0002\">"
      " <Geo>"
      " <Location>"
      " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>"
      " <kind>locality</kind>"
      " </Location>"
      " <xal:AddressDetails>"
      " <xal:Country>"
      " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>"
      " </xal:Country>"
      " </xal:AddressDetails>"
      " </Geo>"
      " </Company>"
      ""
      "</Companies>";
  220. Y_UNIT_TEST(NamespaceHell) {
  221. using TNS = NXml::TNamespaceForXPath;
  222. const NXml::TNamespacesForXPath ns = {
  223. TNS{"b", "http://maps.yandex.ru/backa/1.x"},
  224. TNS{"gml", "http://www.opengis.net/gml"},
  225. TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}};
  226. int count = 0;
  227. THashMap<TString, TString> positions;
  228. THashMap<TString, TString> addresses;
  229. auto handler = [&](NXml::TConstNode node) {
  230. count++;
  231. const auto id = node.Attr<TString>("id");
  232. NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns);
  233. const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt);
  234. positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>();
  235. addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>();
  236. };
  237. ParseXml(BACKA, handler, "Company");
  238. UNIT_ASSERT_EQUAL(count, 0);
  239. // nothing found because namespace was not specified
  240. ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x");
  241. UNIT_ASSERT_VALUES_EQUAL(count, 2);
  242. UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827");
  243. UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092");
  244. UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14");
  245. UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43");
  246. }
  247. }