123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- #include "xml-textreader.h"
- #include <library/cpp/testing/unittest/registar.h>
- #include <util/generic/hash.h>
- #include <util/generic/vector.h>
- #include <util/string/join.h>
- namespace {
- /**
- * Simple wrapper around the xmlTextReader wrapper
- */
- void ParseXml(const TString& xmlData,
- std::function<void(NXml::TConstNode)> nodeHandlerFunc,
- const TString& localName,
- const TString& namespaceUri = TString()) {
- TStringInput in(xmlData);
- NXml::TTextReader reader(in);
- while (reader.Read()) {
- if (reader.GetNodeType() == NXml::TTextReader::ENodeType::Element &&
- reader.GetLocalName() == localName &&
- reader.GetNamespaceUri() == namespaceUri)
- {
- const NXml::TConstNode node = reader.Expand();
- nodeHandlerFunc(node);
- }
- }
- }
- }
- Y_UNIT_TEST_SUITE(TestXmlTextReader) {
- Y_UNIT_TEST(BasicExample) {
- const TString xml = "<?xml version=\"1.0\"?>\n"
- "<example toto=\"1\">\n"
- " <examplechild id=\"1\">\n"
- " <child_of_child/>\n"
- " </examplechild>\n"
- " <examplechild id=\"2\" toto=\"3\">\n"
- " <child_of_child>Some content : -)</child_of_child>\n"
- " </examplechild>\n"
- "</example>\n";
- TStringInput input(xml);
- NXml::TTextReader reader(input);
- using ENT = NXml::TTextReader::ENodeType;
- struct TItem {
- int Depth;
- ENT Type;
- TString Name;
- TString Attrs;
- TString Value;
- };
- TVector<TItem> found;
- TVector<TString> msgs;
- while (reader.Read()) {
- // dump attributes as "k1: v1, k2: v2, ..."
- TVector<TString> kv;
- if (reader.HasAttributes()) {
- reader.MoveToFirstAttribute();
- do {
- kv.push_back(TString::Join(reader.GetName(), ": ", reader.GetValue()));
- } while (reader.MoveToNextAttribute());
- reader.MoveToElement();
- }
- found.push_back(TItem{
- reader.GetDepth(),
- reader.GetNodeType(),
- TString(reader.GetName()),
- JoinSeq(", ", kv),
- reader.HasValue() ? TString(reader.GetValue()) : TString(),
- });
- }
- const TVector<TItem> expected = {
- TItem{0, ENT::Element, "example", "toto: 1", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::Element, "examplechild", "id: 1", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{2, ENT::Element, "child_of_child", "", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::EndElement, "examplechild", "id: 1", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::Element, "examplechild", "id: 2, toto: 3", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{2, ENT::Element, "child_of_child", "", ""},
- TItem{3, ENT::Text, "#text", "", "Some content : -)"},
- TItem{2, ENT::EndElement, "child_of_child", "", ""},
- TItem{2, ENT::SignificantWhitespace, "#text", "", "\n "},
- TItem{1, ENT::EndElement, "examplechild", "id: 2, toto: 3", ""},
- TItem{1, ENT::SignificantWhitespace, "#text", "", "\n"},
- TItem{0, ENT::EndElement, "example", "toto: 1", ""}};
- UNIT_ASSERT_VALUES_EQUAL(found.size(), expected.size());
- for (size_t i = 0; i < expected.size(); ++i) {
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Depth, expected[i].Depth, "line " << i);
- UNIT_ASSERT_EQUAL_C(found[i].Type, expected[i].Type, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Name, expected[i].Name, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Attrs, expected[i].Attrs, "line " << i);
- UNIT_ASSERT_VALUES_EQUAL_C(found[i].Value, expected[i].Value, "line " << i);
- }
- }
- const TString GEODATA = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
- "<root>"
- ""
- " <country id=\"225\">"
- " <name>Россия</name>"
- " <cities>"
- " <city>Москва</city>"
- " <city>Санкт-Петербург</city>"
- " </cities>"
- " </country>"
- ""
- " <country id=\"149\">"
- " <name>Беларусь</name>"
- " <cities>"
- " <city>Минск</city>"
- " </cities>"
- " </country>"
- ""
- " <country id=\"187\">"
- " <name>Украина</name>"
- " <cities>"
- " <city>Киев</city>"
- " </cities>"
- " </country>"
- ""
- "</root>";
- Y_UNIT_TEST(ParseXmlSimple) {
- struct TCountry {
- TString Name;
- TVector<TString> Cities;
- };
- THashMap<int, TCountry> data;
- auto handler = [&data](NXml::TConstNode node) {
- const int id = node.Attr<int>("id");
- TCountry& c = data[id];
- c.Name = node.FirstChild("name").Value<TString>();
- const NXml::TConstNodes cityNodes = node.Nodes("cities/city");
- for (auto cityNode : cityNodes) {
- c.Cities.push_back(cityNode.Value<TString>());
- }
- };
- ParseXml(GEODATA, handler, "country");
- UNIT_ASSERT_EQUAL(data.size(), 3);
- UNIT_ASSERT(data.contains(225));
- const TCountry& russia = data.at(225);
- UNIT_ASSERT_EQUAL(russia.Name, "Россия");
- UNIT_ASSERT_EQUAL(russia.Cities.size(), 2);
- UNIT_ASSERT_EQUAL(russia.Cities[0], "Москва");
- UNIT_ASSERT_EQUAL(russia.Cities[1], "Санкт-Петербург");
- UNIT_ASSERT(data.contains(149));
- const TCountry& belarus = data.at(149);
- UNIT_ASSERT_EQUAL(belarus.Name, "Беларусь");
- UNIT_ASSERT_EQUAL(belarus.Cities.size(), 1);
- UNIT_ASSERT_EQUAL(belarus.Cities[0], "Минск");
- UNIT_ASSERT(data.contains(187));
- const TCountry& ukraine = data.at(187);
- UNIT_ASSERT_EQUAL(ukraine.Name, "Украина");
- UNIT_ASSERT_EQUAL(ukraine.Cities.size(), 1);
- UNIT_ASSERT_EQUAL(ukraine.Cities[0], "Киев");
- }
- Y_UNIT_TEST(ParseXmlDeepLevel) {
- TVector<TString> cities;
- auto handler = [&cities](NXml::TConstNode node) {
- cities.push_back(node.Value<TString>());
- };
- ParseXml(GEODATA, handler, "city");
- UNIT_ASSERT_EQUAL(cities.size(), 4);
- UNIT_ASSERT_EQUAL(cities[0], "Москва");
- UNIT_ASSERT_EQUAL(cities[1], "Санкт-Петербург");
- UNIT_ASSERT_EQUAL(cities[2], "Минск");
- UNIT_ASSERT_EQUAL(cities[3], "Киев");
- }
- Y_UNIT_TEST(ParseXmlException) {
- // Check that exception properly passes through plain C code of libxml,
- // no leaks are detected by valgrind.
- auto handler = [](NXml::TConstNode node) {
- const int id = node.Attr<int>("id");
- if (id != 225) {
- ythrow yexception() << "unsupported id: " << id;
- }
- };
- UNIT_ASSERT_EXCEPTION(ParseXml(GEODATA, handler, "country"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<a></b>", handler, "a"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2\"></b></root>", handler, "a"), yexception);
- UNIT_ASSERT_EXCEPTION(ParseXml("<root><a id=\"1\"></a><a id=\"2></a></root>", handler, "a"), yexception);
- }
- const TString BACKA = // UTF-8 encoding is used implicitly
- "<Companies"
- " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\""
- " xmlns=\"http://maps.yandex.ru/backa/1.x\""
- " xmlns:atom=\"http://www.w3.org/2005/Atom\""
- " xmlns:biz=\"http://maps.yandex.ru/business/1.x\""
- " xmlns:xal=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\""
- " xmlns:gml=\"http://www.opengis.net/gml\""
- ">"
- ""
- " <Company id=\"0001\">"
- " <Geo>"
- " <Location>"
- " <gml:pos>37.62669 55.664827</gml:pos>"
- " <kind>house</kind>"
- " </Location>"
- " <AddressDetails xmlns=\"urn:oasis:names:tc:ciq:xsdschema:xAL:2.0\">"
- " <Country>"
- " <AddressLine xml:lang=\"ru\">Москва, Каширское ш., 14</AddressLine>"
- " </Country>"
- " </AddressDetails>"
- " </Geo>"
- " </Company>"
- ""
- " <Company id=\"0002\">"
- " <Geo>"
- " <Location>"
- " <pos xmlns=\"http://www.opengis.net/gml\">150.819797 59.56092</pos>"
- " <kind>locality</kind>"
- " </Location>"
- " <xal:AddressDetails>"
- " <xal:Country>"
- " <xal:AddressLine xml:lang=\"ru\">Магадан, ул. Пролетарская, 43</xal:AddressLine>"
- " </xal:Country>"
- " </xal:AddressDetails>"
- " </Geo>"
- " </Company>"
- ""
- "</Companies>";
- Y_UNIT_TEST(NamespaceHell) {
- using TNS = NXml::TNamespaceForXPath;
- const NXml::TNamespacesForXPath ns = {
- TNS{"b", "http://maps.yandex.ru/backa/1.x"},
- TNS{"gml", "http://www.opengis.net/gml"},
- TNS{"xal", "urn:oasis:names:tc:ciq:xsdschema:xAL:2.0"}};
- int count = 0;
- THashMap<TString, TString> positions;
- THashMap<TString, TString> addresses;
- auto handler = [&](NXml::TConstNode node) {
- count++;
- const auto id = node.Attr<TString>("id");
- NXml::TXPathContextPtr ctxt = node.CreateXPathContext(ns);
- const NXml::TConstNode location = node.Node("b:Geo/b:Location", false, *ctxt);
- positions[id] = location.Node("gml:pos", false, *ctxt).Value<TString>();
- addresses[id] = node.Node("b:Geo/xal:AddressDetails/xal:Country/xal:AddressLine", false, *ctxt).Value<TString>();
- };
- ParseXml(BACKA, handler, "Company");
- UNIT_ASSERT_EQUAL(count, 0);
- // nothing found because namespace was not specified
- ParseXml(BACKA, handler, "Company", "http://maps.yandex.ru/backa/1.x");
- UNIT_ASSERT_VALUES_EQUAL(count, 2);
- UNIT_ASSERT_VALUES_EQUAL(positions["0001"], "37.62669 55.664827");
- UNIT_ASSERT_VALUES_EQUAL(positions["0002"], "150.819797 59.56092");
- UNIT_ASSERT_VALUES_EQUAL(addresses["0001"], "Москва, Каширское ш., 14");
- UNIT_ASSERT_VALUES_EQUAL(addresses["0002"], "Магадан, ул. Пролетарская, 43");
- }
- }
|