xml-document.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. #include "xml-document.h"
  2. #include <libxml/xinclude.h>
  3. #include <libxml/xpathInternals.h>
  4. #include <library/cpp/xml/init/init.h>
  5. #include <util/generic/yexception.h>
  6. #include <util/folder/dirut.h>
  7. namespace {
  8. struct TInit {
  9. inline TInit() {
  10. NXml::InitEngine();
  11. }
  12. } initer;
  13. }
  14. namespace NXml {
  15. TDocument::TDocument(const TString& xml, Source type) {
  16. switch (type) {
  17. case File:
  18. ParseFile(xml);
  19. break;
  20. case String:
  21. ParseString(xml);
  22. break;
  23. case RootName: {
  24. TDocHolder doc(xmlNewDoc(XMLCHAR("1.0")));
  25. if (!doc)
  26. THROW(XmlException, "Can't create xml document.");
  27. doc->encoding = xmlStrdup(XMLCHAR("utf-8"));
  28. TNodePtr node(xmlNewNode(nullptr, XMLCHAR(xml.c_str())));
  29. if (!node)
  30. THROW(XmlException, "Can't create root node.");
  31. xmlDocSetRootElement(doc.Get(), node.Get());
  32. Y_UNUSED(node.Release());
  33. Doc = std::move(doc);
  34. } break;
  35. default:
  36. THROW(InvalidArgument, "Wrong source type");
  37. }
  38. }
  39. TDocument::TDocument(TDocument&& doc)
  40. : Doc(std::move(doc.Doc))
  41. {
  42. }
  43. TDocument& TDocument::operator=(TDocument&& doc) {
  44. if (this != &doc)
  45. doc.Swap(*this);
  46. return *this;
  47. }
  48. void TDocument::ParseFile(const TString& file) {
  49. if (!NFs::Exists(file))
  50. THROW(XmlException, "File " << file << " doesn't exist");
  51. TParserCtxtPtr pctx(xmlNewParserCtxt());
  52. if (!pctx)
  53. THROW(XmlException, "Can't create parser context");
  54. TDocHolder doc(xmlCtxtReadFile(pctx.Get(), file.c_str(), nullptr, XML_PARSE_NOCDATA));
  55. if (!doc)
  56. THROW(XmlException, "Can't parse file " << file);
  57. int res = xmlXIncludeProcessFlags(doc.Get(), XML_PARSE_XINCLUDE | XML_PARSE_NOCDATA | XML_PARSE_NOXINCNODE);
  58. if (res == -1)
  59. THROW(XmlException, "XIncludes processing failed");
  60. Doc = std::move(doc);
  61. }
  62. void TDocument::ParseString(TZtStringBuf xml) {
  63. TParserCtxtPtr pctx(xmlNewParserCtxt());
  64. if (pctx.Get() == nullptr)
  65. THROW(XmlException, "Can't create parser context");
  66. TDocHolder doc(xmlCtxtReadMemory(pctx.Get(), xml.c_str(), (int)xml.size(), nullptr, nullptr, XML_PARSE_NOCDATA));
  67. if (!doc)
  68. THROW(XmlException, "Can't parse string");
  69. Doc = std::move(doc);
  70. }
  71. TNode TDocument::Root() {
  72. xmlNode* r = xmlDocGetRootElement(Doc.Get());
  73. if (r == nullptr)
  74. THROW(XmlException, "TDocument hasn't root element");
  75. return TNode(Doc.Get(), r);
  76. }
  77. TConstNode TDocument::Root() const {
  78. xmlNode* r = xmlDocGetRootElement(Doc.Get());
  79. if (r == nullptr)
  80. THROW(XmlException, "TDocument hasn't root element");
  81. return TConstNode(TNode(Doc.Get(), r));
  82. }
  83. bool TNode::IsNull() const {
  84. return NodePointer == nullptr;
  85. }
  86. bool TNode::IsElementNode() const {
  87. return !IsNull() && (NodePointer->type == XML_ELEMENT_NODE);
  88. }
  89. TXPathContextPtr TNode::CreateXPathContext(const TNamespacesForXPath& nss) const {
  90. TXPathContextPtr ctx = xmlXPathNewContext(DocPointer);
  91. if (!ctx)
  92. THROW(XmlException, "Can't create empty xpath context");
  93. for (const auto& ns : nss) {
  94. const int r = xmlXPathRegisterNs(ctx.Get(), XMLCHAR(ns.Prefix.c_str()), XMLCHAR(ns.Url.c_str()));
  95. if (r != 0)
  96. THROW(XmlException, "Can't register namespace " << ns.Url << " with prefix " << ns.Prefix);
  97. }
  98. return ctx;
  99. }
  100. TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
  101. TXPathContextPtr ctxt = CreateXPathContext(ns);
  102. return XPath(xpath, quiet, *ctxt);
  103. }
  104. TConstNodes TNode::XPath(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
  105. if (xmlXPathSetContextNode(NodePointer, &ctxt) != 0)
  106. THROW(XmlException, "Can't set xpath context node, probably the context is associated with another document");
  107. TXPathObjectPtr obj = xmlXPathEvalExpression(XMLCHAR(xpath.c_str()), &ctxt);
  108. if (!obj)
  109. THROW(XmlException, "Can't evaluate xpath expression " << xpath);
  110. TConstNodes nodes(DocPointer, obj);
  111. if (nodes.Size() == 0 && !quiet)
  112. THROW(NodeNotFound, xpath);
  113. return nodes;
  114. }
  115. TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
  116. TXPathContextPtr ctxt = CreateXPathContext(ns);
  117. return Nodes(xpath, quiet, *ctxt);
  118. }
  119. TConstNodes TNode::Nodes(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
  120. TConstNodes nodes = XPath(xpath, quiet, ctxt);
  121. if (nodes.Size() != 0 && !nodes[0].IsElementNode())
  122. THROW(XmlException, "xpath points to non-element nodes: " << xpath);
  123. return nodes;
  124. }
  125. TNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) {
  126. TXPathContextPtr ctxt = CreateXPathContext(ns);
  127. return Node(xpath, quiet, *ctxt);
  128. }
  129. TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, const TNamespacesForXPath& ns) const {
  130. TXPathContextPtr ctxt = CreateXPathContext(ns);
  131. return Node(xpath, quiet, *ctxt);
  132. }
  133. TNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) {
  134. TConstNodes n = Nodes(xpath, quiet, ctxt);
  135. if (n.Size() == 0 && !quiet)
  136. THROW(NodeNotFound, xpath);
  137. if (n.Size() == 0)
  138. return TNode();
  139. else
  140. return n[0].ConstCast();
  141. }
  142. TConstNode TNode::Node(TZtStringBuf xpath, bool quiet, TXPathContext& ctxt) const {
  143. return const_cast<TNode*>(this)->Node(xpath, quiet, ctxt);
  144. }
  145. TNode TNode::FirstChild(TZtStringBuf name) {
  146. if (IsNull())
  147. THROW(XmlException, "Node is null");
  148. return Find(NodePointer->children, name);
  149. }
  150. TConstNode TNode::FirstChild(TZtStringBuf name) const {
  151. return const_cast<TNode*>(this)->FirstChild(name);
  152. }
  153. TNode TNode::FirstChild() {
  154. if (IsNull())
  155. THROW(XmlException, "Node is null");
  156. return TNode(DocPointer, NodePointer->children);
  157. }
  158. TConstNode TNode::FirstChild() const {
  159. return const_cast<TNode*>(this)->FirstChild();
  160. }
  161. TNode TNode::Parent() {
  162. if (nullptr == NodePointer->parent)
  163. THROW(XmlException, "Parent node not exists");
  164. return TNode(DocPointer, NodePointer->parent);
  165. }
  166. TConstNode TNode::Parent() const {
  167. return const_cast<TNode*>(this)->Parent();
  168. }
  169. TNode TNode::NextSibling(TZtStringBuf name) {
  170. if (IsNull())
  171. THROW(XmlException, "Node is null");
  172. return Find(NodePointer->next, name);
  173. }
  174. TConstNode TNode::NextSibling(TZtStringBuf name) const {
  175. return const_cast<TNode*>(this)->NextSibling(name);
  176. }
  177. TNode TNode::NextSibling() {
  178. if (IsNull())
  179. THROW(XmlException, "Node is null");
  180. return TNode(DocPointer, NodePointer->next);
  181. }
  182. TConstNode TNode::NextSibling() const {
  183. return const_cast<TNode*>(this)->NextSibling();
  184. }
  185. /* NOTE: by default child will inherit it's parent ns */
  186. TNode TNode::AddChild(TZtStringBuf name) {
  187. return AddChild(name, "");
  188. }
  189. /* NOTE: source node will be copied, as otherwise it will be double-freed from this and its own document */
  190. TNode TNode::AddChild(const TConstNode& node) {
  191. xmlNodePtr copy = xmlDocCopyNode(node.ConstCast().NodePointer, DocPointer, 1 /* recursive */);
  192. copy = xmlAddChild(NodePointer, copy);
  193. return TNode(DocPointer, copy);
  194. }
  195. void TNode::SetPrivate(void* priv) {
  196. NodePointer->_private = priv;
  197. }
  198. void* TNode::GetPrivate() const {
  199. return NodePointer->_private;
  200. }
  201. TNode TNode::Find(xmlNode* start, TZtStringBuf name) {
  202. for (; start; start = start->next)
  203. if (start->type == XML_ELEMENT_NODE && (name.empty() || !xmlStrcmp(start->name, XMLCHAR(name.c_str()))))
  204. return TNode(DocPointer, start);
  205. return TNode();
  206. }
  207. TString TNode::Name() const {
  208. if (IsNull())
  209. THROW(XmlException, "Node is null");
  210. return CAST2CHAR(NodePointer->name);
  211. }
  212. TString TNode::Path() const {
  213. TCharPtr path(xmlGetNodePath(NodePointer));
  214. if (!!path)
  215. return CAST2CHAR(path.Get());
  216. else
  217. return "";
  218. }
  219. xmlNode* TNode::GetPtr() {
  220. return NodePointer;
  221. }
  222. const xmlNode* TNode::GetPtr() const {
  223. return NodePointer;
  224. }
  225. bool TNode::IsText() const {
  226. if (IsNull())
  227. THROW(XmlException, "Node is null");
  228. return NodePointer->type == XML_TEXT_NODE;
  229. }
  230. void TNode::Remove() {
  231. xmlNode* nodePtr = GetPtr();
  232. xmlUnlinkNode(nodePtr);
  233. xmlFreeNode(nodePtr);
  234. }
  235. static int XmlWriteToOstream(void* context, const char* buffer, int len) {
  236. // possibly use to save doc as well
  237. IOutputStream* out = (IOutputStream*)context;
  238. out->Write(buffer, len);
  239. return len;
  240. }
  241. void TNode::SaveInternal(IOutputStream& stream, TZtStringBuf enc, int options) const {
  242. const char* encoding = enc.size() ? enc.data() : "utf-8";
  243. TSaveCtxtPtr ctx(xmlSaveToIO(XmlWriteToOstream, /* close */ nullptr, &stream,
  244. encoding, options));
  245. if (xmlSaveTree(ctx.Get(), (xmlNode*)GetPtr()) < 0)
  246. THROW(XmlException, "Failed saving node to stream");
  247. }
  248. void TNode::Save(IOutputStream& stream, TZtStringBuf enc, bool shouldFormat) const {
  249. SaveInternal(stream, enc, shouldFormat ? XML_SAVE_FORMAT : 0);
  250. }
  251. void TNode::SaveAsHtml(IOutputStream& stream, TZtStringBuf enc, bool shouldFormat) const {
  252. int options = XML_SAVE_AS_HTML;
  253. options |= shouldFormat ? XML_SAVE_FORMAT : 0;
  254. SaveInternal(stream, enc, options);
  255. }
  256. TConstNodes::TConstNodes(const TConstNodes& nodes)
  257. : SizeValue(nodes.Size())
  258. , Doc(nodes.Doc)
  259. , Obj(nodes.Obj)
  260. {
  261. }
  262. TConstNodes& TConstNodes::operator=(const TConstNodes& nodes) {
  263. if (this != &nodes) {
  264. SizeValue = nodes.Size();
  265. Doc = nodes.Doc;
  266. Obj = nodes.Obj;
  267. }
  268. return *this;
  269. }
  270. TConstNodes::TConstNodes(TConstNodesRef ref)
  271. : SizeValue(ref.r_.Size())
  272. , Doc(ref.r_.Doc)
  273. , Obj(ref.r_.Obj)
  274. {
  275. }
  276. TConstNodes& TConstNodes::operator=(TConstNodesRef ref) {
  277. if (this != &ref.r_) {
  278. SizeValue = ref.r_.Size();
  279. Doc = ref.r_.Doc;
  280. Obj = ref.r_.Obj;
  281. }
  282. return *this;
  283. }
  284. TConstNodes::operator TConstNodesRef() {
  285. return TConstNodesRef(*this);
  286. }
  287. TConstNodes::TConstNodes(xmlDoc* doc, TXPathObjectPtr obj)
  288. : SizeValue(obj && obj->nodesetval ? obj->nodesetval->nodeNr : 0)
  289. , Doc(doc)
  290. , Obj(obj)
  291. {
  292. }
  293. TConstNode TConstNodes::operator[](size_t number) const {
  294. if (number + 1 > Size())
  295. THROW(XmlException, "index out of range " << number);
  296. if (!Obj || !Obj->nodesetval)
  297. THROW(XmlException, "Broken TConstNodes object, Obj is null");
  298. xmlNode* node = Obj->nodesetval->nodeTab[number];
  299. return TNode(Doc, node);
  300. }
  301. TConstNode TConstNodes::TNodeIter::operator*() const {
  302. return Nodes[Index];
  303. }
  304. }