xml-textreader.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. #include "xml-textreader.h"
  2. #include <contrib/libs/libxml/include/libxml/xmlreader.h>
  3. #include <util/generic/yexception.h>
  4. #include <util/string/strip.h>
  5. #include <util/system/compiler.h>
  6. namespace NXml {
  7. TTextReader::TTextReader(IInputStream& stream, const TOptions& options)
  8. : Stream(stream)
  9. , IsError(false)
  10. {
  11. Impl.Reset(xmlReaderForIO(ReadFromInputStreamCallback, nullptr, this, nullptr, nullptr, options.GetMask()));
  12. if (!Impl) {
  13. ythrow yexception() << "cannot instantiate underlying xmlTextReader structure";
  14. }
  15. SetupErrorHandler();
  16. CheckForExceptions();
  17. }
  18. TTextReader::~TTextReader() {
  19. }
  20. bool TTextReader::Read() {
  21. return BoolResult(xmlTextReaderRead(Impl.Get()));
  22. }
  23. TString TTextReader::ReadInnerXml() const {
  24. return TempStringOrEmptyResult(xmlTextReaderReadInnerXml(Impl.Get()));
  25. }
  26. TString TTextReader::ReadOuterXml() const {
  27. return TempStringOrEmptyResult(xmlTextReaderReadOuterXml(Impl.Get()));
  28. }
  29. TString TTextReader::ReadString() const {
  30. return TempStringOrEmptyResult(xmlTextReaderReadString(Impl.Get()));
  31. }
  32. bool TTextReader::ReadAttributeValue() const {
  33. return BoolResult(xmlTextReaderReadAttributeValue(Impl.Get()));
  34. }
  35. int TTextReader::GetAttributeCount() const {
  36. return IntResult(xmlTextReaderAttributeCount(Impl.Get()));
  37. }
  38. TStringBuf TTextReader::GetBaseUri() const {
  39. return ConstStringOrEmptyResult(xmlTextReaderConstBaseUri(Impl.Get()));
  40. }
  41. int TTextReader::GetDepth() const {
  42. return IntResult(xmlTextReaderDepth(Impl.Get()));
  43. }
  44. bool TTextReader::HasAttributes() const {
  45. return BoolResult(xmlTextReaderHasAttributes(Impl.Get()));
  46. }
  47. bool TTextReader::HasValue() const {
  48. return BoolResult(xmlTextReaderHasValue(Impl.Get()));
  49. }
  50. bool TTextReader::IsDefault() const {
  51. return BoolResult(xmlTextReaderIsDefault(Impl.Get()));
  52. }
  53. bool TTextReader::IsEmptyElement() const {
  54. return BoolResult(xmlTextReaderIsEmptyElement(Impl.Get()));
  55. }
  56. TStringBuf TTextReader::GetLocalName() const {
  57. return ConstStringOrEmptyResult(xmlTextReaderConstLocalName(Impl.Get()));
  58. }
  59. TStringBuf TTextReader::GetName() const {
  60. return ConstStringOrEmptyResult(xmlTextReaderConstName(Impl.Get()));
  61. }
  62. TStringBuf TTextReader::GetNamespaceUri() const {
  63. return ConstStringOrEmptyResult(xmlTextReaderConstNamespaceUri(Impl.Get()));
  64. }
  65. TTextReader::ENodeType TTextReader::GetNodeType() const {
  66. return static_cast<ENodeType>(IntResult(xmlTextReaderNodeType(Impl.Get())));
  67. }
  68. TStringBuf TTextReader::GetPrefix() const {
  69. return ConstStringOrEmptyResult(xmlTextReaderConstPrefix(Impl.Get()));
  70. }
  71. char TTextReader::GetQuoteChar() const {
  72. return CharResult(xmlTextReaderQuoteChar(Impl.Get()));
  73. }
  74. TStringBuf TTextReader::GetValue() const {
  75. return ConstStringOrEmptyResult(xmlTextReaderConstValue(Impl.Get()));
  76. }
  77. TTextReader::EReadState TTextReader::GetReadState() const {
  78. return static_cast<EReadState>(IntResult(xmlTextReaderReadState(Impl.Get())));
  79. }
  80. void TTextReader::Close() {
  81. if (xmlTextReaderClose(Impl.Get()) == -1) {
  82. ThrowException();
  83. }
  84. }
  85. TString TTextReader::GetAttribute(int number) const {
  86. return TempStringResult(xmlTextReaderGetAttributeNo(Impl.Get(), number));
  87. }
  88. TString TTextReader::GetAttribute(TZtStringBuf name) const {
  89. return TempStringResult(xmlTextReaderGetAttribute(Impl.Get(), XMLCHAR(name.data())));
  90. }
  91. TString TTextReader::GetAttribute(TZtStringBuf localName, TZtStringBuf nsUri) const {
  92. return TempStringResult(xmlTextReaderGetAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
  93. }
  94. TString TTextReader::LookupNamespace(TZtStringBuf prefix) const {
  95. return TempStringResult(xmlTextReaderLookupNamespace(Impl.Get(), XMLCHAR(prefix.data())));
  96. }
  97. bool TTextReader::MoveToAttribute(int number) {
  98. return BoolResult(xmlTextReaderMoveToAttributeNo(Impl.Get(), number));
  99. }
  100. bool TTextReader::MoveToAttribute(TZtStringBuf name) {
  101. return BoolResult(xmlTextReaderMoveToAttribute(Impl.Get(), XMLCHAR(name.data())));
  102. }
  103. bool TTextReader::MoveToAttribute(TZtStringBuf localName, TZtStringBuf nsUri) {
  104. return BoolResult(xmlTextReaderMoveToAttributeNs(Impl.Get(), XMLCHAR(localName.data()), XMLCHAR(nsUri.data())));
  105. }
  106. bool TTextReader::MoveToFirstAttribute() {
  107. return BoolResult(xmlTextReaderMoveToFirstAttribute(Impl.Get()));
  108. }
  109. bool TTextReader::MoveToNextAttribute() {
  110. return BoolResult(xmlTextReaderMoveToNextAttribute(Impl.Get()));
  111. }
  112. bool TTextReader::MoveToElement() {
  113. return BoolResult(xmlTextReaderMoveToElement(Impl.Get()));
  114. }
  115. TConstNode TTextReader::Expand() const {
  116. const xmlNodePtr node = xmlTextReaderExpand(Impl.Get());
  117. if (node == nullptr) {
  118. ThrowException();
  119. }
  120. return TConstNode(TNode(node->doc, node));
  121. }
  122. bool TTextReader::Next() {
  123. return BoolResult(xmlTextReaderNext(Impl.Get()));
  124. }
  125. bool TTextReader::IsValid() const {
  126. return BoolResult(xmlTextReaderIsValid(Impl.Get()));
  127. }
  128. // Callback for xmlReaderForIO() to read more data.
  129. // It is almost "noexcept" (std::bad_alloc may happen when saving exception message to new TString).
  130. // Waiting for std::exception_ptr and std::rethrow_exception from C++11 in Arcadia to make it really "noexcept".
  131. int TTextReader::ReadFromInputStreamCallback(void* context, char* buffer, int len) {
  132. Y_ASSERT(len >= 0);
  133. TTextReader* reader = static_cast<TTextReader*>(context);
  134. int result = -1;
  135. // Exception may be thrown by IInputStream::Read().
  136. // It is caught unconditionally because exceptions cannot safely pass through libxml2 plain C code
  137. // (no destructors, no RAII, raw pointers, so in case of stack unwinding some memory gets leaked).
  138. try {
  139. result = reader->Stream.Read(buffer, len);
  140. } catch (const yexception& ex) {
  141. reader->LogError() << "read from input stream failed: " << ex;
  142. } catch (...) {
  143. reader->LogError() << "read from input stream failed";
  144. }
  145. return result;
  146. }
  147. void TTextReader::OnLibxmlError(void* arg, const char* msg, xmlParserSeverities severity, xmlTextReaderLocatorPtr locator) {
  148. TTextReader* reader = static_cast<TTextReader*>(arg);
  149. Y_ASSERT(reader != nullptr);
  150. TStringStream& out = reader->LogError();
  151. if (severity == XML_PARSER_SEVERITY_ERROR) {
  152. out << "libxml parse error";
  153. } else if (severity == XML_PARSER_SEVERITY_VALIDITY_ERROR) {
  154. out << "libxml validity error";
  155. } else {
  156. out << "libxml error";
  157. }
  158. if (locator != nullptr) {
  159. const int line = xmlTextReaderLocatorLineNumber(locator);
  160. const TCharPtr baseUri = xmlTextReaderLocatorBaseURI(locator);
  161. out << " (";
  162. if (line != -1) {
  163. out << "at line " << line;
  164. if (baseUri) {
  165. out << ", ";
  166. }
  167. }
  168. if (baseUri) {
  169. out << "base URI " << CAST2CHAR(baseUri.Get());
  170. }
  171. out << ")";
  172. }
  173. TStringBuf message = (msg != nullptr) ? msg : "unknown";
  174. message = StripStringRight(message); // remove trailing \n that is added by libxml
  175. if (!message.empty()) {
  176. out << ": " << message;
  177. }
  178. }
  179. void TTextReader::SetupErrorHandler() {
  180. xmlTextReaderErrorFunc func = nullptr;
  181. void* arg = nullptr;
  182. // We respect any other error handlers already set up:
  183. xmlTextReaderGetErrorHandler(Impl.Get(), &func, &arg);
  184. if (!func) {
  185. func = TTextReader::OnLibxmlError;
  186. xmlTextReaderSetErrorHandler(Impl.Get(), func, this);
  187. }
  188. }
  189. TStringStream& TTextReader::LogError() const {
  190. if (IsError) { // maybe there are previous errors
  191. ErrorBuffer << Endl;
  192. }
  193. IsError = true;
  194. return ErrorBuffer;
  195. }
  196. void TTextReader::CheckForExceptions() const {
  197. if (Y_LIKELY(!IsError)) {
  198. return;
  199. }
  200. const TString message = ErrorBuffer.Str();
  201. ErrorBuffer.clear();
  202. IsError = false;
  203. ythrow yexception() << message;
  204. }
  205. void TTextReader::ThrowException() const {
  206. CheckForExceptions();
  207. // Probably CheckForExceptions() would throw an exception with more verbose message. As the last resort
  208. // (we do not even know the name of the failed libxml function, but it's possible to deduce it from stacktrace):
  209. ythrow yexception() << "libxml function returned error exit code";
  210. }
  211. bool TTextReader::BoolResult(int value) const {
  212. if (Y_UNLIKELY(value == -1)) {
  213. ThrowException();
  214. }
  215. return (value != 0);
  216. }
  217. int TTextReader::IntResult(int value) const {
  218. if (Y_UNLIKELY(value == -1)) {
  219. ThrowException();
  220. }
  221. return value;
  222. }
  223. char TTextReader::CharResult(int value) const {
  224. if (Y_UNLIKELY(value == -1)) {
  225. ThrowException();
  226. }
  227. return static_cast<char>(value);
  228. }
  229. TStringBuf TTextReader::ConstStringResult(const xmlChar* value) const {
  230. if (Y_UNLIKELY(value == nullptr)) {
  231. ThrowException();
  232. }
  233. return CAST2CHAR(value);
  234. }
  235. TStringBuf TTextReader::ConstStringOrEmptyResult(const xmlChar* value) const {
  236. CheckForExceptions();
  237. return (value != nullptr) ? TStringBuf(CAST2CHAR(value)) : TStringBuf();
  238. }
  239. TString TTextReader::TempStringResult(TCharPtr value) const {
  240. if (Y_UNLIKELY(value == nullptr)) {
  241. ThrowException();
  242. }
  243. return TString(CAST2CHAR(value.Get()));
  244. }
  245. TString TTextReader::TempStringOrEmptyResult(TCharPtr value) const {
  246. CheckForExceptions();
  247. return (value != nullptr) ? TString(CAST2CHAR(value.Get())) : TString();
  248. }
  249. struct TTextReader::TDeleter {
  250. static inline void Destroy(xmlTextReaderPtr handle) {
  251. xmlFreeTextReader(handle);
  252. }
  253. };
  254. }