ContentHandler.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. //
  2. // ContentHandler.h
  3. //
  4. // Library: XML
  5. // Package: SAX
  6. // Module: SAX
  7. //
  8. // SAX2 ContentHandler Interface.
  9. //
  10. // Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
  11. // and Contributors.
  12. //
  13. // SPDX-License-Identifier: BSL-1.0
  14. //
  15. #ifndef SAX_ContentHandler_INCLUDED
  16. #define SAX_ContentHandler_INCLUDED
  17. #include "Poco/XML/XML.h"
  18. #include "Poco/XML/XMLString.h"
  19. namespace Poco {
  20. namespace XML {
  21. class Locator;
  22. class Attributes;
  23. class XML_API ContentHandler
  24. /// Receive notification of the logical content of a document.
  25. ///
  26. /// This is the main interface that most SAX applications implement: if the
  27. /// application needs to be informed of basic parsing events, it implements
  28. /// this interface and registers an instance with the SAX parser using the setContentHandler
  29. /// method. The parser uses the instance to report basic document-related events
  30. /// like the start and end of elements and character data.
  31. ///
  32. /// The order of events in this interface is very important, and mirrors the
  33. /// order of information in the document itself. For example, all of an element's
  34. /// content (character data, processing instructions, and/or subelements) will
  35. /// appear, in order, between the startElement event and the corresponding endElement
  36. /// event.
  37. ///
  38. /// This interface is similar to the now-deprecated SAX 1.0 DocumentHandler
  39. /// interface, but it adds support for Namespaces and for reporting skipped
  40. /// entities (in non-validating XML processors).
  41. /// Receive notification of the logical content of a document.
  42. {
  43. public:
  44. virtual void setDocumentLocator(const Locator* loc) = 0;
  45. /// Receive an object for locating the origin of SAX document events.
  46. ///
  47. /// SAX parsers are strongly encouraged (though not absolutely required) to
  48. /// supply a locator: if it does so, it must supply the locator to the application
  49. /// by invoking this method before invoking any of the other methods in the
  50. /// ContentHandler interface.
  51. ///
  52. /// The locator allows the application to determine the end position of any
  53. /// document-related event, even if the parser is not reporting an error. Typically,
  54. /// the application will use this information for reporting its own errors (such
  55. /// as character content that does not match an application's business rules).
  56. /// The information returned by the locator is probably not sufficient for use
  57. /// with a search engine.
  58. ///
  59. /// Note that the locator will return correct information only during the invocation
  60. /// SAX event callbacks after startDocument returns and before endDocument is
  61. /// called. The application should not attempt to use it at any other time.
  62. virtual void startDocument() = 0;
  63. /// Receive notification of the beginning of a document.
  64. ///
  65. /// The SAX parser calls this function one time before calling all other
  66. /// functions of this class (except SetDocumentLocator).
  67. virtual void endDocument() = 0;
  68. /// Receive notification of the end of a document.
  69. ///
  70. /// The SAX parser will invoke this method only once, and it will be the last
  71. /// method invoked during the parse. The parser shall not invoke this method
  72. /// until it has either abandoned parsing (because of an unrecoverable error)
  73. /// or reached the end of input.
  74. virtual void startElement(const XMLString& uri, const XMLString& localName, const XMLString& qname, const Attributes& attrList) = 0;
  75. /// Receive notification of the beginning of an element.
  76. ///
  77. /// The Parser will invoke this method at the beginning of every element in
  78. /// the XML document; there will be a corresponding endElement event for every
  79. /// startElement event (even when the element is empty). All of the element's
  80. /// content will be reported, in order, before the corresponding endElement
  81. /// event.
  82. ///
  83. /// This event allows up to three name components for each element:
  84. /// 1. the Namespace URI;
  85. /// 2. the local name; and
  86. /// 3. the qualified (prefixed) name.
  87. ///
  88. /// Any or all of these may be provided, depending on the values of the http://xml.org/sax/features/namespaces
  89. /// and the http://xml.org/sax/features/namespace-prefixes properties:
  90. /// * the Namespace URI and local name are required when the namespaces
  91. /// property is true (the default), and are optional when the namespaces property
  92. /// is false (if one is specified, both must be);
  93. /// * the qualified name is required when the namespace-prefixes property
  94. /// is true, and is optional when the namespace-prefixes property is false (the
  95. /// default).
  96. ///
  97. /// Note that the attribute list provided will contain only attributes with
  98. /// explicit values (specified or defaulted): #IMPLIED attributes will be omitted.
  99. /// The attribute list will contain attributes used for Namespace declarations
  100. /// (xmlns* attributes) only if the http://xml.org/sax/features/namespace-prefixes
  101. /// property is true (it is false by default, and support for a true value is
  102. /// optional).
  103. ///
  104. /// Like characters(), attribute values may have characters that need more than
  105. /// one char value.
  106. virtual void endElement(const XMLString& uri, const XMLString& localName, const XMLString& qname) = 0;
  107. /// Receive notification of the end of an element.
  108. ///
  109. /// The SAX parser will invoke this method at the end of every element in the
  110. /// XML document; there will be a corresponding startElement event for every
  111. /// endElement event (even when the element is empty).
  112. ///
  113. /// For information on the names, see startElement.
  114. virtual void characters(const XMLChar ch[], int start, int length) = 0;
  115. /// Receive notification of character data.
  116. ///
  117. /// The Parser will call this method to report each chunk of character data.
  118. /// SAX parsers may return all contiguous character data in a single chunk,
  119. /// or they may split it into several chunks; however, all of the characters
  120. /// in any single event must come from the same external entity so that the
  121. /// Locator provides useful information.
  122. ///
  123. /// The application must not attempt to read from the array outside of the specified
  124. /// range.
  125. ///
  126. /// Individual characters may consist of more than one XMLChar value. There
  127. /// are three important cases where this happens, because characters can't be
  128. /// represented in just sixteen bits. In one case, characters are represented
  129. /// in a Surrogate Pair, using two special Unicode values. Such characters are
  130. /// in the so-called "Astral Planes", with a code point above U+FFFF. A second
  131. /// case involves composite characters, such as a base character combining with
  132. /// one or more accent characters. And most important, if XMLChar is a plain
  133. /// char, characters are encoded in UTF-8.
  134. ///
  135. /// Your code should not assume that algorithms using char-at-a-time idioms
  136. /// will be working in character units; in some cases they will split characters.
  137. /// This is relevant wherever XML permits arbitrary characters, such as attribute
  138. /// values, processing instruction data, and comments as well as in data reported
  139. /// from this method. It's also generally relevant whenever C++ code manipulates
  140. /// internationalized text; the issue isn't unique to XML.
  141. ///
  142. /// Note that some parsers will report whitespace in element content using the
  143. /// ignorableWhitespace method rather than this one (validating parsers must
  144. /// do so).
  145. virtual void ignorableWhitespace(const XMLChar ch[], int start, int length) = 0;
  146. /// Receive notification of ignorable whitespace in element content.
  147. ///
  148. /// Validating Parsers must use this method to report each chunk of whitespace
  149. /// in element content (see the W3C XML 1.0 recommendation, section 2.10): non-validating
  150. /// parsers may also use this method if they are capable of parsing and using
  151. /// content models.
  152. ///
  153. /// SAX parsers may return all contiguous whitespace in a single chunk, or they
  154. /// may split it into several chunks; however, all of the characters in any
  155. /// single event must come from the same external entity, so that the Locator
  156. /// provides useful information.
  157. ///
  158. /// The application must not attempt to read from the array outside of the specified
  159. /// range.
  160. virtual void processingInstruction(const XMLString& target, const XMLString& data) = 0;
  161. /// Receive notification of a processing instruction.
  162. ///
  163. /// The Parser will invoke this method once for each processing instruction
  164. /// found: note that processing instructions may occur before or after the main
  165. /// document element.
  166. ///
  167. /// A SAX parser must never report an XML declaration (XML 1.0, section 2.8)
  168. /// or a text declaration (XML 1.0, section 4.3.1) using this method.
  169. ///
  170. /// Like characters(), processing instruction data may have characters that
  171. /// need more than one char value.
  172. virtual void startPrefixMapping(const XMLString& prefix, const XMLString& uri) = 0;
  173. /// Begin the scope of a prefix-URI Namespace mapping.
  174. ///
  175. /// The information from this event is not necessary for normal Namespace processing:
  176. /// the SAX XML reader will automatically replace prefixes for element and attribute
  177. /// names when the http://xml.org/sax/features/namespaces feature is true (the
  178. /// default).
  179. ///
  180. /// There are cases, however, when applications need to use prefixes in character
  181. /// data or in attribute values, where they cannot safely be expanded automatically;
  182. /// the start/endPrefixMapping event supplies the information to the application
  183. /// to expand prefixes in those contexts itself, if necessary.
  184. ///
  185. /// Note that start/endPrefixMapping events are not guaranteed to be properly
  186. /// nested relative to each other: all startPrefixMapping events will occur
  187. /// immediately before the corresponding startElement event, and all endPrefixMapping
  188. /// events will occur immediately after the corresponding endElement event,
  189. /// but their order is not otherwise guaranteed.
  190. ///
  191. /// There should never be start/endPrefixMapping events for the "xml" prefix,
  192. /// since it is predeclared and immutable.
  193. virtual void endPrefixMapping(const XMLString& prefix) = 0;
  194. /// End the scope of a prefix-URI mapping.
  195. ///
  196. /// See startPrefixMapping for details. These events will always occur immediately
  197. /// after the corresponding endElement event, but the order of endPrefixMapping
  198. /// events is not otherwise guaranteed.
  199. virtual void skippedEntity(const XMLString& name) = 0;
  200. /// Receive notification of a skipped entity. This is not called for entity
  201. /// references within markup constructs such as element start tags or markup
  202. /// declarations. (The XML recommendation requires reporting skipped external
  203. /// entities. SAX also reports internal entity expansion/non-expansion, except
  204. /// within markup constructs.)
  205. ///
  206. /// The Parser will invoke this method each time the entity is skipped. Non-validating
  207. /// processors may skip entities if they have not seen the declarations (because,
  208. /// for example, the entity was declared in an external DTD subset). All processors
  209. /// may skip external entities, depending on the values of the http://xml.org/sax/features/external-general-entities
  210. /// and the http://xml.org/sax/features/external-parameter-entities properties.
  211. protected:
  212. virtual ~ContentHandler();
  213. };
  214. } } // namespace Poco::XML
  215. #endif // SAX_ContentHandler_INCLUDED