entities.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163
  1. /*
  2. * entities.c : implementation for the XML entities handling
  3. *
  4. * See Copyright for the status of this software.
  5. *
  6. * daniel@veillard.com
  7. */
  8. /* To avoid EBCDIC trouble when parsing on zOS */
  9. #if defined(__MVS__)
  10. #pragma convert("ISO8859-1")
  11. #endif
  12. #define IN_LIBXML
  13. #include "libxml.h"
  14. #include <string.h>
  15. #ifdef HAVE_STDLIB_H
  16. #include <stdlib.h>
  17. #endif
  18. #include <libxml/xmlmemory.h>
  19. #include <libxml/hash.h>
  20. #include <libxml/entities.h>
  21. #include <libxml/parser.h>
  22. #include <libxml/parserInternals.h>
  23. #include <libxml/xmlerror.h>
  24. #include <libxml/globals.h>
  25. #include <libxml/dict.h>
  26. #include "save.h"
/*
 * The XML predefined entities.
 *
 * One statically allocated record each for "lt", "gt", "amp", "quot" and
 * "apos". xmlGetPredefinedEntity() returns pointers to these records and
 * xmlAddEntity() reads content[0] from them when it validates a
 * redeclaration of a predefined entity.
 *
 * NOTE(review): the initializers are positional, so they rely on the field
 * order of xmlEntity as declared in the libxml headers; keep them in sync
 * with that declaration.
 */
static xmlEntity xmlEntityLt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "lt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "<", BAD_CAST "<", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityGt = {
    NULL, XML_ENTITY_DECL, BAD_CAST "gt",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST ">", BAD_CAST ">", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityAmp = {
    NULL, XML_ENTITY_DECL, BAD_CAST "amp",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "&", BAD_CAST "&", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityQuot = {
    NULL, XML_ENTITY_DECL, BAD_CAST "quot",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "\"", BAD_CAST "\"", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
static xmlEntity xmlEntityApos = {
    NULL, XML_ENTITY_DECL, BAD_CAST "apos",
    NULL, NULL, NULL, NULL, NULL, NULL,
    BAD_CAST "'", BAD_CAST "'", 1,
    XML_INTERNAL_PREDEFINED_ENTITY,
    NULL, NULL, NULL, NULL, 0, 1
};
  65. /**
  66. * xmlEntitiesErrMemory:
  67. * @extra: extra information
  68. *
  69. * Handle an out of memory condition
  70. */
  71. static void
  72. xmlEntitiesErrMemory(const char *extra)
  73. {
  74. __xmlSimpleError(XML_FROM_TREE, XML_ERR_NO_MEMORY, NULL, NULL, extra);
  75. }
  76. /**
  77. * xmlEntitiesErr:
  78. * @code: the error code
  79. * @msg: the message
  80. *
  81. * Handle an out of memory condition
  82. */
  83. static void LIBXML_ATTR_FORMAT(2,0)
  84. xmlEntitiesErr(xmlParserErrors code, const char *msg)
  85. {
  86. __xmlSimpleError(XML_FROM_TREE, code, NULL, msg, NULL);
  87. }
  88. /*
  89. * xmlFreeEntity : clean-up an entity record.
  90. */
  91. static void
  92. xmlFreeEntity(xmlEntityPtr entity)
  93. {
  94. xmlDictPtr dict = NULL;
  95. if (entity == NULL)
  96. return;
  97. if (entity->doc != NULL)
  98. dict = entity->doc->dict;
  99. if ((entity->children) && (entity->owner == 1) &&
  100. (entity == (xmlEntityPtr) entity->children->parent))
  101. xmlFreeNodeList(entity->children);
  102. if (dict != NULL) {
  103. if ((entity->name != NULL) && (!xmlDictOwns(dict, entity->name)))
  104. xmlFree((char *) entity->name);
  105. if ((entity->ExternalID != NULL) &&
  106. (!xmlDictOwns(dict, entity->ExternalID)))
  107. xmlFree((char *) entity->ExternalID);
  108. if ((entity->SystemID != NULL) &&
  109. (!xmlDictOwns(dict, entity->SystemID)))
  110. xmlFree((char *) entity->SystemID);
  111. if ((entity->URI != NULL) && (!xmlDictOwns(dict, entity->URI)))
  112. xmlFree((char *) entity->URI);
  113. if ((entity->content != NULL)
  114. && (!xmlDictOwns(dict, entity->content)))
  115. xmlFree((char *) entity->content);
  116. if ((entity->orig != NULL) && (!xmlDictOwns(dict, entity->orig)))
  117. xmlFree((char *) entity->orig);
  118. } else {
  119. if (entity->name != NULL)
  120. xmlFree((char *) entity->name);
  121. if (entity->ExternalID != NULL)
  122. xmlFree((char *) entity->ExternalID);
  123. if (entity->SystemID != NULL)
  124. xmlFree((char *) entity->SystemID);
  125. if (entity->URI != NULL)
  126. xmlFree((char *) entity->URI);
  127. if (entity->content != NULL)
  128. xmlFree((char *) entity->content);
  129. if (entity->orig != NULL)
  130. xmlFree((char *) entity->orig);
  131. }
  132. xmlFree(entity);
  133. }
  134. /*
  135. * xmlCreateEntity:
  136. *
  137. * internal routine doing the entity node structures allocations
  138. */
  139. static xmlEntityPtr
  140. xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type,
  141. const xmlChar *ExternalID, const xmlChar *SystemID,
  142. const xmlChar *content) {
  143. xmlEntityPtr ret;
  144. ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
  145. if (ret == NULL) {
  146. xmlEntitiesErrMemory("xmlCreateEntity: malloc failed");
  147. return(NULL);
  148. }
  149. memset(ret, 0, sizeof(xmlEntity));
  150. ret->type = XML_ENTITY_DECL;
  151. ret->checked = 0;
  152. /*
  153. * fill the structure.
  154. */
  155. ret->etype = (xmlEntityType) type;
  156. if (dict == NULL) {
  157. ret->name = xmlStrdup(name);
  158. if (ExternalID != NULL)
  159. ret->ExternalID = xmlStrdup(ExternalID);
  160. if (SystemID != NULL)
  161. ret->SystemID = xmlStrdup(SystemID);
  162. } else {
  163. ret->name = xmlDictLookup(dict, name, -1);
  164. if (ExternalID != NULL)
  165. ret->ExternalID = xmlDictLookup(dict, ExternalID, -1);
  166. if (SystemID != NULL)
  167. ret->SystemID = xmlDictLookup(dict, SystemID, -1);
  168. }
  169. if (content != NULL) {
  170. ret->length = xmlStrlen(content);
  171. if ((dict != NULL) && (ret->length < 5))
  172. ret->content = (xmlChar *)
  173. xmlDictLookup(dict, content, ret->length);
  174. else
  175. ret->content = xmlStrndup(content, ret->length);
  176. } else {
  177. ret->length = 0;
  178. ret->content = NULL;
  179. }
  180. ret->URI = NULL; /* to be computed by the layer knowing
  181. the defining entity */
  182. ret->orig = NULL;
  183. ret->owner = 0;
  184. return(ret);
  185. }
  186. /*
  187. * xmlAddEntity : register a new entity for an entities table.
  188. */
  189. static xmlEntityPtr
  190. xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type,
  191. const xmlChar *ExternalID, const xmlChar *SystemID,
  192. const xmlChar *content) {
  193. xmlDictPtr dict = NULL;
  194. xmlEntitiesTablePtr table = NULL;
  195. xmlEntityPtr ret, predef;
  196. if (name == NULL)
  197. return(NULL);
  198. if (dtd == NULL)
  199. return(NULL);
  200. if (dtd->doc != NULL)
  201. dict = dtd->doc->dict;
  202. switch (type) {
  203. case XML_INTERNAL_GENERAL_ENTITY:
  204. case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
  205. case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
  206. predef = xmlGetPredefinedEntity(name);
  207. if (predef != NULL) {
  208. int valid = 0;
  209. /* 4.6 Predefined Entities */
  210. if ((type == XML_INTERNAL_GENERAL_ENTITY) &&
  211. (content != NULL)) {
  212. int c = predef->content[0];
  213. if (((content[0] == c) && (content[1] == 0)) &&
  214. ((c == '>') || (c == '\'') || (c == '"'))) {
  215. valid = 1;
  216. } else if ((content[0] == '&') && (content[1] == '#')) {
  217. if (content[2] == 'x') {
  218. xmlChar *hex = BAD_CAST "0123456789ABCDEF";
  219. xmlChar ref[] = "00;";
  220. ref[0] = hex[c / 16 % 16];
  221. ref[1] = hex[c % 16];
  222. if (xmlStrcasecmp(&content[3], ref) == 0)
  223. valid = 1;
  224. } else {
  225. xmlChar ref[] = "00;";
  226. ref[0] = '0' + c / 10 % 10;
  227. ref[1] = '0' + c % 10;
  228. if (xmlStrEqual(&content[2], ref))
  229. valid = 1;
  230. }
  231. }
  232. }
  233. if (!valid) {
  234. xmlEntitiesErr(XML_ERR_ENTITY_PROCESSING,
  235. "xmlAddEntity: invalid redeclaration of predefined"
  236. " entity");
  237. return(NULL);
  238. }
  239. }
  240. if (dtd->entities == NULL)
  241. dtd->entities = xmlHashCreateDict(0, dict);
  242. table = dtd->entities;
  243. break;
  244. case XML_INTERNAL_PARAMETER_ENTITY:
  245. case XML_EXTERNAL_PARAMETER_ENTITY:
  246. if (dtd->pentities == NULL)
  247. dtd->pentities = xmlHashCreateDict(0, dict);
  248. table = dtd->pentities;
  249. break;
  250. case XML_INTERNAL_PREDEFINED_ENTITY:
  251. return(NULL);
  252. }
  253. if (table == NULL)
  254. return(NULL);
  255. ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
  256. if (ret == NULL)
  257. return(NULL);
  258. ret->doc = dtd->doc;
  259. if (xmlHashAddEntry(table, name, ret)) {
  260. /*
  261. * entity was already defined at another level.
  262. */
  263. xmlFreeEntity(ret);
  264. return(NULL);
  265. }
  266. return(ret);
  267. }
  268. /**
  269. * xmlGetPredefinedEntity:
  270. * @name: the entity name
  271. *
  272. * Check whether this name is an predefined entity.
  273. *
  274. * Returns NULL if not, otherwise the entity
  275. */
  276. xmlEntityPtr
  277. xmlGetPredefinedEntity(const xmlChar *name) {
  278. if (name == NULL) return(NULL);
  279. switch (name[0]) {
  280. case 'l':
  281. if (xmlStrEqual(name, BAD_CAST "lt"))
  282. return(&xmlEntityLt);
  283. break;
  284. case 'g':
  285. if (xmlStrEqual(name, BAD_CAST "gt"))
  286. return(&xmlEntityGt);
  287. break;
  288. case 'a':
  289. if (xmlStrEqual(name, BAD_CAST "amp"))
  290. return(&xmlEntityAmp);
  291. if (xmlStrEqual(name, BAD_CAST "apos"))
  292. return(&xmlEntityApos);
  293. break;
  294. case 'q':
  295. if (xmlStrEqual(name, BAD_CAST "quot"))
  296. return(&xmlEntityQuot);
  297. break;
  298. default:
  299. break;
  300. }
  301. return(NULL);
  302. }
  303. /**
  304. * xmlAddDtdEntity:
  305. * @doc: the document
  306. * @name: the entity name
  307. * @type: the entity type XML_xxx_yyy_ENTITY
  308. * @ExternalID: the entity external ID if available
  309. * @SystemID: the entity system ID if available
  310. * @content: the entity content
  311. *
  312. * Register a new entity for this document DTD external subset.
  313. *
  314. * Returns a pointer to the entity or NULL in case of error
  315. */
  316. xmlEntityPtr
  317. xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
  318. const xmlChar *ExternalID, const xmlChar *SystemID,
  319. const xmlChar *content) {
  320. xmlEntityPtr ret;
  321. xmlDtdPtr dtd;
  322. if (doc == NULL) {
  323. xmlEntitiesErr(XML_DTD_NO_DOC,
  324. "xmlAddDtdEntity: document is NULL");
  325. return(NULL);
  326. }
  327. if (doc->extSubset == NULL) {
  328. xmlEntitiesErr(XML_DTD_NO_DTD,
  329. "xmlAddDtdEntity: document without external subset");
  330. return(NULL);
  331. }
  332. dtd = doc->extSubset;
  333. ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
  334. if (ret == NULL) return(NULL);
  335. /*
  336. * Link it to the DTD
  337. */
  338. ret->parent = dtd;
  339. ret->doc = dtd->doc;
  340. if (dtd->last == NULL) {
  341. dtd->children = dtd->last = (xmlNodePtr) ret;
  342. } else {
  343. dtd->last->next = (xmlNodePtr) ret;
  344. ret->prev = dtd->last;
  345. dtd->last = (xmlNodePtr) ret;
  346. }
  347. return(ret);
  348. }
  349. /**
  350. * xmlAddDocEntity:
  351. * @doc: the document
  352. * @name: the entity name
  353. * @type: the entity type XML_xxx_yyy_ENTITY
  354. * @ExternalID: the entity external ID if available
  355. * @SystemID: the entity system ID if available
  356. * @content: the entity content
  357. *
  358. * Register a new entity for this document.
  359. *
  360. * Returns a pointer to the entity or NULL in case of error
  361. */
  362. xmlEntityPtr
  363. xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
  364. const xmlChar *ExternalID, const xmlChar *SystemID,
  365. const xmlChar *content) {
  366. xmlEntityPtr ret;
  367. xmlDtdPtr dtd;
  368. if (doc == NULL) {
  369. xmlEntitiesErr(XML_DTD_NO_DOC,
  370. "xmlAddDocEntity: document is NULL");
  371. return(NULL);
  372. }
  373. if (doc->intSubset == NULL) {
  374. xmlEntitiesErr(XML_DTD_NO_DTD,
  375. "xmlAddDocEntity: document without internal subset");
  376. return(NULL);
  377. }
  378. dtd = doc->intSubset;
  379. ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content);
  380. if (ret == NULL) return(NULL);
  381. /*
  382. * Link it to the DTD
  383. */
  384. ret->parent = dtd;
  385. ret->doc = dtd->doc;
  386. if (dtd->last == NULL) {
  387. dtd->children = dtd->last = (xmlNodePtr) ret;
  388. } else {
  389. dtd->last->next = (xmlNodePtr) ret;
  390. ret->prev = dtd->last;
  391. dtd->last = (xmlNodePtr) ret;
  392. }
  393. return(ret);
  394. }
  395. /**
  396. * xmlNewEntity:
  397. * @doc: the document
  398. * @name: the entity name
  399. * @type: the entity type XML_xxx_yyy_ENTITY
  400. * @ExternalID: the entity external ID if available
  401. * @SystemID: the entity system ID if available
  402. * @content: the entity content
  403. *
  404. * Create a new entity, this differs from xmlAddDocEntity() that if
  405. * the document is NULL or has no internal subset defined, then an
  406. * unlinked entity structure will be returned, it is then the responsibility
  407. * of the caller to link it to the document later or free it when not needed
  408. * anymore.
  409. *
  410. * Returns a pointer to the entity or NULL in case of error
  411. */
  412. xmlEntityPtr
  413. xmlNewEntity(xmlDocPtr doc, const xmlChar *name, int type,
  414. const xmlChar *ExternalID, const xmlChar *SystemID,
  415. const xmlChar *content) {
  416. xmlEntityPtr ret;
  417. xmlDictPtr dict;
  418. if ((doc != NULL) && (doc->intSubset != NULL)) {
  419. return(xmlAddDocEntity(doc, name, type, ExternalID, SystemID, content));
  420. }
  421. if (doc != NULL)
  422. dict = doc->dict;
  423. else
  424. dict = NULL;
  425. ret = xmlCreateEntity(dict, name, type, ExternalID, SystemID, content);
  426. if (ret == NULL)
  427. return(NULL);
  428. ret->doc = doc;
  429. return(ret);
  430. }
  431. /**
  432. * xmlGetEntityFromTable:
  433. * @table: an entity table
  434. * @name: the entity name
  435. * @parameter: look for parameter entities
  436. *
  437. * Do an entity lookup in the table.
  438. * returns the corresponding parameter entity, if found.
  439. *
  440. * Returns A pointer to the entity structure or NULL if not found.
  441. */
  442. static xmlEntityPtr
  443. xmlGetEntityFromTable(xmlEntitiesTablePtr table, const xmlChar *name) {
  444. return((xmlEntityPtr) xmlHashLookup(table, name));
  445. }
  446. /**
  447. * xmlGetParameterEntity:
  448. * @doc: the document referencing the entity
  449. * @name: the entity name
  450. *
  451. * Do an entity lookup in the internal and external subsets and
  452. * returns the corresponding parameter entity, if found.
  453. *
  454. * Returns A pointer to the entity structure or NULL if not found.
  455. */
  456. xmlEntityPtr
  457. xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
  458. xmlEntitiesTablePtr table;
  459. xmlEntityPtr ret;
  460. if (doc == NULL)
  461. return(NULL);
  462. if ((doc->intSubset != NULL) && (doc->intSubset->pentities != NULL)) {
  463. table = (xmlEntitiesTablePtr) doc->intSubset->pentities;
  464. ret = xmlGetEntityFromTable(table, name);
  465. if (ret != NULL)
  466. return(ret);
  467. }
  468. if ((doc->extSubset != NULL) && (doc->extSubset->pentities != NULL)) {
  469. table = (xmlEntitiesTablePtr) doc->extSubset->pentities;
  470. return(xmlGetEntityFromTable(table, name));
  471. }
  472. return(NULL);
  473. }
  474. /**
  475. * xmlGetDtdEntity:
  476. * @doc: the document referencing the entity
  477. * @name: the entity name
  478. *
  479. * Do an entity lookup in the DTD entity hash table and
  480. * returns the corresponding entity, if found.
  481. * Note: the first argument is the document node, not the DTD node.
  482. *
  483. * Returns A pointer to the entity structure or NULL if not found.
  484. */
  485. xmlEntityPtr
  486. xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
  487. xmlEntitiesTablePtr table;
  488. if (doc == NULL)
  489. return(NULL);
  490. if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
  491. table = (xmlEntitiesTablePtr) doc->extSubset->entities;
  492. return(xmlGetEntityFromTable(table, name));
  493. }
  494. return(NULL);
  495. }
  496. /**
  497. * xmlGetDocEntity:
  498. * @doc: the document referencing the entity
  499. * @name: the entity name
  500. *
  501. * Do an entity lookup in the document entity hash table and
  502. * returns the corresponding entity, otherwise a lookup is done
  503. * in the predefined entities too.
  504. *
  505. * Returns A pointer to the entity structure or NULL if not found.
  506. */
  507. xmlEntityPtr
  508. xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
  509. xmlEntityPtr cur;
  510. xmlEntitiesTablePtr table;
  511. if (doc != NULL) {
  512. if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
  513. table = (xmlEntitiesTablePtr) doc->intSubset->entities;
  514. cur = xmlGetEntityFromTable(table, name);
  515. if (cur != NULL)
  516. return(cur);
  517. }
  518. if (doc->standalone != 1) {
  519. if ((doc->extSubset != NULL) &&
  520. (doc->extSubset->entities != NULL)) {
  521. table = (xmlEntitiesTablePtr) doc->extSubset->entities;
  522. cur = xmlGetEntityFromTable(table, name);
  523. if (cur != NULL)
  524. return(cur);
  525. }
  526. }
  527. }
  528. return(xmlGetPredefinedEntity(name));
  529. }
  530. /*
  531. * Macro used to grow the current buffer.
  532. */
  533. #define growBufferReentrant() { \
  534. xmlChar *tmp; \
  535. size_t new_size = buffer_size * 2; \
  536. if (new_size < buffer_size) goto mem_error; \
  537. tmp = (xmlChar *) xmlRealloc(buffer, new_size); \
  538. if (tmp == NULL) goto mem_error; \
  539. buffer = tmp; \
  540. buffer_size = new_size; \
  541. }
/**
 * xmlEncodeEntitiesInternal:
 * @doc: the document containing the string, may be NULL
 * @input: A string to convert to XML.
 * @attr: are we handling an attribute value
 *
 * Do a global encoding of a string, replacing '<', '>' and '&' with
 * predefined entity references and, depending on the document, non ASCII
 * values with CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Summary of the per-byte handling implemented below:
 *   - '<', '>', '&' become "&lt;", "&gt;", "&amp;". For an HTML document
 *     and @attr set, a "<!-- ... -->" sequence and a "&{ ... }" construct
 *     are copied through unchanged instead.
 *   - Bytes 0x20..0x7F, '\n', '\t' (and '\r' for HTML) are copied. Note
 *     that '"' is in that range and is therefore NOT escaped here.
 *   - Bytes >= 0x80 are copied as is when the document has an encoding or
 *     is HTML; otherwise they are decoded as UTF-8 and written as a
 *     hexadecimal character reference.
 *   - Remaining bytes are written as a decimal character reference if
 *     IS_BYTE_CHAR accepts them, and are dropped otherwise.
 *
 * Side effect: when invalid UTF-8 (or an out of range character) is met
 * and @doc is not NULL, doc->encoding is set to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done, or NULL if
 * @input is NULL or an allocation failed.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate an translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 bytes of headroom before handling the next
         * input byte. growBufferReentrant() may move the buffer, so the
         * output pointer is recomputed from its saved offset.
         */
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            const xmlChar *end;
            /*
             * Special handling of server side include in HTML attributes
             */
            if (html && attr &&
                (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
                ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
                /* Copy everything up to the closing "-->" verbatim. */
                while (cur != end) {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* Then the three characters of "-->" itself. */
                *out++ = *cur++;
                *out++ = *cur++;
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            /*
             * Special handling of &{...} construct from HTML 4, see
             * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
             */
            if (html && attr && (cur[1] == '{') &&
                (strchr((const char *) cur, '}'))) {
                /* A '}' is known to exist, so this loop terminates. */
                while (*cur != '}') {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* Copy the closing '}' as well. */
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
            (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * Bjørn Reese <br@sseusa.com> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                /* The document declares an encoding (or is HTML): the
                 * byte is passed through untouched. */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 * It must match either:
                 *   110xxxxx 10xxxxxx
                 *   1110xxxx 10xxxxxx 10xxxxxx
                 *   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                 * That is:
                 *   cur[0] is 11xxxxxx
                 *   cur[1] is 10xxxxxx
                 *   cur[2] is 10xxxxxx if cur[0] is 111xxxxx
                 *   cur[3] is 10xxxxxx if cur[0] is 1111xxxx
                 *   cur[0] is not 11111xxx
                 */
                /* buf holds one character reference; the longest one
                 * produced here is "&#x" + 6 hex digits + ";" + NUL. */
                char buf[11], *ptr;
                int val = 0, l = 1;

                if (((cur[0] & 0xC0) != 0xC0) ||
                    ((cur[1] & 0xC0) != 0x80) ||
                    (((cur[0] & 0xE0) == 0xE0) && ((cur[2] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF0) == 0xF0) && ((cur[3] & 0xC0) != 0x80)) ||
                    (((cur[0] & 0xF8) == 0xF8))) {
                    /*
                     * Malformed sequence: report it, switch the document
                     * to ISO-8859-1 and emit this single byte as a decimal
                     * character reference.
                     */
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /* two-byte sequence */
                    val = (cur[0]) & 0x1F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    l = 2;
                } else if (*cur < 0xF0) {
                    /* three-byte sequence */
                    val = (cur[0]) & 0x0F;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    l = 3;
                } else if (*cur < 0xF8) {
                    /* four-byte sequence */
                    val = (cur[0]) & 0x07;
                    val <<= 6;
                    val |= (cur[1]) & 0x3F;
                    val <<= 6;
                    val |= (cur[2]) & 0x3F;
                    val <<= 6;
                    val |= (cur[3]) & 0x3F;
                    l = 4;
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    /*
                     * Nothing was decoded, or the code point is rejected
                     * by IS_CHAR: same fallback as for malformed input,
                     * consuming one byte only.
                     */
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntities: char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                /* Skip the whole multi-byte sequence. */
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            /*
             * Bytes below 0x20 not copied above (this includes '\r' for
             * non-HTML documents) end up here and are written as a decimal
             * character reference when IS_BYTE_CHAR accepts them; a byte
             * it rejects produces no output at all.
             * NOTE(review): IS_BYTE_CHAR is defined outside this file;
             * confirm which control characters it accepts.
             */
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    /* The headroom kept by the loop leaves room for the terminator. */
    *out = 0;
    return(buffer);

mem_error:
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}
  746. /**
  747. * xmlEncodeAttributeEntities:
  748. * @doc: the document containing the string
  749. * @input: A string to convert to XML.
  750. *
  751. * Do a global encoding of a string, replacing the predefined entities
  752. * and non ASCII values with their entities and CharRef counterparts for
  753. * attribute values.
  754. *
  755. * Returns A newly allocated string with the substitution done.
  756. */
  757. xmlChar *
  758. xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input) {
  759. return xmlEncodeEntitiesInternal(doc, input, 1);
  760. }
  761. /**
  762. * xmlEncodeEntitiesReentrant:
  763. * @doc: the document containing the string
  764. * @input: A string to convert to XML.
  765. *
  766. * Do a global encoding of a string, replacing the predefined entities
  767. * and non ASCII values with their entities and CharRef counterparts.
  768. * Contrary to xmlEncodeEntities, this routine is reentrant, and result
  769. * must be deallocated.
  770. *
  771. * Returns A newly allocated string with the substitution done.
  772. */
  773. xmlChar *
  774. xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
  775. return xmlEncodeEntitiesInternal(doc, input, 0);
  776. }
  777. /**
  778. * xmlEncodeSpecialChars:
  779. * @doc: the document containing the string
  780. * @input: A string to convert to XML.
  781. *
  782. * Do a global encoding of a string, replacing the predefined entities
  783. * this routine is reentrant, and result must be deallocated.
  784. *
  785. * Returns A newly allocated string with the substitution done.
  786. */
  787. xmlChar *
  788. xmlEncodeSpecialChars(const xmlDoc *doc ATTRIBUTE_UNUSED, const xmlChar *input) {
  789. const xmlChar *cur = input;
  790. xmlChar *buffer = NULL;
  791. xmlChar *out = NULL;
  792. size_t buffer_size = 0;
  793. if (input == NULL) return(NULL);
  794. /*
  795. * allocate an translation buffer.
  796. */
  797. buffer_size = 1000;
  798. buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
  799. if (buffer == NULL) {
  800. xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed");
  801. return(NULL);
  802. }
  803. out = buffer;
  804. while (*cur != '\0') {
  805. size_t indx = out - buffer;
  806. if (indx + 10 > buffer_size) {
  807. growBufferReentrant();
  808. out = &buffer[indx];
  809. }
  810. /*
  811. * By default one have to encode at least '<', '>', '"' and '&' !
  812. */
  813. if (*cur == '<') {
  814. *out++ = '&';
  815. *out++ = 'l';
  816. *out++ = 't';
  817. *out++ = ';';
  818. } else if (*cur == '>') {
  819. *out++ = '&';
  820. *out++ = 'g';
  821. *out++ = 't';
  822. *out++ = ';';
  823. } else if (*cur == '&') {
  824. *out++ = '&';
  825. *out++ = 'a';
  826. *out++ = 'm';
  827. *out++ = 'p';
  828. *out++ = ';';
  829. } else if (*cur == '"') {
  830. *out++ = '&';
  831. *out++ = 'q';
  832. *out++ = 'u';
  833. *out++ = 'o';
  834. *out++ = 't';
  835. *out++ = ';';
  836. } else if (*cur == '\r') {
  837. *out++ = '&';
  838. *out++ = '#';
  839. *out++ = '1';
  840. *out++ = '3';
  841. *out++ = ';';
  842. } else {
  843. /*
  844. * Works because on UTF-8, all extended sequences cannot
  845. * result in bytes in the ASCII range.
  846. */
  847. *out++ = *cur;
  848. }
  849. cur++;
  850. }
  851. *out = 0;
  852. return(buffer);
  853. mem_error:
  854. xmlEntitiesErrMemory("xmlEncodeSpecialChars: realloc failed");
  855. xmlFree(buffer);
  856. return(NULL);
  857. }
  858. /**
  859. * xmlCreateEntitiesTable:
  860. *
  861. * create and initialize an empty entities hash table.
  862. * This really doesn't make sense and should be deprecated
  863. *
  864. * Returns the xmlEntitiesTablePtr just created or NULL in case of error.
  865. */
  866. xmlEntitiesTablePtr
  867. xmlCreateEntitiesTable(void) {
  868. return((xmlEntitiesTablePtr) xmlHashCreate(0));
  869. }
  870. /**
  871. * xmlFreeEntityWrapper:
  872. * @entity: An entity
  873. * @name: its name
  874. *
  875. * Deallocate the memory used by an entities in the hash table.
  876. */
  877. static void
  878. xmlFreeEntityWrapper(void *entity, const xmlChar *name ATTRIBUTE_UNUSED) {
  879. if (entity != NULL)
  880. xmlFreeEntity((xmlEntityPtr) entity);
  881. }
  882. /**
  883. * xmlFreeEntitiesTable:
  884. * @table: An entity table
  885. *
  886. * Deallocate the memory used by an entities hash table.
  887. */
  888. void
  889. xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
  890. xmlHashFree(table, xmlFreeEntityWrapper);
  891. }
  892. #ifdef LIBXML_TREE_ENABLED
  893. /**
  894. * xmlCopyEntity:
  895. * @ent: An entity
  896. *
  897. * Build a copy of an entity
  898. *
  899. * Returns the new xmlEntitiesPtr or NULL in case of error.
  900. */
  901. static void *
  902. xmlCopyEntity(void *payload, const xmlChar *name ATTRIBUTE_UNUSED) {
  903. xmlEntityPtr ent = (xmlEntityPtr) payload;
  904. xmlEntityPtr cur;
  905. cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
  906. if (cur == NULL) {
  907. xmlEntitiesErrMemory("xmlCopyEntity:: malloc failed");
  908. return(NULL);
  909. }
  910. memset(cur, 0, sizeof(xmlEntity));
  911. cur->type = XML_ENTITY_DECL;
  912. cur->etype = ent->etype;
  913. if (ent->name != NULL)
  914. cur->name = xmlStrdup(ent->name);
  915. if (ent->ExternalID != NULL)
  916. cur->ExternalID = xmlStrdup(ent->ExternalID);
  917. if (ent->SystemID != NULL)
  918. cur->SystemID = xmlStrdup(ent->SystemID);
  919. if (ent->content != NULL)
  920. cur->content = xmlStrdup(ent->content);
  921. if (ent->orig != NULL)
  922. cur->orig = xmlStrdup(ent->orig);
  923. if (ent->URI != NULL)
  924. cur->URI = xmlStrdup(ent->URI);
  925. return(cur);
  926. }
  927. /**
  928. * xmlCopyEntitiesTable:
  929. * @table: An entity table
  930. *
  931. * Build a copy of an entity table.
  932. *
  933. * Returns the new xmlEntitiesTablePtr or NULL in case of error.
  934. */
  935. xmlEntitiesTablePtr
  936. xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
  937. return(xmlHashCopy(table, xmlCopyEntity));
  938. }
  939. #endif /* LIBXML_TREE_ENABLED */
  940. #ifdef LIBXML_OUTPUT_ENABLED
  941. /**
  942. * xmlDumpEntityContent:
  943. * @buf: An XML buffer.
  944. * @content: The entity content.
  945. *
  946. * This will dump the quoted string value, taking care of the special
  947. * treatment required by %
  948. */
  949. static void
  950. xmlDumpEntityContent(xmlBufferPtr buf, const xmlChar *content) {
  951. if (buf->alloc == XML_BUFFER_ALLOC_IMMUTABLE) return;
  952. if (xmlStrchr(content, '%')) {
  953. const xmlChar * base, *cur;
  954. xmlBufferCCat(buf, "\"");
  955. base = cur = content;
  956. while (*cur != 0) {
  957. if (*cur == '"') {
  958. if (base != cur)
  959. xmlBufferAdd(buf, base, cur - base);
  960. xmlBufferAdd(buf, BAD_CAST "&quot;", 6);
  961. cur++;
  962. base = cur;
  963. } else if (*cur == '%') {
  964. if (base != cur)
  965. xmlBufferAdd(buf, base, cur - base);
  966. xmlBufferAdd(buf, BAD_CAST "&#x25;", 6);
  967. cur++;
  968. base = cur;
  969. } else {
  970. cur++;
  971. }
  972. }
  973. if (base != cur)
  974. xmlBufferAdd(buf, base, cur - base);
  975. xmlBufferCCat(buf, "\"");
  976. } else {
  977. xmlBufferWriteQuotedString(buf, content);
  978. }
  979. }
  980. /**
  981. * xmlDumpEntityDecl:
  982. * @buf: An XML buffer.
  983. * @ent: An entity table
  984. *
  985. * This will dump the content of the entity table as an XML DTD definition
  986. */
  987. void
  988. xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
  989. if ((buf == NULL) || (ent == NULL)) return;
  990. switch (ent->etype) {
  991. case XML_INTERNAL_GENERAL_ENTITY:
  992. xmlBufferWriteChar(buf, "<!ENTITY ");
  993. xmlBufferWriteCHAR(buf, ent->name);
  994. xmlBufferWriteChar(buf, " ");
  995. if (ent->orig != NULL)
  996. xmlBufferWriteQuotedString(buf, ent->orig);
  997. else
  998. xmlDumpEntityContent(buf, ent->content);
  999. xmlBufferWriteChar(buf, ">\n");
  1000. break;
  1001. case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
  1002. xmlBufferWriteChar(buf, "<!ENTITY ");
  1003. xmlBufferWriteCHAR(buf, ent->name);
  1004. if (ent->ExternalID != NULL) {
  1005. xmlBufferWriteChar(buf, " PUBLIC ");
  1006. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1007. xmlBufferWriteChar(buf, " ");
  1008. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1009. } else {
  1010. xmlBufferWriteChar(buf, " SYSTEM ");
  1011. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1012. }
  1013. xmlBufferWriteChar(buf, ">\n");
  1014. break;
  1015. case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
  1016. xmlBufferWriteChar(buf, "<!ENTITY ");
  1017. xmlBufferWriteCHAR(buf, ent->name);
  1018. if (ent->ExternalID != NULL) {
  1019. xmlBufferWriteChar(buf, " PUBLIC ");
  1020. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1021. xmlBufferWriteChar(buf, " ");
  1022. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1023. } else {
  1024. xmlBufferWriteChar(buf, " SYSTEM ");
  1025. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1026. }
  1027. if (ent->content != NULL) { /* Should be true ! */
  1028. xmlBufferWriteChar(buf, " NDATA ");
  1029. if (ent->orig != NULL)
  1030. xmlBufferWriteCHAR(buf, ent->orig);
  1031. else
  1032. xmlBufferWriteCHAR(buf, ent->content);
  1033. }
  1034. xmlBufferWriteChar(buf, ">\n");
  1035. break;
  1036. case XML_INTERNAL_PARAMETER_ENTITY:
  1037. xmlBufferWriteChar(buf, "<!ENTITY % ");
  1038. xmlBufferWriteCHAR(buf, ent->name);
  1039. xmlBufferWriteChar(buf, " ");
  1040. if (ent->orig == NULL)
  1041. xmlDumpEntityContent(buf, ent->content);
  1042. else
  1043. xmlBufferWriteQuotedString(buf, ent->orig);
  1044. xmlBufferWriteChar(buf, ">\n");
  1045. break;
  1046. case XML_EXTERNAL_PARAMETER_ENTITY:
  1047. xmlBufferWriteChar(buf, "<!ENTITY % ");
  1048. xmlBufferWriteCHAR(buf, ent->name);
  1049. if (ent->ExternalID != NULL) {
  1050. xmlBufferWriteChar(buf, " PUBLIC ");
  1051. xmlBufferWriteQuotedString(buf, ent->ExternalID);
  1052. xmlBufferWriteChar(buf, " ");
  1053. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1054. } else {
  1055. xmlBufferWriteChar(buf, " SYSTEM ");
  1056. xmlBufferWriteQuotedString(buf, ent->SystemID);
  1057. }
  1058. xmlBufferWriteChar(buf, ">\n");
  1059. break;
  1060. default:
  1061. xmlEntitiesErr(XML_DTD_UNKNOWN_ENTITY,
  1062. "xmlDumpEntitiesDecl: internal: unknown type entity type");
  1063. }
  1064. }
  1065. /**
  1066. * xmlDumpEntityDeclScan:
  1067. * @ent: An entity table
  1068. * @buf: An XML buffer.
  1069. *
  1070. * When using the hash table scan function, arguments need to be reversed
  1071. */
  1072. static void
  1073. xmlDumpEntityDeclScan(void *ent, void *buf,
  1074. const xmlChar *name ATTRIBUTE_UNUSED) {
  1075. xmlDumpEntityDecl((xmlBufferPtr) buf, (xmlEntityPtr) ent);
  1076. }
  1077. /**
  1078. * xmlDumpEntitiesTable:
  1079. * @buf: An XML buffer.
  1080. * @table: An entity table
  1081. *
  1082. * This will dump the content of the entity table as an XML DTD definition
  1083. */
  1084. void
  1085. xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
  1086. xmlHashScan(table, xmlDumpEntityDeclScan, buf);
  1087. }
  1088. #endif /* LIBXML_OUTPUT_ENABLED */
  1089. #define bottom_entities
  1090. #include "elfgcchack.h"