pixl-xml.js 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. // SPDX-License-Identifier: MIT
  2. /*
  3. JavaScript XML Library
  4. Plus a bunch of object utility functions
  5. Usage:
  6. var XML = require('pixl-xml');
  7. var myxmlstring = '<?xml version="1.0"?><Document>' +
  8. '<Simple>Hello</Simple>' +
  9. '<Node Key="Value">Content</Node>' +
  10. '</Document>';
  11. var tree = XML.parse( myxmlstring, { preserveAttributes: true });
  12. console.log( tree );
  13. tree.Simple = "Hello2";
  14. tree.Node._Attribs.Key = "Value2";
  15. tree.Node._Data = "Content2";
  16. tree.New = "I added this";
  17. console.log( XML.stringify( tree, 'Document' ) );
  18. Copyright (c) 2004 - 2015 Joseph Huckaby
  19. Released under the MIT License
  20. This version is for Node.JS, converted in 2012.
  21. */
  22. var fs = require('fs');
  // String repeated once per nesting level to indent composed XML output.
  var indent_string = "\t";
  // Default XML declaration written at the top of composed documents.
  var xml_header = '<?xml version="1.0"?>';
  // NOTE(review): not referenced anywhere in the visible code -- possibly vestigial; confirm before removing.
  var sort_args = null;
  // When composing, only hash keys matching this pattern are emitted as child elements.
  var re_valid_tag_name = /^\w[\w\-\:]*$/;
  var XML = exports.XML = function XML(args) {
    // XML parser class constructor.
    // `args` is either an options hash (every key, including `text`, is copied
    // onto the instance) or the XML source itself as a string or Buffer.
    // Source that does not start with "<" is treated as a file path and loaded
    // synchronously.  Parsing runs immediately, so construction may throw.
    var source = args ? args : '';
    if (!isa_hash(source)) {
      this.text = source || '';
    }
    else {
      for (var prop in source) {
        this[prop] = source[prop];
      }
    }

    // Buffers become strings before any pattern matching
    if (this.text instanceof Buffer) this.text = this.text.toString();

    // anything that does not open with a tag is assumed to be a file path
    var looksLikeMarkup = this.text.match(/^\s*</);
    if (!looksLikeMarkup) {
      var path = this.text;
      this.text = fs.readFileSync(path, { encoding: 'utf8' });
      if (!this.text) throw new Error("File not found: " + path);
    }

    // fresh per-instance parser state
    this.tree = {};
    this.errors = [];
    this.piNodeList = [];
    this.dtdNodeList = [];
    this.documentNodeName = '';

    // in lower-case mode the special keys are lower-cased as well
    if (this.lowerCase) {
      this.attribsKey = this.attribsKey.toLowerCase();
      this.dataKey = this.dataKey.toLowerCase();
    }

    // the tag pattern is global (stateful), so rewind it before scanning
    this.patTag.lastIndex = 0;
    if (this.text) this.parse();
  }
  // ---- Default options (copied-over keys from the constructor's args hash shadow these) ----
  // When true, attributes are stored in a sub-hash (see attribsKey) instead of alongside child nodes.
  XML.prototype.preserveAttributes = false;
  // When true, element and attribute names are lower-cased while parsing.
  XML.prototype.lowerCase = false;

  // ---- Patterns used by the parser ----
  // NOTE: patterns carrying the "g" flag are stateful (lastIndex) and live on the shared prototype.
  // Next tag plus the text that precedes it.
  XML.prototype.patTag = /([^<]*?)<([^>]+)>/g;
  // Tags starting with "!" or "?" (PI, comment, DTD, CDATA).
  XML.prototype.patSpecialTag = /^\s*([\!\?])/;
  XML.prototype.patPITag = /^\s*\?/;
  XML.prototype.patCommentTag = /^\s*\!--/;
  XML.prototype.patDTDTag = /^\s*\!DOCTYPE/;
  XML.prototype.patCDATATag = /^\s*\!\s*\[\s*CDATA/;
  // Standard tag: optional "/", name, raw attribute text.
  // [^] (any character, newlines included) lets a tag span several lines; "." would reject it.
  XML.prototype.patStandardTag = /^\s*(\/?)([\w\-\:\.]+)\s*([^]*)$/;
  XML.prototype.patSelfClosing = /\/\s*$/;
  // name = "value" or name = 'value'.  The value is matched lazily up to the same quote
  // character; a backreference cannot be used inside a character class, so [^] is used.
  XML.prototype.patAttrib = /([\w\-\:\.]+)\s*=\s*(["'])([^]*?)\2/g;
  // Processing instruction: target name plus the rest (may span lines).
  XML.prototype.patPINode = /^\s*\?\s*([\w\-\:]+)\s*([^]*)$/;
  XML.prototype.patEndComment = /--$/;
  // Text up to the next ">"; used to extend a tag that was cut short at an embedded ">".
  XML.prototype.patNextClose = /([^>]*?)>/g;
  XML.prototype.patExternalDTDNode = new RegExp("^\\s*\\!DOCTYPE\\s+([\\w\\-\\:]+)\\s+(SYSTEM|PUBLIC)\\s+\\\"([^\\\"]+)\\\"");
  XML.prototype.patInlineDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[/;
  XML.prototype.patEndDTD = /\]$/;
  // Complete inline DTD; the subset between [ and ] may contain newlines.
  XML.prototype.patDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[([^]*)\]/;
  XML.prototype.patEndCDATA = /\]\]$/;
  XML.prototype.patCDATANode = /^\s*\!\s*\[\s*CDATA\s*\[([^]*)\]\]/;

  // ---- Special keys used in the parsed tree ----
  // Key of the attribute sub-hash when preserveAttributes is on.
  XML.prototype.attribsKey = '_Attribs';
  // Key under which an element's text content is stored.
  XML.prototype.dataKey = '_Data';
  XML.prototype.parse = function(branch, name) {
    // Parse this.text into a tree of hashes, recursing once per nested element.
    //   branch: hash that receives the parsed children (defaults to this.tree, the document level)
    //   name:   name of the element whose body is being parsed (null at the document level)
    // The scan position lives in this.patTag.lastIndex and is shared by every recursion level.
    // Problems are reported through this.throwParseError(), which throws; the `break`
    // statements that follow those calls only matter if that method is replaced by a
    // non-throwing one.
    if (!branch) branch = this.tree;
    if (!name) name = null;

    var hasOwn = Object.prototype.hasOwnProperty;
    var foundClosing = false;
    var tagMatch = null;

    // match each tag, plus preceding text
    while ( (tagMatch = this.patTag.exec(this.text)) ) {
      var before = tagMatch[1];
      var tag = tagMatch[2];

      // text leading up to tag = content of parent node
      if (before.match(/\S/)) {
        if (typeof(branch[this.dataKey]) != 'undefined') branch[this.dataKey] += ' ';
        else branch[this.dataKey] = '';
        branch[this.dataKey] += trim(decode_entities(before));
      }

      // special tags: processing instruction, comment, DTD or CDATA
      if (tag.match(this.patSpecialTag)) {
        if (tag.match(this.patPITag)) tag = this.parsePINode(tag);
        else if (tag.match(this.patCommentTag)) tag = this.parseCommentNode(tag);
        else if (tag.match(this.patDTDTag)) tag = this.parseDTDNode(tag);
        else if (tag.match(this.patCDATATag)) {
          tag = this.parseCDATANode(tag);
          if (typeof(branch[this.dataKey]) != 'undefined') branch[this.dataKey] += ' ';
          else branch[this.dataKey] = '';
          // CDATA content is literal text: entity references inside it must NOT be decoded
          branch[this.dataKey] += trim(tag);
        }
        else {
          this.throwParseError( "Malformed special tag", tag );
          break;
        }

        if (tag == null) break;
        continue;
      }

      // Tag is standard, so parse name and attributes (if any)
      var parts = tag.match(this.patStandardTag);
      if (!parts) {
        this.throwParseError( "Malformed tag", tag );
        break;
      }
      var closing = parts[1];
      var nodeName = this.lowerCase ? parts[2].toLowerCase() : parts[2];
      var attribsRaw = parts[3];

      // If this is a closing tag, make sure it matches its opening tag
      if (closing) {
        if (nodeName == (name || '')) foundClosing = true;
        else this.throwParseError( "Mismatched closing tag (expected </" + name + ">)", tag );
        break;
      }

      // Not a closing tag, so parse attributes into hash.  If tag
      // is self-closing, no recursive parsing is needed.
      var selfClosing = !!attribsRaw.match(this.patSelfClosing);
      var leaf = {};
      var attribs = leaf;

      // preserve attributes means they go into a sub-hash (attribsKey);
      // the XML composer honors this for restoring the tree back into XML
      if (this.preserveAttributes) {
        leaf[this.attribsKey] = {};
        attribs = leaf[this.attribsKey];
      }

      // parse attributes (pattern is global, so rewind it first)
      this.patAttrib.lastIndex = 0;
      var attribMatch = null;
      while ( (attribMatch = this.patAttrib.exec(attribsRaw)) ) {
        var key = this.lowerCase ? attribMatch[1].toLowerCase() : attribMatch[1];
        attribs[ key ] = decode_entities( attribMatch[3] );
      }

      // if no attribs found, but we created the attribs sub-hash, clean it up now
      if (this.preserveAttributes && !num_keys(attribs)) {
        delete leaf[this.attribsKey];
      }

      // Recurse for nested nodes
      if (!selfClosing) {
        this.parse( leaf, nodeName );
        if (this.error()) break;
      }

      // Compress into simple node if text only; empty nodes become ''
      var num_leaf_keys = num_keys(leaf);
      if ((typeof(leaf[this.dataKey]) != 'undefined') && (num_leaf_keys == 1)) {
        leaf = leaf[this.dataKey];
      }
      else if (!num_leaf_keys) {
        leaf = '';
      }

      // Add leaf to parent branch.  Repeated names are collected into an array.
      // Own-property and true-array checks are used so that inherited names
      // ("constructor", "toString") and hashes that have a "length" child are
      // not mistaken for existing siblings / arrays.
      if (hasOwn.call(branch, nodeName) && (typeof(branch[nodeName]) != 'undefined')) {
        if (Array.isArray(branch[nodeName])) {
          branch[nodeName].push( leaf );
        }
        else {
          branch[nodeName] = [ branch[nodeName], leaf ];
        }
      }
      else {
        branch[nodeName] = leaf;
      }

      // the document level stops after its first element
      if (this.error() || (branch == this.tree)) break;
    } // main reg exp

    // Make sure we found the closing tag
    if (name && !foundClosing) {
      this.throwParseError( "Missing closing tag (expected </" + name + ">)", name );
    }

    // If we are the master node, finish parsing and setup our doc node
    if (branch == this.tree) {
      if (typeof(this.tree[this.dataKey]) != 'undefined') delete this.tree[this.dataKey];

      if (num_keys(this.tree) > 1) {
        this.throwParseError( 'Only one top-level node is allowed in document', first_key(this.tree) );
        return;
      }

      // unwrap: the tree becomes the content of the document element
      this.documentNodeName = first_key(this.tree);
      if (this.documentNodeName) {
        this.tree = this.tree[this.documentNodeName];
      }
    }
  };
  XML.prototype.throwParseError = function(key, tag) {
    // Record a parse error in this.errors and then throw it.
    //   key: short description of the problem
    //   tag: tag text (without the angle brackets) the problem relates to
    // The line number is estimated from the newlines consumed so far by the
    // tag pattern, minus the newlines contained in the offending tag itself.
    var consumed = this.text.substring(0, this.patTag.lastIndex);
    var newlinesConsumed = consumed.match(/\n/g);
    var newlinesInTag = tag.match(/\n/g);

    var lineNum = 1;
    if (newlinesConsumed) lineNum += newlinesConsumed.length;
    if (newlinesInTag) lineNum -= newlinesInTag.length;

    var entry = {
      type: 'Parse',
      key: key,
      text: '<' + tag + '>',
      line: lineNum
    };
    this.errors.push(entry);

    // Throw actual error (callers must wrap parsing in try/catch)
    throw new Error( this.getLastError() );
  };
  XML.prototype.error = function() {
    // How many errors have been recorded so far (0 when there are none)
    var recorded = this.errors;
    return recorded.length;
  };
  XML.prototype.getError = function(error) {
    // Format an error record as one line of text:
    //   "<type> Error[ <code>]: <key>[ on line <line>][: <text>]"
    // A missing record yields an empty string; a missing type reads "General".
    if (!error) return '';

    var pieces = [ (error.type || 'General') + ' Error' ];
    if (error.code) pieces.push( ' ' + error.code );
    pieces.push( ': ' + error.key );
    if (error.line) pieces.push( ' on line ' + error.line );
    if (error.text) pieces.push( ': ' + error.text );

    return pieces.join('');
  };
  XML.prototype.getLastError = function() {
    // Plain-text form of the most recently recorded error ('' when there are none)
    var hasErrors = !!this.error();
    if (hasErrors) {
      var latest = this.errors[this.errors.length - 1];
      return this.getError( latest );
    }
    return '';
  };
  XML.prototype.parsePINode = function(tag) {
    // Handle a Processing Instruction node, e.g. <?xml version="1.0"?>
    // Well-formed instructions are remembered in this.piNodeList (so compose()
    // can re-emit them) and returned unchanged; anything else is a parse error.
    var wellFormed = !!tag.match(this.patPINode);
    if (wellFormed) {
      this.piNodeList.push( tag );
      return tag;
    }

    this.throwParseError( "Malformed processor instruction", tag );
    return null;
  };
  XML.prototype.parseCommentNode = function(tag) {
    // Handle a comment node, e.g. <!-- hello -->
    // The main tag pattern stops at the first ">", which may lie inside the
    // comment.  Keep appending text up to the following ">" until the tag ends
    // in "--", then move the main scan position past the whole comment.
    var closer = this.patNextClose;
    closer.lastIndex = this.patTag.lastIndex;

    while (!tag.match(this.patEndComment)) {
      var more = closer.exec(this.text);
      if (!more) {
        this.throwParseError( "Unclosed comment tag", tag );
        return null;
      }
      tag += '>' + more[1];
    }

    this.patTag.lastIndex = closer.lastIndex;
    return tag;
  };
  XML.prototype.parseDTDNode = function(tag) {
    // Handle a Document Type Descriptor node, e.g. <!DOCTYPE ... >
    // Accepted declarations are stored in this.dtdNodeList (compose() re-emits
    // them) and returned; anything else is reported as a parse error.

    // external (SYSTEM / PUBLIC) declarations are complete as they stand
    if (tag.match(this.patExternalDTDNode)) {
      this.dtdNodeList.push( tag );
      return tag;
    }

    // the only other accepted form is an inline subset: <!DOCTYPE name [ ... ]>
    if (!tag.match(this.patInlineDTDNode)) {
      this.throwParseError( "Malformed DTD tag", tag );
      return null;
    }

    // The subset contains nested declarations, so the tag was cut at an inner ">".
    // Extend it one ">" at a time until it ends with "]".
    var closer = this.patNextClose;
    closer.lastIndex = this.patTag.lastIndex;
    while (!tag.match(this.patEndDTD)) {
      var more = closer.exec(this.text);
      if (!more) {
        this.throwParseError( "Unclosed DTD tag", tag );
        return null;
      }
      tag += '>' + more[1];
    }
    this.patTag.lastIndex = closer.lastIndex;

    // Make sure complete tag is well-formed before keeping it
    if (!tag.match(this.patDTDNode)) {
      this.throwParseError( "Malformed DTD tag", tag );
      return null;
    }
    this.dtdNodeList.push( tag );
    return tag;
  };
  XML.prototype.parseCDATANode = function(tag) {
    // Handle a CDATA node, e.g. <![CDATA[Brooks & Shields]]>
    // Returns the text between "<![CDATA[" and "]]".  The section may contain
    // ">" characters, so the tag is extended up to each following ">" until it
    // ends with "]]"; the main scan position is then moved past the section.
    var closer = this.patNextClose;
    closer.lastIndex = this.patTag.lastIndex;

    while (!tag.match(this.patEndCDATA)) {
      var more = closer.exec(this.text);
      if (!more) {
        this.throwParseError( "Unclosed CDATA tag", tag );
        return null;
      }
      tag += '>' + more[1];
    }
    this.patTag.lastIndex = closer.lastIndex;

    var section = tag.match(this.patCDATANode);
    if (!section) {
      this.throwParseError( "Malformed CDATA tag", tag );
      return null;
    }
    return section[1];
  };
  XML.prototype.getTree = function() {
    // Accessor for the parsed tree (the same object the parser holds, not a copy)
    var parsedTree = this.tree;
    return parsedTree;
  };
  XML.prototype.compose = function() {
    // Serialize the parsed tree back into an XML document string.
    // The processing instructions and DTD nodes captured while parsing are
    // written first; when no processing instruction was captured the default
    // XML header is used instead.

    // compose_xml() always starts with its own header line -- drop it
    var composed = compose_xml( this.tree, this.documentNodeName );
    var body = composed.substring( composed.indexOf("\n") + 1, composed.length );

    var prolog = [];
    var piNodes = this.piNodeList;
    if (piNodes.length) {
      for (var p = 0; p < piNodes.length; p++) {
        prolog.push( '<' + piNodes[p] + '>' );
      }
    }
    else {
      prolog.push( xml_header );
    }

    var dtdNodes = this.dtdNodeList;
    for (var d = 0; d < dtdNodes.length; d++) {
      prolog.push( '<' + dtdNodes[d] + '>' );
    }

    // every prolog line is newline-terminated, then the body follows
    return prolog.join("\n") + "\n" + body;
  };
  344. //
  345. // Static Utility Functions:
  346. //
  var parse_xml = exports.parse = function parse_xml(text, opts) {
    // Turn text into an XML tree quickly.
    //   text: XML source (string or Buffer), or a path to an XML file
    //   opts: optional hash of parser options (e.g. preserveAttributes, lowerCase)
    // Returns the parsed tree.  Parse errors are thrown by the parser; if a
    // non-throwing parser has recorded an error, its text is returned instead.
    // The options are copied first so the caller's hash is never modified.
    var args = {};
    if (opts) {
      for (var key in opts) args[key] = opts[key];
    }
    args.text = text;

    var parser = new XML(args);
    return parser.error() ? parser.getLastError() : parser.getTree();
  };
  var trim = exports.trim = function trim(text) {
    // Remove leading and trailing whitespace.
    // null / undefined become ''; values without a replace() method (numbers,
    // booleans, ...) and empty strings are returned untouched.
    if (text == null) return '';

    var canStrip = text && text.replace;
    if (!canStrip) return text;

    return text.replace(/^\s+/, "").replace(/\s+$/, "");
  };
  var encode_entities = exports.encodeEntities = function encode_entities(text) {
    // Escape the characters that are special in XML text content: & < >
    // null / undefined become ''; values without a replace() method are
    // returned untouched.
    if (text == null) return '';
    if (!(text && text.replace)) return text;

    // the ampersand MUST be handled first, or the entities added by the
    // later steps would themselves be escaped
    return text
      .replace(/\&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  };
  var encode_attrib_entities = exports.encodeAttribEntities = function encode_attrib_entities(text) {
    // Escape the characters that are special inside an XML attribute value:
    // & < > " '
    // null / undefined become ''; values without a replace() method are
    // returned untouched.
    if (text == null) return '';
    if (!(text && text.replace)) return text;

    var escapes = [
      [ /\&/g, "&amp;" ], // MUST BE FIRST
      [ /</g, "&lt;" ],
      [ />/g, "&gt;" ],
      [ /\"/g, "&quot;" ],
      [ /\'/g, "&apos;" ]
    ];
    for (var idx = 0; idx < escapes.length; idx++) {
      text = text.replace( escapes[idx][0], escapes[idx][1] );
    }
    return text;
  };
  var decode_entities = exports.decodeEntities = function decode_entities(text) {
    // Decode XML entity references into the characters they stand for.
    // Handles the five predefined entities (&lt; &gt; &quot; &apos; &amp;) and
    // numeric character references (&#65; decimal, &#x41; hexadecimal).
    // Unknown or illegal references are left exactly as written.
    // null / undefined become ''; values without string methods are returned untouched.
    if (text == null) return '';
    if (!(text && text.replace && text.match(/\&/))) return text;

    var named = { lt: "<", gt: ">", quot: '"', apos: "'", amp: "&" };

    // A single pass means the output of one replacement is never decoded a
    // second time ("&amp;lt;" becomes "&lt;", not "<").
    return text.replace(/\&(?:#(\d+)|#x([0-9a-fA-F]+)|(lt|gt|quot|apos|amp));/g, function(whole, dec, hex, name) {
      if (name) return named[name];

      var code = dec ? parseInt(dec, 10) : parseInt(hex, 16);

      // not a character: zero, beyond Unicode, or a lone surrogate -- leave as written
      if (!(code > 0 && code <= 0x10FFFF)) return whole;
      if (code >= 0xD800 && code <= 0xDFFF) return whole;

      if (code <= 0xFFFF) return String.fromCharCode(code);

      // characters above the BMP are emitted as a UTF-16 surrogate pair
      code -= 0x10000;
      return String.fromCharCode(0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF));
    });
  };
  var compose_xml = exports.stringify = function compose_xml(node, name, indent) {
    // Compose a node into XML text, including attributes; recurses for child nodes.
    //   node:   string, hash, or array of those
    //   name:   element name for the node (if omitted at the root, the first
    //           key of `node` is taken as the name and its value as the node)
    //   indent: nesting depth; leave unset for the root call
    // Conventions inside a hash: "_Attribs" holds the element's attributes and
    // "_Data" its text content.  When "_Data" is present, other child keys are
    // not emitted.  Keys that are not valid tag names are skipped.
    var xml = "";

    // If this is the root node, set the indent to 0
    // and setup the XML header (PI node)
    if (!indent) {
      indent = 0;
      xml = xml_header + "\n";

      if (!name) {
        // no name provided, assume content is wrapped in it
        name = first_key(node);
        node = node[name];
      }
    }

    // Setup the indent text
    var indent_text = "";
    for (var k = 0; k < indent; k++) indent_text += indent_string;

    // simple value: <name>text</name>
    if ((typeof(node) != 'object') || (node == null)) {
      xml += indent_text + "<" + name + ">" + encode_entities(node) + "</" + name + ">\n";
      return xml;
    }

    // Array of nodes: repeat the element once per entry, at the same indent.
    // A real array test is used so that a hash which merely has a "length"
    // key (e.g. parsed from a <length> element) is not mistaken for an array.
    // An empty array falls through and is written as an empty element.
    if (Array.isArray(node) && node.length) {
      for (var pos = 0; pos < node.length; pos++) {
        xml += compose_xml( node[pos], name, indent );
      }
      return xml;
    }

    // node is hash
    xml += indent_text + "<" + name;

    var key_count = 0;
    for (var counted in node) key_count++;

    var has_attribs = 0;
    if (node["_Attribs"]) {
      has_attribs = 1;
      var attrib_names = hash_keys_to_array(node["_Attribs"]).sort();
      for (var a = 0, alen = attrib_names.length; a < alen; a++) {
        var attrib = attrib_names[a];
        xml += " " + attrib + "=\"" + encode_attrib_entities(node["_Attribs"][attrib]) + "\"";
      }
    }

    if (key_count <= has_attribs) {
      // nothing besides attributes, so self-close
      xml += "/>\n";
      return xml;
    }

    xml += ">";

    if (typeof(node["_Data"]) != 'undefined') {
      // Text content.  Presence (not truthiness) is tested, so '' and 0 are
      // written as text instead of leaking out as a <_Data> child element.
      xml += encode_entities(node["_Data"]) + "</" + name + ">\n";
      return xml;
    }

    // real children, in sorted key order
    xml += "\n";
    var child_names = hash_keys_to_array(node).sort();
    for (var c = 0, clen = child_names.length; c < clen; c++) {
      var child = child_names[c];
      if ((child != "_Attribs") && child.match(re_valid_tag_name)) {
        // recurse for node, with incremented indent value
        xml += compose_xml( node[child], child, indent + 1 );
      }
    }
    xml += indent_text + "</" + name + ">\n";

    return xml;
  };
  var always_array = exports.alwaysArray = function always_array(obj, key) {
    // Make sure a value is an array.
    //   always_array(obj):      returns obj itself if it is an array, otherwise [ obj ]
    //   always_array(obj, key): wraps obj[key] in an array in place (unless it
    //                           already is one) and returns null
    // A real array test is used: null no longer throws, and a hash that merely
    // has a "length" key (e.g. parsed from a <length> element) is wrapped like
    // any other hash.
    if (key) {
      if (!Array.isArray(obj[key])) {
        var temp = obj[key];
        // delete + re-add keeps the original behavior of moving the key to the
        // end of the hash's enumeration order
        delete obj[key];
        obj[key] = [ temp ];
      }
      return null;
    }

    return Array.isArray(obj) ? obj : [ obj ];
  };
  var hash_keys_to_array = exports.hashKeysToArray = function hash_keys_to_array(hash) {
    // Collect the enumerable keys of a hash into a new array, in enumeration
    // order; the values are ignored.
    var names = [];
    for (var name in hash) {
      names[names.length] = name;
    }
    return names;
  };
  var isa_hash = exports.isaHash = function isa_hash(arg) {
    // True when arg is a non-null object that has no `length` property.
    // Arrays, Buffers and other array-likes are therefore not hashes -- and
    // neither is a plain object that happens to carry a `length` key.
    if (!arg) return false;
    if (typeof(arg) != 'object') return false;
    return typeof(arg.length) == 'undefined';
  };
  var isa_array = exports.isaArray = function isa_array(arg) {
    // True when arg is an array or is array-like, i.e. a non-null object with
    // a `length` property.  (typeof never yields 'array', so no separate check
    // for that is needed.)
    if (!arg) return false;
    if (typeof(arg) != 'object') return false;
    return typeof(arg.length) != 'undefined';
  };
  var first_key = exports.firstKey = function first_key(hash) {
    // First key produced when enumerating the hash, or null when it has no keys
    var found = null;
    for (var key in hash) {
      found = key;
      break;
    }
    return found;
  };
  var num_keys = exports.numKeys = function num_keys(hash) {
    // Number of enumerable keys in a hash
    var total = 0;
    for (var prop in hash) {
      total += 1;
    }
    return total;
  };