// pixl-xml.js (17 KB)
  /*
    JavaScript XML Library
    Plus a bunch of object utility functions

    Usage:
      var XML = require('pixl-xml');
      var myxmlstring = '<?xml version="1.0"?><Document>' +
        '<Simple>Hello</Simple>' +
        '<Node Key="Value">Content</Node>' +
        '</Document>';

      var tree = XML.parse( myxmlstring, { preserveAttributes: true });
      console.log( tree );

      tree.Simple = "Hello2";
      tree.Node._Attribs.Key = "Value2";
      tree.Node._Data = "Content2";
      tree.New = "I added this";

      console.log( XML.stringify( tree, 'Document' ) );

    Copyright (c) 2004 - 2015 Joseph Huckaby
    Released under the MIT License
    This version is for Node.JS, converted in 2012.
  */
  var fs = require('fs');

  // Settings shared by the composer functions further down.
  var xml_header = '<?xml version="1.0"?>'; // emitted when no PI node is available
  var indent_string = '\t'; // repeated once per nesting level
  var re_valid_tag_name = /^\w[\w\-\:]*$/; // keys must match this to be composed as child elements
  var sort_args = null; // not referenced by any code visible in this file
  var XML = exports.XML = function XML(args) {
    // Class constructor for the XML parser.
    //   args: an options hash (copied onto the instance; `text` holds the
    //         document), or the document itself as a string or Buffer.
    // Text that does not start with "<" (after optional whitespace) is treated
    // as a file path and loaded synchronously. Parsing starts immediately, so
    // parse errors are thrown from the constructor.
    // Throws an Error if no text or path is supplied, if the file cannot be
    // read (raised by fs), or if the file is empty.
    if (!args) args = '';
    if (isa_hash(args)) {
      for (var key in args) this[key] = args[key];
    }
    else this.text = args;

    // stringify buffers
    if (this.text instanceof Buffer) {
      this.text = this.text.toString();
    }

    // fail clearly instead of with "cannot read property 'match'" or an
    // ENOENT for the path '' when nothing usable was passed in
    if ((typeof(this.text) != 'string') || !this.text) {
      throw new Error("No XML text or file path provided");
    }

    if (!this.text.match(/^\s*</)) {
      // try as file path (readFileSync itself throws if the file is missing)
      var file = this.text;
      this.text = fs.readFileSync(file, { encoding: 'utf8' });
      if (!this.text) throw new Error("File not found: " + file);
    }

    this.tree = {};
    this.errors = [];
    this.piNodeList = [];
    this.dtdNodeList = [];
    this.documentNodeName = '';

    if (this.lowerCase) {
      this.attribsKey = this.attribsKey.toLowerCase();
      this.dataKey = this.dataKey.toLowerCase();
    }

    // the tag pattern is a stateful /g regex, so rewind it before scanning
    this.patTag.lastIndex = 0;
    this.parse();
  }
  // Default options; an args hash passed to the constructor overrides them per instance.
  XML.prototype.preserveAttributes = false;
  XML.prototype.lowerCase = false;

  // Tag scanner: captures the text before a tag and the tag's inner content.
  XML.prototype.patTag = /([^<]*?)<([^>]+)>/g;

  // Tag classification.
  XML.prototype.patSpecialTag = /^\s*([\!\?])/;
  XML.prototype.patPITag = /^\s*\?/;
  XML.prototype.patCommentTag = /^\s*\!--/;
  XML.prototype.patDTDTag = /^\s*\!DOCTYPE/;
  XML.prototype.patCDATATag = /^\s*\!\s*\[\s*CDATA/;

  // Standard tag: optional "/", name, raw attribute text.
  // [^]* (rather than .*) lets the attribute text span several lines.
  XML.prototype.patStandardTag = /^\s*(\/?)([\w\-\:\.]+)\s*([^]*)$/;
  XML.prototype.patSelfClosing = /\/\s*$/;

  // Attribute: name, quote character, value up to the matching quote.
  // The value class is [^]; a "\2" inside a character class is an octal
  // escape for U+0002, not a backreference to the quote.
  XML.prototype.patAttrib = /([\w\-\:\.]+)\s*=\s*([\"\'])([^]*?)\2/g;

  // Processing instruction: target name plus the rest (may span lines).
  XML.prototype.patPINode = /^\s*\?\s*([\w\-\:]+)\s*([^]*)$/;

  XML.prototype.patEndComment = /--$/;

  // Used to pull in more text when a node contains a literal ">".
  XML.prototype.patNextClose = /([^>]*?)>/g;

  // DOCTYPE handling; the inline subset may span several lines.
  XML.prototype.patExternalDTDNode = new RegExp("^\\s*\\!DOCTYPE\\s+([\\w\\-\\:]+)\\s+(SYSTEM|PUBLIC)\\s+\\\"([^\\\"]+)\\\"");
  XML.prototype.patInlineDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[/;
  XML.prototype.patEndDTD = /\]$/;
  XML.prototype.patDTDNode = /^\s*\!DOCTYPE\s+([\w\-\:]+)\s+\[([^]*)\]/;

  // CDATA handling.
  XML.prototype.patEndCDATA = /\]\]$/;
  XML.prototype.patCDATANode = /^\s*\!\s*\[\s*CDATA\s*\[([^]*)\]\]/;

  // Keys under which attributes (with preserveAttributes) and text content are stored.
  XML.prototype.attribsKey = '_Attribs';
  XML.prototype.dataKey = '_Data';
  XML.prototype.parse = function(branch, name) {
    // Parse this.text into a tree, recursing for nested elements.
    //   branch: hash that receives the parsed content (defaults to this.tree)
    //   name:   name of the element currently open, or null at document level
    // Scanning continues from this.patTag.lastIndex, which all recursion
    // levels share. Problems are reported through throwParseError().
    if (!branch) branch = this.tree;
    if (!name) name = null;

    var self = this;
    var foundClosing = false;
    var matches = null;

    var appendText = function(text) {
      // add a run of character data to the branch; runs are joined by a space
      if (typeof(branch[self.dataKey]) != 'undefined') branch[self.dataKey] += ' ';
      else branch[self.dataKey] = '';
      branch[self.dataKey] += trim(text);
    };

    // match each tag, plus preceding text
    while ( matches = this.patTag.exec(this.text) ) {
      var before = matches[1];
      var tag = matches[2];

      // text leading up to tag = content of parent node
      if (before.match(/\S/)) appendText( decode_entities(before) );

      // parse based on tag type
      if (tag.match(this.patSpecialTag)) {
        // special tag
        if (tag.match(this.patPITag)) tag = this.parsePINode(tag);
        else if (tag.match(this.patCommentTag)) tag = this.parseCommentNode(tag);
        else if (tag.match(this.patDTDTag)) tag = this.parseDTDNode(tag);
        else if (tag.match(this.patCDATATag)) {
          tag = this.parseCDATANode(tag);
          // CDATA is literal text: entity references inside it are NOT decoded
          appendText( tag );
        } // cdata
        else {
          this.throwParseError( "Malformed special tag", tag );
          break;
        } // error

        if (tag == null) break;
        continue;
      } // special tag

      // Tag is standard, so parse name and attributes (if any)
      var parts = tag.match(this.patStandardTag);
      if (!parts) {
        this.throwParseError( "Malformed tag", tag );
        break;
      }

      var closing = parts[1];
      var nodeName = this.lowerCase ? parts[2].toLowerCase() : parts[2];
      var attribsRaw = parts[3];

      // If this is a closing tag, make sure it matches its opening tag
      if (closing) {
        if (nodeName == (name || '')) {
          foundClosing = 1;
          break;
        }
        else {
          this.throwParseError(
            name ? ("Mismatched closing tag (expected </" + name + ">)") : "Mismatched closing tag (no element is open)",
            tag
          );
          break;
        }
      } // closing tag

      // Not a closing tag, so parse attributes into hash. If tag
      // is self-closing, no recursive parsing is needed.
      var selfClosing = !!attribsRaw.match(this.patSelfClosing);
      var leaf = {};
      var attribs = leaf;

      // preserve attributes means they go into a sub-hash named "_Attribs"
      // the XML composer honors this for restoring the tree back into XML
      if (this.preserveAttributes) {
        leaf[this.attribsKey] = {};
        attribs = leaf[this.attribsKey];
      }

      // parse attributes (patAttrib is a stateful /g regex, so rewind it first)
      this.patAttrib.lastIndex = 0;
      var attribMatch = null;
      while ( attribMatch = this.patAttrib.exec(attribsRaw) ) {
        var key = this.lowerCase ? attribMatch[1].toLowerCase() : attribMatch[1];
        attribs[ key ] = decode_entities( attribMatch[3] );
      } // foreach attrib

      // if no attribs found, but we created the _Attribs subhash, clean it up now
      if (this.preserveAttributes && !num_keys(attribs)) {
        delete leaf[this.attribsKey];
      }

      // Recurse for nested nodes
      if (!selfClosing) {
        this.parse( leaf, nodeName );
        if (this.error()) break;
      }

      // Compress into simple node if text only
      var num_leaf_keys = num_keys(leaf);
      if ((typeof(leaf[this.dataKey]) != 'undefined') && (num_leaf_keys == 1)) {
        leaf = leaf[this.dataKey];
      }
      else if (!num_leaf_keys) {
        leaf = '';
      }

      // Add leaf to parent branch; repeated names turn into an array
      if (typeof(branch[nodeName]) != 'undefined') {
        if (isa_array(branch[nodeName])) {
          branch[nodeName].push( leaf );
        }
        else {
          var temp = branch[nodeName];
          branch[nodeName] = [ temp, leaf ];
        }
      }
      else {
        branch[nodeName] = leaf;
      }

      // at document level, stop after the first element has been added
      if (this.error() || (branch == this.tree)) break;
    } // main reg exp

    // Make sure we found the closing tag
    if (name && !foundClosing) {
      this.throwParseError( "Missing closing tag (expected </" + name + ">)", name );
    }

    // If we are the master node, finish parsing and setup our doc node
    if (branch == this.tree) {
      if (typeof(this.tree[this.dataKey]) != 'undefined') delete this.tree[this.dataKey];

      if (num_keys(this.tree) > 1) {
        this.throwParseError( 'Only one top-level node is allowed in document', first_key(this.tree) );
        return;
      }

      this.documentNodeName = first_key(this.tree);
      if (this.documentNodeName) {
        this.tree = this.tree[this.documentNodeName];
      }
    }
  };
  XML.prototype.throwParseError = function(key, tag) {
    // Record a parse error and throw it.
    //   key: short description of the problem
    //   tag: inner text of the offending tag (without the angle brackets)
    // The line number is derived from how much of the text the tag scanner
    // has consumed, minus any line breaks inside the tag itself.
    var consumed = this.text.substring(0, this.patTag.lastIndex);
    var breaksConsumed = consumed.match(/\n/g);
    var breaksInTag = tag.match(/\n/g);

    var lineNum = 1;
    if (breaksConsumed) lineNum += breaksConsumed.length;
    if (breaksInTag) lineNum -= breaksInTag.length;

    var entry = {
      type: 'Parse',
      key: key,
      text: '<' + tag + '>',
      line: lineNum
    };
    this.errors.push(entry);

    // Throw actual error (must wrap parse in try/catch)
    throw new Error( this.getLastError() );
  };
  XML.prototype.error = function() {
    // how many errors have been recorded so far (0 means none)
    var recorded = this.errors;
    return recorded.length;
  };
  XML.prototype.getError = function(error) {
    // Format one error record as a single line of text:
    //   "<type> Error[ <code>]: <key>[ on line <line>][: <text>]"
    // Returns '' when no record is given.
    if (!error) return '';

    var label = (error.type || 'General') + ' Error';
    var code = error.code ? (' ' + error.code) : '';
    var where = error.line ? (' on line ' + error.line) : '';
    var snippet = error.text ? (': ' + error.text) : '';

    return label + code + ': ' + error.key + where + snippet;
  };
  XML.prototype.getLastError = function() {
    // Formatted text of the most recently recorded error, or '' if there is none
    var count = this.error();
    if (!count) return '';

    var latest = this.errors[this.errors.length - 1];
    return this.getError( latest );
  };
  XML.prototype.parsePINode = function(tag) {
    // Handle a Processor Instruction node, e.g. <?xml version="1.0"?>
    // A well-formed PI is remembered in piNodeList and returned unchanged;
    // anything else is reported through throwParseError().
    if (tag.match(this.patPINode)) {
      this.piNodeList.push( tag );
      return tag;
    }

    this.throwParseError( "Malformed processor instruction", tag );
    return null;
  };
  XML.prototype.parseCommentNode = function(tag) {
    // Handle a Comment node, e.g. <!-- hello -->
    // The tag scanner stops at the first ">", so a comment containing ">"
    // arrives truncated. Keep pulling in text up to the next ">" until the
    // accumulated tag ends in "--", then move the tag scanner past it.
    var chunk = null;
    this.patNextClose.lastIndex = this.patTag.lastIndex;

    while (true) {
      if (tag.match(this.patEndComment)) break;

      chunk = this.patNextClose.exec(this.text);
      if (!chunk) {
        this.throwParseError( "Unclosed comment tag", tag );
        return null;
      }
      tag += '>' + chunk[1];
    }

    this.patTag.lastIndex = this.patNextClose.lastIndex;
    return tag;
  };
  XML.prototype.parseDTDNode = function(tag) {
    // Handle a Document Type Descriptor node, e.g. <!DOCTYPE ... >
    // Accepted nodes are remembered in dtdNodeList and returned; malformed or
    // unclosed ones are reported through throwParseError().

    // external DTDs are self-contained, nothing more to read
    if (tag.match(this.patExternalDTDNode)) {
      this.dtdNodeList.push( tag );
      return tag;
    }

    if (!tag.match(this.patInlineDTDNode)) {
      this.throwParseError( "Malformed DTD tag", tag );
      return null;
    }

    // Inline DTDs contain nested declarations, so the tag scanner stopped at
    // the first inner ">". Keep reading until the accumulated tag ends in "]".
    var chunk = null;
    this.patNextClose.lastIndex = this.patTag.lastIndex;

    while (!tag.match(this.patEndDTD)) {
      chunk = this.patNextClose.exec(this.text);
      if (!chunk) {
        this.throwParseError( "Unclosed DTD tag", tag );
        return null;
      }
      tag += '>' + chunk[1];
    }
    this.patTag.lastIndex = this.patNextClose.lastIndex;

    // Make sure complete tag is well-formed, and push onto DTD stack.
    if (!tag.match(this.patDTDNode)) {
      this.throwParseError( "Malformed DTD tag", tag );
      return null;
    }

    this.dtdNodeList.push( tag );
    return tag;
  };
  XML.prototype.parseCDATANode = function(tag) {
    // Handle a CDATA node, e.g. <![CDATA[Brooks & Shields]]>
    // Returns the text between "<![CDATA[" and "]]". The section may contain
    // ">" characters, so keep reading up to the next ">" until the
    // accumulated tag ends in "]]", then move the tag scanner past it.
    var chunk = null;
    this.patNextClose.lastIndex = this.patTag.lastIndex;

    while (!tag.match(this.patEndCDATA)) {
      chunk = this.patNextClose.exec(this.text);
      if (!chunk) {
        this.throwParseError( "Unclosed CDATA tag", tag );
        return null;
      }
      tag += '>' + chunk[1];
    }
    this.patTag.lastIndex = this.patNextClose.lastIndex;

    var parsed = tag.match(this.patCDATANode);
    if (parsed) return parsed[1];

    this.throwParseError( "Malformed CDATA tag", tag );
    return null;
  };
  XML.prototype.getTree = function() {
    // hand back the parsed tree itself (a live reference, not a copy)
    var parsedTree = this.tree;
    return parsedTree;
  };
  XML.prototype.compose = function() {
    // Turn the parsed tree back into an XML document string.
    // The PI and DTD nodes captured while parsing are written back first; if
    // the source had no PI node, the default XML header is used instead.
    var wrapAll = function(list) {
      // render each stored node as "<node>" followed by a line break
      var out = '';
      for (var pos = 0; pos < list.length; pos++) {
        out += '<' + list[pos] + '>' + "\n";
      }
      return out;
    };

    // compose_xml() always starts with its own header line, so drop it
    var composed = compose_xml( this.tree, this.documentNodeName );
    var body = composed.substring( composed.indexOf("\n") + 1, composed.length );

    var prolog = this.piNodeList.length ? wrapAll(this.piNodeList) : (xml_header + "\n");
    if (this.dtdNodeList.length) prolog += wrapAll(this.dtdNodeList);

    return prolog + body;
  };
  //
  // Static Utility Functions:
  //
  var parse_xml = exports.parse = function parse_xml(text, opts) {
    // Turn text into an XML tree quickly.
    //   text: XML source (string or Buffer) or a file path
    //   opts: optional hash of parser options, e.g. { preserveAttributes: true }
    // Returns the parsed tree. Parse errors are thrown by the XML constructor.
    // The options are copied so the caller's hash is never modified.
    var args = {};
    if (opts) {
      for (var key in opts) args[key] = opts[key];
    }
    args.text = text;

    var parser = new XML(args);
    return parser.error() ? parser.getLastError() : parser.getTree();
  };
  var trim = exports.trim = function trim(text) {
    // Remove leading and trailing whitespace.
    // null/undefined become ''; values without a replace() method
    // (numbers, booleans, ...) are returned untouched.
    if (text == null) return '';

    var canReplace = text && text.replace;
    if (!canReplace) return text;

    return text.replace(/^\s+/, "").replace(/\s+$/, "");
  };
  var encode_entities = exports.encodeEntities = function encode_entities(text) {
    // Escape &, < and > so the text can be used as XML element content.
    // null/undefined become ''; values without a replace() method are
    // returned untouched.
    if (text == null) return '';

    var canReplace = text && text.replace;
    if (!canReplace) return text;

    // the ampersand MUST be escaped first, or the other escapes get re-escaped
    return text
      .replace(/\&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
  };
  var encode_attrib_entities = exports.encodeAttribEntities = function encode_attrib_entities(text) {
    // Escape &, <, >, " and ' so the text can be used as an XML attribute value.
    // null/undefined become ''; values without a replace() method are
    // returned untouched.
    if (text == null) return '';

    var canReplace = text && text.replace;
    if (!canReplace) return text;

    // the ampersand MUST be escaped first, or the other escapes get re-escaped
    return text
      .replace(/\&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/\"/g, "&quot;")
      .replace(/\'/g, "&apos;");
  };
  var decode_entities = exports.decodeEntities = function decode_entities(text) {
    // Decode XML entity and character references into raw characters.
    // Handles the five predefined entities (&lt; &gt; &quot; &apos; &amp;) plus
    // decimal (&#65;) and hexadecimal (&#x41;) character references.
    // null/undefined become ''; values without a replace() method are returned
    // untouched. Unknown entities and out-of-range code points are left as-is.
    if (text == null) return '';
    if (!text || !text.replace || !text.match(/\&/)) return text;

    var named = { lt: '<', gt: '>', quot: '"', apos: "'", amp: '&' };

    // a single pass, so the output of one replacement is never decoded again
    // (e.g. "&amp;lt;" becomes "&lt;", not "<")
    return text.replace(/\&(?:(lt|gt|quot|apos|amp)|\#(\d+)|\#x([0-9a-fA-F]+))\;/g, function(whole, name, dec, hex) {
      if (name) return named[name];

      var code = dec ? parseInt(dec, 10) : parseInt(hex, 16);
      if (!code || (code > 0x10FFFF)) return whole; // not a usable code point

      if (code > 0xFFFF) {
        // outside the BMP: build the UTF-16 surrogate pair by hand
        code -= 0x10000;
        return String.fromCharCode( 0xD800 + (code >> 10), 0xDC00 + (code & 0x3FF) );
      }
      return String.fromCharCode( code );
    });
  };
  function compose_xml_node(node, name, indent) {
    // Private helper for compose_xml(): compose one node (no XML header) at
    // the given indent level, recursing for children and array elements.
    var indent_text = "";
    for (var k = 0; k < indent; k++) indent_text += indent_string;

    if ((typeof(node) != 'object') || (node == null)) {
      // node is simple string (or number, null, ...)
      return indent_text + "<" + name + ">" + encode_entities(node) + "</" + name + ">\n";
    }

    var xml = "";

    if (Array.isArray(node) && node.length) {
      // node is array: one element per item, all at the same indent.
      // Real arrays only, so a hash that has a "length" key stays a hash.
      for (var pos = 0; pos < node.length; pos++) {
        xml += compose_xml_node( node[pos], name, indent );
      }
      return xml;
    }

    // node is hash (an empty array also lands here and self-closes)
    xml += indent_text + "<" + name;

    var key_count = 0;
    var has_attribs = 0;
    for (var counted in node) key_count++;

    var attribs = node["_Attribs"];
    if (attribs) {
      has_attribs = 1;
      var attrib_names = hash_keys_to_array(attribs).sort();
      for (var adx = 0, alen = attrib_names.length; adx < alen; adx++) {
        var attrib_name = attrib_names[adx];
        xml += " " + attrib_name + "=\"" + encode_attrib_entities(attribs[attrib_name]) + "\"";
      }
    } // has attribs

    if (key_count <= has_attribs) {
      // no child elements, so self-close
      return xml + "/>\n";
    }

    xml += ">";

    var data = node["_Data"];
    if ((typeof(data) != 'undefined') && (data !== null)) {
      // Text content, including '' and 0, which must not turn into a <_Data>
      // child element. Any sibling child keys are not composed in this case.
      return xml + encode_entities(data) + "</" + name + ">\n";
    }

    // real children, in sorted key order
    xml += "\n";
    var child_names = hash_keys_to_array(node).sort();
    for (var cdx = 0, clen = child_names.length; cdx < clen; cdx++) {
      var child_name = child_names[cdx];
      if ((child_name != "_Attribs") && (child_name != "_Data") && child_name.match(re_valid_tag_name)) {
        // recurse for node, with incremented indent value
        xml += compose_xml_node( node[child_name], child_name, indent + 1 );
      }
    } // foreach key
    return xml + indent_text + "</" + name + ">\n";
  }

  var compose_xml = exports.stringify = function compose_xml(node, name, indent) {
    // Compose a node into XML text, including attributes, recursing for children.
    //   node:   tree to compose (hash, array, or simple value)
    //   name:   element name; if omitted at the root, the first key of `node`
    //           is used and its value becomes the node
    //   indent: nesting level; when omitted or 0 the output starts with the
    //           XML header line
    // Returns the XML text, one element per line, indented with indent_string.
    var xml = "";

    // If this is the root node, set the indent to 0
    // and setup the XML header (PI node)
    if (!indent) {
      indent = 0;
      xml = xml_header + "\n";

      if (!name) {
        // no name provided, assume content is wrapped in it
        name = first_key(node);
        node = node[name];
      }
    }

    // the header is written once here, so a root-level array does not repeat it
    return xml + compose_xml_node( node, name, indent );
  };
  var always_array = exports.alwaysArray = function always_array(obj, key) {
    // Make sure a value is an array.
    //   always_array(value)    -> returns value if it is array-like (an object
    //                             with a length), otherwise [ value ]
    //   always_array(obj, key) -> wraps obj[key] in place when it is not
    //                             array-like, and returns null
    // null counts as "not an array" and is wrapped like any other value.
    var is_list = function(value) {
      return (value !== null) && (typeof(value) == 'object') && (typeof(value.length) != 'undefined');
    };

    if (key) {
      if (!is_list(obj[key])) {
        var temp = obj[key];
        delete obj[key];
        obj[key] = [ temp ];
      }
      return null;
    }

    return is_list(obj) ? obj : [ obj ];
  };
  var hash_keys_to_array = exports.hashKeysToArray = function hash_keys_to_array(hash) {
    // Collect the enumerable keys of a hash (inherited ones included) into a
    // new array, in for-in order; the values are ignored.
    var names = [];
    for (var name in hash) {
      names[names.length] = name;
    }
    return names;
  };
  var isa_hash = exports.isaHash = function isa_hash(arg) {
    // true for a non-null object that has no length property
    if (!arg) return false;
    if (typeof(arg) != 'object') return false;
    return typeof(arg.length) == 'undefined';
  };
  var isa_array = exports.isaArray = function isa_array(arg) {
    // Determine if arg is an array or is array-like (a non-null object with a
    // length property). Strings are not objects, so they do not count.
    // Array.isArray replaces the former typeof(arg) == 'array' test, which
    // could never be true because typeof never yields 'array'.
    if (Array.isArray(arg)) return true;
    return( !!arg && (typeof(arg) == 'object') && (typeof(arg.length) != 'undefined') );
  };
  var first_key = exports.firstKey = function first_key(hash) {
    // First key that for-in yields for the hash, or null when it has none
    var found = null;
    for (var candidate in hash) {
      found = candidate;
      break;
    }
    return found;
  };
  var num_keys = exports.numKeys = function num_keys(hash) {
    // Number of enumerable keys in a hash (inherited ones included)
    var total = 0;
    for (var name in hash) {
      total += 1;
    }
    return total;
  };