DocumentTest.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657
  1. <?php
  2. declare(strict_types=1);
  3. namespace DiDom\Tests;
  4. use DiDom\Document;
  5. use DiDom\Query;
  6. use DOMDocument;
  7. use InvalidArgumentException;
  8. use RuntimeException;
  9. class DocumentTest extends TestCase
  10. {
  11. public function testConstructWithNotExistingFile()
  12. {
  13. $this->expectException(RuntimeException::class);
  14. $this->expectExceptionMessage('Could not load file path/to/file.');
  15. new Document('path/to/file', true);
  16. }
  17. public function testConstructorWithInvalidDocumentType()
  18. {
  19. $this->expectException(RuntimeException::class);
  20. $this->expectExceptionMessage('Document type must be "xml" or "html", bar given.');
  21. new Document('foo', false, 'UTF-8', 'bar');
  22. }
  23. /**
  24. * @dataProvider loadHtmlCharsetTests
  25. */
  26. public function testLoadHtmlCharset($html, $text)
  27. {
  28. $document = new Document($html, false, 'UTF-8');
  29. $this->assertEquals($text, $document->first('div')->text());
  30. }
  31. public function loadHtmlCharsetTests()
  32. {
  33. return array(
  34. array('<html><div class="foo">English language</html>', 'English language'),
  35. array('<html><div class="foo">Русский язык</html>', 'Русский язык'),
  36. array('<html><div class="foo">اللغة العربية</html>', 'اللغة العربية'),
  37. array('<html><div class="foo">漢語</html>', '漢語'),
  38. array('<html><div class="foo">Tiếng Việt</html>', 'Tiếng Việt'),
  39. );
  40. }
  41. public function testCreate()
  42. {
  43. $this->assertInstanceOf('DiDom\Document', Document::create());
  44. }
  45. public function testCreateElement()
  46. {
  47. $html = $this->loadFixture('posts.html');
  48. $document = new Document($html, false);
  49. $element = $document->createElement('span', 'value');
  50. $this->assertInstanceOf('DiDom\Element', $element);
  51. $this->assertEquals('span', $element->getNode()->tagName);
  52. $this->assertEquals('value', $element->getNode()->textContent);
  53. $element = $document->createElement('span');
  54. $this->assertEquals('', $element->text());
  55. $element = $document->createElement('input', '', ['name' => 'username']);
  56. $this->assertEquals('username', $element->getNode()->getAttribute('name'));
  57. }
  58. public function testCreateElementBySelector()
  59. {
  60. $document = new Document();
  61. $element = $document->createElementBySelector('a.external-link[href=http://example.com]');
  62. $this->assertEquals('a', $element->getNode()->tagName);
  63. $this->assertEquals('', $element->text());
  64. $this->assertEquals(['href' => 'http://example.com', 'class' => 'external-link'], $element->attributes());
  65. $element = $document->createElementBySelector('#block', 'Foo');
  66. $this->assertEquals('div', $element->getNode()->tagName);
  67. $this->assertEquals('Foo', $element->text());
  68. $this->assertEquals(['id' => 'block'], $element->attributes());
  69. $element = $document->createElementBySelector('input', null, ['name' => 'name', 'placeholder' => 'Enter your name']);
  70. $this->assertEquals('input', $element->getNode()->tagName);
  71. $this->assertEquals('', $element->text());
  72. $this->assertEquals(['name' => 'name', 'placeholder' => 'Enter your name'], $element->attributes());
  73. }
  74. public function testCreateTextNode()
  75. {
  76. $document = new Document();
  77. $textNode = $document->createTextNode('foo bar baz');
  78. $this->assertInstanceOf('DiDom\Element', $textNode);
  79. $this->assertInstanceOf('DOMText', $textNode->getNode());
  80. $this->assertEquals('foo bar baz', $textNode->text());
  81. }
  82. public function testCreateComment()
  83. {
  84. $document = new Document();
  85. $comment = $document->createComment('foo bar baz');
  86. $this->assertInstanceOf('DiDom\Element', $comment);
  87. $this->assertInstanceOf('DOMComment', $comment->getNode());
  88. $this->assertEquals('foo bar baz', $comment->text());
  89. }
  90. public function testCreateCdataSection()
  91. {
  92. $document = new Document();
  93. $cdataSection = $document->createCdataSection('foo bar baz');
  94. $this->assertInstanceOf('DiDom\Element', $cdataSection);
  95. $this->assertInstanceOf('DOMCdataSection', $cdataSection->getNode());
  96. $this->assertEquals('foo bar baz', $cdataSection->text());
  97. }
  98. public function testCreateDocumentFragment()
  99. {
  100. $document = new Document();
  101. $documentFragment = $document->createDocumentFragment();
  102. $this->assertInstanceOf('DiDom\DocumentFragment', $documentFragment);
  103. $this->assertInstanceOf('DOMDocumentFragment', $documentFragment->getNode());
  104. }
  105. public function testAppendChildWithInvalidArgument()
  106. {
  107. $this->expectException(InvalidArgumentException::class);
  108. $this->expectExceptionMessage('Argument 1 passed to DiDom\Document::appendChild must be an instance of DiDom\Element or DOMNode, string given.');
  109. $html = $this->loadFixture('posts.html');
  110. $document = new Document($html);
  111. $document->appendChild('foo');
  112. }
  113. public function testAppendChild()
  114. {
  115. $html = '<!DOCTYPE html>
  116. <html lang="en">
  117. <head>
  118. <meta charset="UTF-8">
  119. <title>Document</title>
  120. </head>
  121. <body>
  122. </body>
  123. </html>';
  124. $document = new Document($html);
  125. $this->assertCount(0, $document->find('span'));
  126. $node = $document->createElement('span');
  127. $appendedChild = $document->appendChild($node);
  128. $this->assertCount(1, $document->find('span'));
  129. $this->assertTrue($appendedChild->is($document->first('span')));
  130. $appendedChild->remove();
  131. $this->assertCount(0, $document->find('span'));
  132. $nodes = [];
  133. $nodes[] = $document->createElement('span');
  134. $nodes[] = $document->createElement('span');
  135. $appendedChildren = $document->appendChild($nodes);
  136. $nodes = $document->find('span');
  137. $this->assertCount(2, $appendedChildren);
  138. $this->assertCount(2, $nodes);
  139. foreach ($appendedChildren as $index => $child) {
  140. $this->assertTrue($child->is($nodes[$index]));
  141. }
  142. }
  143. public function testLoadWithNotExistingFile()
  144. {
  145. $this->expectException(RuntimeException::class);
  146. $this->expectExceptionMessage('Could not load file path/to/file.');
  147. $document = new Document();
  148. $document->load('path/to/file', true);
  149. }
  150. public function testLoadWithInvalidDocumentType()
  151. {
  152. $this->expectException(RuntimeException::class);
  153. $this->expectExceptionMessage('Document type must be "xml" or "html", bar given.');
  154. $document = new Document();
  155. $document->load('foo', false, 'bar');
  156. }
  157. public function testLoadHtmlDocument()
  158. {
  159. $html = '
  160. <!DOCTYPE html>
  161. <html>
  162. <head>
  163. <title>Document</title>
  164. </head>
  165. <body>
  166. <div class="foo">Foo — Bar — Baz</div>
  167. </body>
  168. </html>
  169. ';
  170. $document = new Document();
  171. $document->load($html, false, 'html');
  172. $this->assertEquals('Foo — Bar — Baz', $document->first('.foo')->text());
  173. }
  174. public function testLoadXmlDocument()
  175. {
  176. $xml = '
  177. <?xml version="1.0" encoding="UTF-8"?>
  178. <root>
  179. <foo>Foo — Bar — Baz</foo>
  180. </root>
  181. ';
  182. $document = new Document();
  183. $document->load($xml, false, 'xml');
  184. $this->assertEquals('Foo — Bar — Baz', $document->first('foo')->text());
  185. }
  186. public function testLoadHtmlFileWithNotExistingFile()
  187. {
  188. $this->expectException(RuntimeException::class);
  189. $this->expectExceptionMessage('Could not load file path/to/file.');
  190. $document = new Document();
  191. $document->loadHtmlFile('path/to/file');
  192. }
  193. public function testLoadXmlFileWithNotExistingFile()
  194. {
  195. $this->expectException(RuntimeException::class);
  196. $this->expectExceptionMessage('Could not load file path/to/file.');
  197. $document = new Document();
  198. $document->loadXmlFile('path/to/file');
  199. }
  200. public function testHas()
  201. {
  202. $document = new Document($this->loadFixture('posts.html'));
  203. $this->assertTrue($document->has('.posts'));
  204. $this->assertFalse($document->has('.fake'));
  205. }
  206. /**
  207. * @dataProvider findTests
  208. */
  209. public function testFind($html, $selector, $type, $count)
  210. {
  211. $document = new Document($html);
  212. $elements = $document->find($selector, $type);
  213. $this->assertTrue(is_array($elements));
  214. $this->assertEquals($count, count($elements));
  215. foreach ($elements as $element) {
  216. $this->assertInstanceOf('DiDom\Element', $element);
  217. }
  218. }
  219. /**
  220. * @dataProvider findTests
  221. */
  222. public function testFindAndReturnDomElement($html, $selector, $type, $count)
  223. {
  224. $document = new Document($html);
  225. $elements = $document->find($selector, $type, false);
  226. $this->assertTrue(is_array($elements));
  227. $this->assertEquals($count, count($elements));
  228. foreach ($elements as $element) {
  229. $this->assertInstanceOf('DOMElement', $element);
  230. }
  231. }
  232. public function testFindWithContext()
  233. {
  234. $document = new Document($this->loadFixture('posts.html'));
  235. $post = $document->find('.post')[1];
  236. $title = $document->find('.post .title')[1];
  237. $titleInContext = $document->find('.title', Query::TYPE_CSS, true, $post)[0];
  238. $this->assertTrue($title->is($titleInContext));
  239. $this->assertFalse($title->is($post->find('.title')[0]));
  240. }
  241. public function testFindText()
  242. {
  243. $html = $this->loadFixture('menu.html');
  244. $document = new Document($html);
  245. $texts = $document->find('//a/text()', Query::TYPE_XPATH);
  246. $this->assertTrue(is_array($texts));
  247. $this->assertEquals(3, count($texts));
  248. $this->assertEquals(['Link 1', 'Link 2', 'Link 3'], $texts);
  249. }
  250. public function testFindComment()
  251. {
  252. $html = $this->loadFixture('menu.html');
  253. $document = new Document($html);
  254. $comment = $document->xpath('/html/body/ul/li/a/comment()');
  255. $this->assertTrue($comment[0]->isCommentNode());
  256. $this->assertTrue($comment[1]->isCommentNode());
  257. $this->assertTrue(is_array($comment));
  258. $this->assertEquals(2, count($comment));
  259. $comment = $document->xpath('/html/body/comment()');
  260. $this->assertTrue($comment[0]->isCommentNode());
  261. $this->assertTrue(is_array($comment));
  262. $this->assertEquals(1, count($comment));
  263. }
  264. public function testFindAttribute()
  265. {
  266. $html = $this->loadFixture('menu.html');
  267. $document = new Document($html);
  268. $links = $document->find('//a/@href', Query::TYPE_XPATH);
  269. $this->assertTrue(is_array($links));
  270. $this->assertEquals(3, count($links));
  271. foreach ($links as $link) {
  272. $this->assertEquals('http://example.com', $link);
  273. }
  274. }
  275. public function findTests()
  276. {
  277. $html = $this->loadFixture('posts.html');
  278. return array(
  279. array($html, '.post h2', Query::TYPE_CSS, 3),
  280. array($html, '.fake h2', Query::TYPE_CSS, 0),
  281. array($html, '.post h2, .post p', Query::TYPE_CSS, 6),
  282. array($html, "//*[contains(concat(' ', normalize-space(@class), ' '), ' post ')]", Query::TYPE_XPATH, 3),
  283. );
  284. }
  285. public function testFirst()
  286. {
  287. $html = '<ul><li>One</li><li>Two</li><li>Three</li></ul>';
  288. $document = new Document($html, false);
  289. $items = $document->find('ul > li');
  290. $this->assertEquals($items[0]->getNode(), $document->first('ul > li')->getNode());
  291. $this->assertEquals('One', $document->first('ul > li::text'));
  292. $document = new Document();
  293. $this->assertNull($document->first('ul > li'));
  294. }
  295. public function testFirstWithContext()
  296. {
  297. $html = '
  298. <div class="root">
  299. <span>Foo</span>
  300. <div><span>Bar</span></div>
  301. </div>
  302. ';
  303. $document = new Document($html);
  304. $div = $document->first('.root div');
  305. $span = $document->first('.root div span');
  306. $result = $document->first('span', Query::TYPE_CSS, true, $div);
  307. $this->assertTrue($span->is($result));
  308. }
  309. public function testXpath()
  310. {
  311. $html = $this->loadFixture('posts.html');
  312. $document = new Document($html, false);
  313. $elements = $document->xpath("//*[contains(concat(' ', normalize-space(@class), ' '), ' post ')]");
  314. $this->assertTrue(is_array($elements));
  315. $this->assertEquals(3, count($elements));
  316. foreach ($elements as $element) {
  317. $this->assertInstanceOf('DiDom\Element', $element);
  318. }
  319. }
  320. public function testCount()
  321. {
  322. $html = '<ul><li>One</li><li>Two</li><li>Three</li></ul>';
  323. $document = new Document($html, false);
  324. $this->assertIsInt($document->count('li'));
  325. $this->assertEquals(3, $document->count('li'));
  326. $document = new Document();
  327. $this->assertIsInt($document->count('li'));
  328. $this->assertEquals(0, $document->count('li'));
  329. }
  330. public function testCreateXpath()
  331. {
  332. $document = new Document();
  333. $xpath =$document->createXpath();
  334. $this->assertInstanceOf('DOMXPath', $xpath);
  335. $this->assertEquals($document->getDocument(), $xpath->document);
  336. }
  337. public function testHtml()
  338. {
  339. $html = '
  340. <!DOCTYPE html>
  341. <html lang="en">
  342. <head>
  343. <meta charset="UTF-8">
  344. <title>Document</title>
  345. </head>
  346. <body>
  347. English language <br>
  348. Русский язык <br>
  349. اللغة العربية <br>
  350. 漢語 <br>
  351. Tiếng Việt <br>
  352. &lt; &gt;
  353. </body>
  354. </html>
  355. ';
  356. $document = new Document($html);
  357. $this->assertEquals(trim($html), $document->html());
  358. }
  359. public function testXml()
  360. {
  361. $xml = $this->loadFixture('books.xml');
  362. $document = new Document($xml, false, 'UTF-8', 'xml');
  363. $this->assertTrue(is_string($document->xml()));
  364. }
  365. public function testXmlWithOptions()
  366. {
  367. $xml = '<foo><bar></bar></foo>';
  368. $document = new Document();
  369. $document->loadXml($xml);
  370. $prolog = '<?xml version="1.0" encoding="UTF-8"?>'."\n";
  371. $this->assertEquals($prolog.'<foo><bar/></foo>', $document->xml());
  372. $this->assertEquals($prolog.'<foo><bar></bar></foo>', $document->xml(LIBXML_NOEMPTYTAG));
  373. }
  374. public function testFormat()
  375. {
  376. $html = $this->loadFixture('posts.html');
  377. $document = new Document($html, false);
  378. $this->assertFalse($document->getDocument()->formatOutput);
  379. $document->format();
  380. $this->assertTrue($document->getDocument()->formatOutput);
  381. }
  382. public function testText()
  383. {
  384. $html = '<html>foo</html>';
  385. $document = new Document($html, false);
  386. $this->assertEquals('foo', $document->text());
  387. }
  388. public function testIsWithInvalidArgument()
  389. {
  390. $this->expectException(InvalidArgumentException::class);
  391. $document = new Document();
  392. $document->is(null);
  393. }
  394. public function testIs()
  395. {
  396. $html = $this->loadFixture('posts.html');
  397. $document = new Document($html, false);
  398. $document2 = new Document($html, false);
  399. $this->assertTrue($document->is($document));
  400. $this->assertFalse($document->is($document2));
  401. }
  402. public function testIsWithEmptyDocument()
  403. {
  404. $html = $this->loadFixture('posts.html');
  405. $document = new Document($html, false);
  406. $document2 = new Document();
  407. $this->assertFalse($document->is($document2));
  408. }
  409. public function testGetType()
  410. {
  411. // empty document
  412. $document = new Document();
  413. $this->assertNull($document->getType());
  414. // html
  415. $html = $this->loadFixture('posts.html');
  416. $document = new Document($html);
  417. $this->assertEquals('html', $document->getType());
  418. $document = new Document();
  419. $document->loadHtml($html);
  420. $this->assertEquals('html', $document->getType());
  421. $document = new Document();
  422. $document->load($html, false, 'html');
  423. $this->assertEquals('html', $document->getType());
  424. // xml
  425. $xml = $this->loadFixture('books.xml');
  426. $document = new Document($xml, false, 'UTF-8', 'xml');
  427. $this->assertEquals('xml', $document->getType());
  428. $document = new Document();
  429. $document->loadXml($xml);
  430. $this->assertEquals('xml', $document->getType());
  431. $document = new Document();
  432. $document->load($xml, false, 'xml');
  433. $this->assertEquals('xml', $document->getType());
  434. }
  435. public function testGetEncoding()
  436. {
  437. $document = new Document();
  438. $this->assertEquals('UTF-8', $document->getEncoding());
  439. $document = new Document(null, false, 'CP-1251');
  440. $this->assertEquals('CP-1251', $document->getEncoding());
  441. }
  442. public function testGetDocument()
  443. {
  444. $domDocument = new DOMDocument();
  445. $document = new Document($domDocument);
  446. $this->assertEquals($domDocument, $document->getDocument());
  447. }
  448. public function testGetElement()
  449. {
  450. $html = $this->loadFixture('posts.html');
  451. $document = new Document($html, false);
  452. $this->assertInstanceOf('DOMElement', $document->getElement());
  453. }
  454. public function testEmptyDocumentToElement()
  455. {
  456. $this->expectException(RuntimeException::class);
  457. $document = new Document();
  458. $document->toElement();
  459. }
  460. public function testToElement()
  461. {
  462. $html = $this->loadFixture('posts.html');
  463. $document = new Document($html, false);
  464. $this->assertInstanceOf('DiDom\Element', $document->toElement());
  465. }
  466. public function testToStringHtml()
  467. {
  468. $html = $this->loadFixture('posts.html');
  469. $document = new Document($html, false);
  470. $this->assertEquals($document->html(), $document->__toString());
  471. }
  472. public function testToStringXml()
  473. {
  474. $xml = $this->loadFixture('books.xml');
  475. $document = new Document($xml, false, 'UTF-8', 'xml');
  476. $this->assertEquals($document->xml(), $document->__toString());
  477. }
  478. }