QueryTest.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. <?php
  2. declare(strict_types=1);
  3. namespace DiDom\Tests;
  4. use DiDom\Exceptions\InvalidSelectorException;
  5. use DiDom\Query;
  6. use InvalidArgumentException;
  7. use RuntimeException;
  8. class QueryTest extends TestCase
  9. {
  /**
   * Compiling with the expression type "foo" is expected to raise a RuntimeException.
   */
  public function testCompileWithUnknownExpressionType()
  {
      $unsupportedType = 'foo';

      $this->expectException(RuntimeException::class);
      $this->expectExceptionMessage('Unknown expression type "foo"');

      Query::compile('h1', $unsupportedType);
  }
  /**
   * Checks that each CSS selector from the provider compiles to the expected XPath.
   *
   * @dataProvider compileCssTests
   *
   * @param string $selector CSS selector handed to Query::compile()
   * @param string $xpath    XPath expression the compilation is expected to yield
   */
  public function testCompileCssSelector($selector, $xpath)
  {
      $actual = Query::compile($selector);

      $this->assertEquals($xpath, $actual);
  }
  /**
   * Checks that Query::getSegments() returns the expected segment array for a selector.
   *
   * @dataProvider getSegmentsTests
   *
   * @param string $selector selector to split
   * @param array  $segments expected result of Query::getSegments()
   */
  public function testGetSegments($selector, $segments)
  {
      $actual = Query::getSegments($selector);

      $this->assertEquals($segments, $actual);
  }
  /**
   * Checks that Query::buildXpath() turns a segment array into the expected XPath.
   *
   * @dataProvider buildXpathTests
   *
   * @param array  $segments segment array passed to Query::buildXpath()
   * @param string $xpath    expected XPath expression
   */
  public function testBuildXpath($segments, $xpath)
  {
      $actual = Query::buildXpath($segments);

      $this->assertEquals($xpath, $actual);
  }
  /**
   * Building an XPath from an empty segment array is expected to raise InvalidArgumentException.
   */
  public function testBuildXpathWithEmptyArray()
  {
      $noSegments = [];

      $this->expectException(InvalidArgumentException::class);

      Query::buildXpath($noSegments);
  }
  /**
   * An empty expression compiled as XPath is expected to raise InvalidSelectorException.
   */
  public function testCompileWithEmptyXpathExpression()
  {
      $emptyExpression = '';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('The expression must not be empty.');

      Query::compile($emptyExpression, Query::TYPE_XPATH);
  }
  /**
   * An empty expression compiled as CSS is expected to raise InvalidSelectorException.
   */
  public function testCompileWithEmptyCssExpression()
  {
      $emptyExpression = '';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('The expression must not be empty.');

      Query::compile($emptyExpression, Query::TYPE_CSS);
  }
  /**
   * Splitting an empty selector into segments is expected to raise InvalidSelectorException.
   */
  public function testGetSegmentsWithEmptySelector()
  {
      $emptySelector = '';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('The selector must not be empty.');

      Query::getSegments($emptySelector);
  }
  /**
   * An attribute condition with no attribute name ("[=foo]") is expected to be rejected.
   */
  public function testEmptyAttributeName()
  {
      $selector = 'input[=foo]';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Invalid selector "input[=foo]": attribute name must not be empty.');

      Query::compile($selector);
  }
  /**
   * A selector using the pseudo-class "unknown-pseudo-class" is expected to be rejected.
   */
  public function testUnknownPseudoClass()
  {
      $selector = 'li:unknown-pseudo-class';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Unknown pseudo-class "unknown-pseudo-class".');

      Query::compile($selector);
  }
  /**
   * The second parameter of the "contains" pseudo-class must be the literal
   * true or false; each value from the provider is expected to be rejected
   * with a message naming the offending value.
   *
   * @dataProvider containsInvalidCaseSensitiveParameterDataProvider
   *
   * @param string $caseSensitive invalid value placed in the second parameter
   */
  public function testContainsInvalidCaseSensitiveParameter($caseSensitive)
  {
      $message = sprintf('Parameter 2 of "contains" pseudo-class must be equal true or false, "%s" given', $caseSensitive);

      // expectException() only takes the exception class; passing the message
      // as a second argument is silently ignored, so the message gets its own
      // expectation.
      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage($message);

      Query::compile("a:contains('Log in', {$caseSensitive})");
  }
  /**
   * Supplies values for the second "contains" parameter that the test expects to be rejected.
   *
   * @return array one single-element row per invalid value
   */
  public function containsInvalidCaseSensitiveParameterDataProvider(): array
  {
      $rows = [];

      foreach (['foo', 'TRUE', 'FALSE'] as $invalidValue) {
          $rows[] = [$invalidValue];
      }

      return $rows;
  }
  /**
   * An nth-child pseudo-class with empty parentheses is expected to be rejected.
   */
  public function testEmptyNthExpression()
  {
      $selector = 'li:nth-child()';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('nth-child (or nth-last-child) expression must not be empty.');

      Query::compile($selector);
  }
  /**
   * A selector ending in a bare "::" is expected to be rejected as an invalid property.
   */
  public function testEmptyProperty()
  {
      $selector = 'li::';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Invalid property "::".');

      Query::compile($selector);
  }
  /**
   * A selector using the property "foo" is expected to be rejected as unknown.
   */
  public function testInvalidProperty()
  {
      $selector = 'li::foo';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Unknown property "foo".');

      Query::compile($selector);
  }
  /**
   * An nth-child expression of "foo" is expected to be rejected as invalid.
   */
  public function testUnknownNthExpression()
  {
      $selector = 'li:nth-child(foo)';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Invalid nth-child expression "foo".');

      Query::compile($selector);
  }
  /**
   * Splitting the selector "." (a class marker with no name) is expected to be rejected.
   */
  public function testGetSegmentsWithEmptyClassName()
  {
      $selector = '.';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Invalid selector ".".');

      Query::getSegments($selector);
  }
  /**
   * Compiling "span." (a tag followed by a class marker with no name) is expected to be rejected.
   */
  public function testCompileWithEmptyClassName()
  {
      $selector = 'span.';

      $this->expectException(InvalidSelectorException::class);
      $this->expectExceptionMessage('Invalid selector ".".');

      Query::compile($selector);
  }
  /**
   * An expression compiled with Query::TYPE_XPATH is expected to come back unchanged.
   */
  public function testCompileXpath()
  {
      $compiled = Query::compile('//div', Query::TYPE_XPATH);

      $this->assertEquals('//div', $compiled);
  }
  /**
   * Query::setCompiled() followed by Query::getCompiled() is expected to
   * return the same selector => XPath map.
   *
   * The map is installed through static calls on Query, so the previous
   * value is captured first and put back afterwards; otherwise the
   * hand-written entry below would stay in place for whichever test runs next.
   */
  public function testSetCompiled()
  {
      $xpath = "//*[@id='foo']//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//baz";
      $compiled = ['#foo .bar baz' => $xpath];

      $previous = Query::getCompiled();

      try {
          Query::setCompiled($compiled);

          $this->assertEquals($compiled, Query::getCompiled());
      } finally {
          // Runs even when the assertion fails, so the fixture never leaks.
          Query::setCompiled($previous);
      }
  }
  /**
   * After resetting the compiled map and compiling one selector,
   * Query::getCompiled() is expected to contain exactly that selector and its XPath.
   */
  public function testGetCompiled()
  {
      $selector = '#foo .bar baz';
      $expected = [
          $selector => '//*[@id="foo"]//*[contains(concat(" ", normalize-space(@class), " "), " bar ")]//baz',
      ];

      Query::setCompiled([]);
      Query::compile($selector);

      $this->assertEquals($expected, Query::getCompiled());
  }
  /**
   * Data provider for testCompileCssSelector().
   *
   * Each row is [CSS selector, expected XPath]. The rows defined here are
   * followed by the "contains" pseudo-class rows, the property rows, and one
   * row whose quoted attribute value contains ", " and "::".
   *
   * @return array
   */
  public function compileCssTests()
  {
      $baseCases = [
          ['a', '//a'],
          ['foo bar baz', '//foo//bar//baz'],
          ['foo > bar > baz', '//foo/bar/baz'],
          ['#foo', '//*[@id="foo"]'],
          ['.foo', '//*[contains(concat(" ", normalize-space(@class), " "), " foo ")]'],
          ['.foo.bar', '//*[(contains(concat(" ", normalize-space(@class), " "), " foo ")) and (contains(concat(" ", normalize-space(@class), " "), " bar "))]'],
          ['*[foo=bar]', '//*[@foo="bar"]'],
          ['*[foo="bar"]', '//*[@foo="bar"]'],
          ['*[foo=\'bar\']', '//*[@foo="bar"]'],
          ['select[name=category] option[selected=selected]', '//select[@name="category"]//option[@selected="selected"]'],
          ['*[^data-]', '//*[@*[starts-with(name(), "data-")]]'],
          ['*[^data-=foo]', '//*[@*[starts-with(name(), "data-")]="foo"]'],
          ['a[href^=https]', '//a[starts-with(@href, "https")]'],
          ['img[src$=png]', '//img[substring(@src, string-length(@src) - string-length("png") + 1) = "png"]'],
          ['a[href*=example.com]', '//a[contains(@href, "example.com")]'],
          ['script[!src]', '//script[not(@src)]'],
          ['a[href!="http://foo.com/"]', '//a[not(@href="http://foo.com/")]'],
          ['a[foo~="bar"]', '//a[contains(concat(" ", normalize-space(@foo), " "), " bar ")]'],
          ['input, textarea, select', '//input|//textarea|//select'],
          ['input[name="name"], textarea[name="description"], select[name="type"]', '//input[@name="name"]|//textarea[@name="description"]|//select[@name="type"]'],
          ['li:first-child', '//li[position() = 1]'],
          ['li:last-child', '//li[position() = last()]'],
          ['*:not(a[href*="example.com"])', '//*[not(self::a[contains(@href, "example.com")])]'],
          ['*:not(a[href*="example.com"]):not(.foo)', '//*[(not(self::a[contains(@href, "example.com")])) and (not(self::*[contains(concat(" ", normalize-space(@class), " "), " foo ")]))]'],
          ['ul:empty', '//ul[count(descendant::*) = 0]'],
          ['ul:not-empty', '//ul[count(descendant::*) > 0]'],
          ['li:nth-child(odd)', '//*[(name()="li") and (position() mod 2 = 1 and position() >= 1)]'],
          ['li:nth-child(even)', '//*[(name()="li") and (position() mod 2 = 0 and position() >= 0)]'],
          ['li:nth-child(3)', '//*[(name()="li") and (position() = 3)]'],
          ['li:nth-child(-3)', '//*[(name()="li") and (position() = -3)]'],
          ['li:nth-child(3n)', '//*[(name()="li") and ((position() + 0) mod 3 = 0 and position() >= 0)]'],
          ['li:nth-child(3n+1)', '//*[(name()="li") and ((position() - 1) mod 3 = 0 and position() >= 1)]'],
          ['li:nth-child(3n-1)', '//*[(name()="li") and ((position() + 1) mod 3 = 0 and position() >= 1)]'],
          ['li:nth-child(n+3)', '//*[(name()="li") and ((position() - 3) mod 1 = 0 and position() >= 3)]'],
          ['li:nth-child(n-3)', '//*[(name()="li") and ((position() + 3) mod 1 = 0 and position() >= 3)]'],
          ['li:nth-of-type(odd)', '//li[position() mod 2 = 1 and position() >= 1]'],
          ['li:nth-of-type(even)', '//li[position() mod 2 = 0 and position() >= 0]'],
          ['li:nth-of-type(3)', '//li[position() = 3]'],
          ['li:nth-of-type(-3)', '//li[position() = -3]'],
          ['li:nth-of-type(3n)', '//li[(position() + 0) mod 3 = 0 and position() >= 0]'],
          ['li:nth-of-type(3n+1)', '//li[(position() - 1) mod 3 = 0 and position() >= 1]'],
          ['li:nth-of-type(3n-1)', '//li[(position() + 1) mod 3 = 0 and position() >= 1]'],
          ['li:nth-of-type(n+3)', '//li[(position() - 3) mod 1 = 0 and position() >= 3]'],
          ['li:nth-of-type(n-3)', '//li[(position() + 3) mod 1 = 0 and position() >= 3]'],
          ['ul:has(li.item)', '//ul[.//li[contains(concat(" ", normalize-space(@class), " "), " item ")]]'],
          ['form[name=register]:has(input[name=foo])', '//form[(@name="register") and (.//input[@name="foo"])]'],
          ['ul li a::text', '//ul//li//a/text()'],
          ['ul li a::text()', '//ul//li//a/text()'],
          ['ul li a::attr(href)', '//ul//li//a/@*[name() = "href"]'],
          ['ul li a::attr(href, title)', '//ul//li//a/@*[name() = "href" or name() = "title"]'],
          ['> ul li a', '/ul//li//a'],
      ];

      // Quoted attribute value that itself contains ", " and "::".
      $quotedDelimiterCases = [
          ['a[title="foo, bar::baz"]', '//a[@title="foo, bar::baz"]'],
      ];

      return array_merge(
          $baseCases,
          $this->getContainsPseudoClassTests(),
          $this->getPropertiesTests(),
          $quotedDelimiterCases
      );
  }
  /**
   * Builds the [selector, expected XPath] rows for the "contains" pseudo-class.
   *
   * Every argument combination is emitted three times: with the needle
   * unquoted, double-quoted and single-quoted. The case-insensitive
   * expectations name mb_strtolower when that function exists and
   * strtolower otherwise.
   *
   * @return array
   */
  private function getContainsPseudoClassTests(): array
  {
      $strToLowerFunction = function_exists('mb_strtolower') ? 'mb_strtolower' : 'strtolower';

      $sensitivePartial = '//li[contains(text(), "foo")]';
      $sensitiveFull = '//li[text() = "foo"]';
      $insensitivePartial = "//li[contains(php:functionString(\"{$strToLowerFunction}\", .), php:functionString(\"{$strToLowerFunction}\", \"foo\"))]";
      $insensitiveFull = "//li[php:functionString(\"{$strToLowerFunction}\", .) = php:functionString(\"{$strToLowerFunction}\", \"foo\")]";

      // Trailing arguments (caseSensitive, fullMatch) => expected XPath.
      $expectations = [
          '' => $sensitivePartial,
          ', true' => $sensitivePartial,
          ', true, false' => $sensitivePartial,
          ', true, true' => $sensitiveFull,
          ', false' => $insensitivePartial,
          ', false, false' => $insensitivePartial,
          ', false, true' => $insensitiveFull,
      ];

      $needles = ['foo', '"foo"', '\'foo\''];

      $rows = [];

      foreach ($expectations as $arguments => $expected) {
          foreach ($needles as $needle) {
              $rows[] = ["li:contains({$needle}{$arguments})", $expected];
          }
      }

      return $rows;
  }
  /**
   * Builds the [selector, expected XPath] rows for the "::text" and "::attr" properties.
   *
   * @return array
   */
  private function getPropertiesTests(): array
  {
      $textCases = [
          ['a::text', '//a/text()'],
          ['a::text()', '//a/text()'],
      ];

      $attrCases = [
          ['a::attr', '//a/@*'],
          ['a::attr()', '//a/@*'],
          ['a::attr(href)', '//a/@*[name() = "href"]'],
          ['a::attr(href,title)', '//a/@*[name() = "href" or name() = "title"]'],
          ['a::attr(href, title)', '//a/@*[name() = "href" or name() = "title"]'],
      ];

      return array_merge($textCases, $attrCases);
  }
  /**
   * Data provider for testBuildXpath().
   *
   * Each row is [segment array, expected XPath].
   *
   * @return array
   */
  public function buildXpathTests(): array
  {
      return [
          [
              ['tag' => 'a'],
              '//a',
          ],
          [
              ['id' => 'foo'],
              '//*[@id="foo"]',
          ],
          [
              ['tag' => 'a', 'id' => 'foo'],
              '//a[@id="foo"]',
          ],
          [
              ['tag' => 'a', 'classes' => ['foo']],
              '//a[contains(concat(" ", normalize-space(@class), " "), " foo ")]',
          ],
          [
              ['tag' => 'a', 'classes' => ['foo', 'bar']],
              '//a[(contains(concat(" ", normalize-space(@class), " "), " foo ")) and (contains(concat(" ", normalize-space(@class), " "), " bar "))]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href' => null]],
              '//a[@href]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href' => 'http://example.com/']],
              '//a[@href="http://example.com/"]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href' => 'http://example.com/', 'title' => 'Example Domain']],
              '//a[(@href="http://example.com/") and (@title="Example Domain")]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['target' => '_blank', 'href^' => 'https']],
              '//a[(@target="_blank") and (starts-with(@href, "https"))]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href$' => '.com']],
              '//a[substring(@href, string-length(@href) - string-length(".com") + 1) = ".com"]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href*' => 'example']],
              '//a[contains(@href, "example")]',
          ],
          [
              ['tag' => 'a', 'attributes' => ['href!' => 'http://foo.com/']],
              '//a[not(@href="http://foo.com/")]',
          ],
          [
              ['tag' => 'script', 'attributes' => ['!src' => null]],
              '//script[not(@src)]',
          ],
          [
              ['tag' => 'li', 'pseudo' => [['type' => 'first-child', 'expression' => null]]],
              '//li[position() = 1]',
          ],
          [
              ['tag' => '*', 'id' => 'id', 'classes' => ['foo'], 'attributes' => ['name' => 'value'], 'pseudo' => [['type' => 'first-child', 'expression' => null]], 'rel' => '>'],
              '//*[(@id="id") and (contains(concat(" ", normalize-space(@class), " "), " foo ")) and (@name="value") and (position() = 1)]',
          ],
      ];
  }
  /**
   * Data provider for testGetSegments().
   *
   * Each row is [selector, expected segment array]; the expected array is the
   * full entry below, including its own 'selector' key.
   *
   * @return array
   */
  public function getSegmentsTests(): array
  {
      $expectedSegments = [
          ['selector' => 'a', 'tag' => 'a'],
          ['selector' => '#foo', 'id' => 'foo'],
          ['selector' => 'a#foo', 'tag' => 'a', 'id' => 'foo'],
          ['selector' => 'a.foo', 'tag' => 'a', 'classes' => ['foo']],
          ['selector' => 'a.foo.bar', 'tag' => 'a', 'classes' => ['foo', 'bar']],
          ['selector' => 'a[href]', 'tag' => 'a', 'attributes' => ['href' => null]],
          ['selector' => 'a[href=http://example.com/]', 'tag' => 'a', 'attributes' => ['href' => 'http://example.com/']],
          ['selector' => 'a[href="http://example.com/"]', 'tag' => 'a', 'attributes' => ['href' => 'http://example.com/']],
          ['selector' => 'a[href=\'http://example.com/\']', 'tag' => 'a', 'attributes' => ['href' => 'http://example.com/']],
          ['selector' => 'a[href=http://example.com/][title=Example Domain]', 'tag' => 'a', 'attributes' => ['href' => 'http://example.com/', 'title' => 'Example Domain']],
          ['selector' => 'a[href=http://example.com/][href=http://example.com/404]', 'tag' => 'a', 'attributes' => ['href' => 'http://example.com/404']],
          ['selector' => 'a[href^=https]', 'tag' => 'a', 'attributes' => ['href^' => 'https']],
          ['selector' => 'li:first-child', 'tag' => 'li', 'pseudo' => [['type' => 'first-child', 'expression' => null]]],
          ['selector' => 'ul >', 'tag' => 'ul', 'rel' => '>'],
          ['selector' => '#id.foo[name=value]:first-child >', 'id' => 'id', 'classes' => ['foo'], 'attributes' => ['name' => 'value'], 'pseudo' => [['type' => 'first-child', 'expression' => null]], 'rel' => '>'],
          ['selector' => 'li.bar:nth-child(2n)', 'tag' => 'li', 'classes' => ['bar'], 'pseudo' => [['type' => 'nth-child', 'expression' => '2n']]],
      ];

      return array_map(
          static function (array $expected): array {
              return [$expected['selector'], $expected];
          },
          $expectedSegments
      );
  }
  328. }