ValidURL.php 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. <?php
  2. /**
  3. * @package koseven/Codebench
  4. * @category Tests
  5. * @author Geert De Deckere <geert@idoe.be>
  6. */
  7. class Bench_ValidURL extends Codebench {
  8. public $description =
  9. 'filter_var vs regex:
  10. http://koseven.dev/issues/2847';
  11. public $loops = 1000;
  12. public $subjects = [
  13. // Valid
  14. 'http://google.com',
  15. 'http://google.com/',
  16. 'http://google.com/?q=abc',
  17. 'http://google.com/#hash',
  18. 'http://localhost',
  19. 'http://hello-world.pl',
  20. 'http://hello--world.pl',
  21. 'http://h.e.l.l.0.pl',
  22. 'http://server.tld/get/info',
  23. 'http://127.0.0.1',
  24. 'http://127.0.0.1:80',
  25. 'http://user@127.0.0.1',
  26. 'http://user:pass@127.0.0.1',
  27. 'ftp://my.server.com',
  28. 'rss+xml://rss.example.com',
  29. // Invalid
  30. 'http://google.2com',
  31. 'http://google.com?q=abc',
  32. 'http://google.com#hash',
  33. 'http://hello-.pl',
  34. 'http://hel.-lo.world.pl',
  35. 'http://ww£.google.com',
  36. 'http://127.0.0.1234',
  37. 'http://127.0.0.1.1',
  38. 'http://user:@127.0.0.1',
  39. "http://finalnewline.com\n",
  40. ];
  41. public function bench_filter_var($url)
  42. {
  43. return (bool) filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_HOST_REQUIRED);
  44. }
  45. public function bench_regex($url)
  46. {
  47. // Based on http://www.apps.ietf.org/rfc/rfc1738.html#sec-5
  48. if ( ! preg_match(
  49. '~^
  50. # scheme
  51. [-a-z0-9+.]++://
  52. # username:password (optional)
  53. (?:
  54. [-a-z0-9$_.+!*\'(),;?&=%]++ # username
  55. (?::[-a-z0-9$_.+!*\'(),;?&=%]++)? # password (optional)
  56. @
  57. )?
  58. (?:
  59. # ip address
  60. \d{1,3}+(?:\.\d{1,3}+){3}+
  61. | # or
  62. # hostname (captured)
  63. (
  64. (?!-)[-a-z0-9]{1,63}+(?<!-)
  65. (?:\.(?!-)[-a-z0-9]{1,63}+(?<!-)){0,126}+
  66. )
  67. )
  68. # port (optional)
  69. (?::\d{1,5}+)?
  70. # path (optional)
  71. (?:/.*)?
  72. $~iDx', $url, $matches))
  73. return FALSE;
  74. // We matched an IP address
  75. if ( ! isset($matches[1]))
  76. return TRUE;
  77. // Check maximum length of the whole hostname
  78. // http://en.wikipedia.org/wiki/Domain_name#cite_note-0
  79. if (strlen($matches[1]) > 253)
  80. return FALSE;
  81. // An extra check for the top level domain
  82. // It must start with a letter
  83. $tld = ltrim(substr($matches[1], (int) strrpos($matches[1], '.')), '.');
  84. return ctype_alpha($tld[0]);
  85. }
  86. }