# test_validate_links.py
  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. from validate.links import find_links_in_text
  4. from validate.links import check_duplicate_links
  5. from validate.links import fake_user_agent
  6. from validate.links import get_host_from_link
  7. from validate.links import has_cloudflare_protection
  8. class FakeResponse():
  9. def __init__(self, code: int, headers: dict, text: str) -> None:
  10. self.status_code = code
  11. self.headers = headers
  12. self.text = text
  13. class TestValidateLinks(unittest.TestCase):
  14. def setUp(self):
  15. self.duplicate_links = [
  16. 'https://www.example.com',
  17. 'https://www.example.com',
  18. 'https://www.example.com',
  19. 'https://www.anotherexample.com',
  20. ]
  21. self.no_duplicate_links = [
  22. 'https://www.firstexample.com',
  23. 'https://www.secondexample.com',
  24. 'https://www.anotherexample.com',
  25. ]
  26. self.code_200 = 200
  27. self.code_403 = 403
  28. self.code_503 = 503
  29. self.cloudflare_headers = {'Server': 'cloudflare'}
  30. self.no_cloudflare_headers = {'Server': 'google'}
  31. self.text_with_cloudflare_flags = '403 Forbidden Cloudflare We are checking your browser...'
  32. self.text_without_cloudflare_flags = 'Lorem Ipsum'
  33. def test_find_link_in_text(self):
  34. text = """
  35. # this is valid
  36. http://example.com?param1=1&param2=2#anchor
  37. https://www.example.com?param1=1&param2=2#anchor
  38. https://www.example.com.br
  39. https://www.example.com.gov.br
  40. [Example](https://www.example.com?param1=1&param2=2#anchor)
  41. lorem ipsum https://www.example.com?param1=1&param2=2#anchor
  42. https://www.example.com?param1=1&param2=2#anchor lorem ipsum
  43. # this not is valid
  44. example.com
  45. https:example.com
  46. https:/example.com
  47. https//example.com
  48. https//.com
  49. """
  50. links = find_links_in_text(text)
  51. self.assertIsInstance(links, list)
  52. self.assertEqual(len(links), 7)
  53. for link in links:
  54. with self.subTest():
  55. self.assertIsInstance(link, str)
  56. def test_find_link_in_text_with_invalid_argument(self):
  57. with self.assertRaises(TypeError):
  58. find_links_in_text()
  59. find_links_in_text(1)
  60. find_links_in_text(True)
  61. def test_if_check_duplicate_links_has_the_correct_return(self):
  62. result_1 = check_duplicate_links(self.duplicate_links)
  63. result_2 = check_duplicate_links(self.no_duplicate_links)
  64. self.assertIsInstance(result_1, tuple)
  65. self.assertIsInstance(result_2, tuple)
  66. has_duplicate_links, links = result_1
  67. no_duplicate_links, no_links = result_2
  68. self.assertTrue(has_duplicate_links)
  69. self.assertFalse(no_duplicate_links)
  70. self.assertIsInstance(links, list)
  71. self.assertIsInstance(no_links, list)
  72. self.assertEqual(len(links), 2)
  73. self.assertEqual(len(no_links), 0)
  74. def test_if_fake_user_agent_has_a_str_as_return(self):
  75. user_agent = fake_user_agent()
  76. self.assertIsInstance(user_agent, str)
  77. def test_get_host_from_link(self):
  78. links = [
  79. 'example.com',
  80. 'https://example.com',
  81. 'https://www.example.com',
  82. 'https://www.example.com.br',
  83. 'https://www.example.com/route',
  84. 'https://www.example.com?p=1&q=2',
  85. 'https://www.example.com#anchor'
  86. ]
  87. for link in links:
  88. host = get_host_from_link(link)
  89. with self.subTest():
  90. self.assertIsInstance(host, str)
  91. self.assertNotIn('://', host)
  92. self.assertNotIn('/', host)
  93. self.assertNotIn('?', host)
  94. self.assertNotIn('#', host)
  95. with self.assertRaises(TypeError):
  96. get_host_from_link()
  97. def test_has_cloudflare_protection_with_code_403_and_503_in_response(self):
  98. resp_with_cloudflare_protection_code_403 = FakeResponse(
  99. code=self.code_403,
  100. headers=self.cloudflare_headers,
  101. text=self.text_with_cloudflare_flags
  102. )
  103. resp_with_cloudflare_protection_code_503 = FakeResponse(
  104. code=self.code_503,
  105. headers=self.cloudflare_headers,
  106. text=self.text_with_cloudflare_flags
  107. )
  108. result1 = has_cloudflare_protection(resp_with_cloudflare_protection_code_403)
  109. result2 = has_cloudflare_protection(resp_with_cloudflare_protection_code_503)
  110. self.assertTrue(result1)
  111. self.assertTrue(result2)
  112. def test_has_cloudflare_protection_when_there_is_no_protection(self):
  113. resp_without_cloudflare_protection1 = FakeResponse(
  114. code=self.code_200,
  115. headers=self.no_cloudflare_headers,
  116. text=self.text_without_cloudflare_flags
  117. )
  118. resp_without_cloudflare_protection2 = FakeResponse(
  119. code=self.code_403,
  120. headers=self.no_cloudflare_headers,
  121. text=self.text_without_cloudflare_flags
  122. )
  123. resp_without_cloudflare_protection3 = FakeResponse(
  124. code=self.code_503,
  125. headers=self.no_cloudflare_headers,
  126. text=self.text_without_cloudflare_flags
  127. )
  128. result1 = has_cloudflare_protection(resp_without_cloudflare_protection1)
  129. result2 = has_cloudflare_protection(resp_without_cloudflare_protection2)
  130. result3 = has_cloudflare_protection(resp_without_cloudflare_protection3)
  131. self.assertFalse(result1)
  132. self.assertFalse(result2)
  133. self.assertFalse(result3)