thisoldhouse.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. import json
  2. from .brightcove import BrightcoveNewIE
  3. from .common import InfoExtractor
  4. from .zype import ZypeIE
  5. from ..networking import HEADRequest
  6. from ..networking.exceptions import HTTPError
  7. from ..utils import (
  8. ExtractorError,
  9. filter_dict,
  10. parse_qs,
  11. smuggle_url,
  12. try_call,
  13. urlencode_postdata,
  14. )
  15. class ThisOldHouseIE(InfoExtractor):
  16. _NETRC_MACHINE = 'thisoldhouse'
  17. _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
  18. _TESTS = [{
  19. # Unresolved Brightcove URL embed (formerly Zype), free
  20. 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
  21. 'info_dict': {
  22. 'id': '6325298523112',
  23. 'ext': 'mp4',
  24. 'title': 'How to Build a Storage Bench',
  25. 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
  26. 'timestamp': 1681793639,
  27. 'upload_date': '20230418',
  28. 'duration': 674.54,
  29. 'tags': 'count:11',
  30. 'uploader_id': '6314471934001',
  31. 'thumbnail': r're:^https?://.*\.jpg',
  32. },
  33. 'params': {
  34. 'skip_download': True,
  35. },
  36. }, {
  37. # Brightcove embed, authwalled
  38. 'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
  39. 'info_dict': {
  40. 'id': '6349675446112',
  41. 'ext': 'mp4',
  42. 'title': 'E17 | Glen Ridge Generational | Multi-Generational',
  43. 'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
  44. 'timestamp': 1711382202,
  45. 'upload_date': '20240325',
  46. 'duration': 1422.229,
  47. 'tags': 'count:13',
  48. 'uploader_id': '6314471934001',
  49. 'thumbnail': r're:^https?://.*\.jpg',
  50. },
  51. 'expected_warnings': ['Login with password is not supported for this website'],
  52. 'params': {
  53. 'skip_download': True,
  54. },
  55. 'skip': 'Requires subscription',
  56. }, {
  57. # Page no longer has video
  58. 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
  59. 'only_matching': True,
  60. }, {
  61. # 404 Not Found
  62. 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
  63. 'only_matching': True,
  64. }, {
  65. # 404 Not Found
  66. 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
  67. 'only_matching': True,
  68. }, {
  69. 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
  70. 'only_matching': True,
  71. }, {
  72. # iframe www.thisoldhouse.com
  73. 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
  74. 'only_matching': True,
  75. }]
  76. _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'
  77. def _perform_login(self, username, password):
  78. self._request_webpage(
  79. HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
  80. urlh = self._request_webpage(
  81. 'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
  82. errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})
  83. try:
  84. auth_form = self._download_webpage(
  85. self._LOGIN_URL, None, 'Submitting credentials', headers={
  86. 'Content-Type': 'application/json',
  87. 'Referer': urlh.url,
  88. }, data=json.dumps(filter_dict({
  89. **{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
  90. 'tenant': 'thisoldhouse',
  91. 'username': username,
  92. 'password': password,
  93. 'popup_options': {},
  94. 'sso': True,
  95. '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
  96. '_intstate': 'deprecated',
  97. }), separators=(',', ':')).encode())
  98. except ExtractorError as e:
  99. if isinstance(e.cause, HTTPError) and e.cause.status == 401:
  100. raise ExtractorError('Invalid username or password', expected=True)
  101. raise
  102. self._request_webpage(
  103. 'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
  104. data=urlencode_postdata(self._hidden_inputs(auth_form)))
  105. def _real_extract(self, url):
  106. display_id = self._match_id(url)
  107. webpage = self._download_webpage(url, display_id)
  108. if 'To Unlock This content' in webpage:
  109. self.raise_login_required(
  110. 'This video is only available for subscribers. '
  111. 'Note that --cookies-from-browser may not work due to this site using session cookies')
  112. video_url, video_id = self._search_regex(
  113. r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
  114. webpage, 'zype url', group=(1, 2), default=(None, None))
  115. if video_url:
  116. video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
  117. return self.url_result(video_url, ZypeIE, video_id)
  118. video_url, video_id = self._search_regex([
  119. r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
  120. r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
  121. webpage, 'iframe url', group=(1, 2))
  122. if not parse_qs(video_url).get('videoId'):
  123. video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
  124. return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)