# googlesearch.py 1.1 KB

  1. import itertools
  2. import re
  3. from .common import SearchInfoExtractor
  4. class GoogleSearchIE(SearchInfoExtractor):
  5. IE_DESC = 'Google Video search'
  6. IE_NAME = 'video.google:search'
  7. _SEARCH_KEY = 'gvsearch'
  8. _TESTS = [{
  9. 'url': 'gvsearch15:python language',
  10. 'info_dict': {
  11. 'id': 'python language',
  12. 'title': 'python language',
  13. },
  14. 'playlist_count': 15,
  15. }]
  16. _PAGE_SIZE = 100
  17. def _search_results(self, query):
  18. for pagenum in itertools.count():
  19. webpage = self._download_webpage(
  20. 'http://www.google.com/search', f'gvsearch:{query}',
  21. note=f'Downloading result page {pagenum + 1}',
  22. query={
  23. 'tbm': 'vid',
  24. 'q': query,
  25. 'start': pagenum * self._PAGE_SIZE,
  26. 'num': self._PAGE_SIZE,
  27. 'hl': 'en',
  28. })
  29. for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
  30. yield self.url_result(url)
  31. if not re.search(r'id="pnnext"', webpage):
  32. return