make_lazy_extractors.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/env python3
  2. # Allow direct execution
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  6. from inspect import getsource
  7. from devscripts.utils import get_filename_args, read_file, write_file
  8. NO_ATTR = object()
  9. STATIC_CLASS_PROPERTIES = [
  10. 'IE_NAME', '_ENABLED', '_VALID_URL', # Used for URL matching
  11. '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY', # Used for --extractor-descriptions
  12. 'age_limit', # Used for --age-limit (evaluated)
  13. '_RETURN_TYPE', # Accessed in CLI only with instance (evaluated)
  14. ]
  15. CLASS_METHODS = [
  16. 'ie_key', 'suitable', '_match_valid_url', # Used for URL matching
  17. 'working', 'get_temp_id', '_match_id', # Accessed just before instance creation
  18. 'description', # Used for --extractor-descriptions
  19. 'is_suitable', # Used for --age-limit
  20. 'supports_login', 'is_single_video', # Accessed in CLI only with instance
  21. ]
  22. IE_TEMPLATE = '''
  23. class {name}({bases}):
  24. _module = {module!r}
  25. '''
  26. MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')
  27. def main():
  28. os.environ['YTDLP_NO_PLUGINS'] = 'true'
  29. os.environ['YTDLP_NO_LAZY_EXTRACTORS'] = 'true'
  30. lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
  31. from yt_dlp.extractor.extractors import _ALL_CLASSES
  32. from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
  33. DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
  34. module_src = '\n'.join((
  35. MODULE_TEMPLATE,
  36. ' _module = None',
  37. *extra_ie_code(DummyInfoExtractor),
  38. '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n pass\n',
  39. *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
  40. ))
  41. write_file(lazy_extractors_filename, f'{module_src}\n')
  42. def extra_ie_code(ie, base=None):
  43. for var in STATIC_CLASS_PROPERTIES:
  44. val = getattr(ie, var)
  45. if val != (getattr(base, var) if base else NO_ATTR):
  46. yield f' {var} = {val!r}'
  47. yield ''
  48. for name in CLASS_METHODS:
  49. f = getattr(ie, name)
  50. if not base or f.__func__ != getattr(base, name).__func__:
  51. yield getsource(f)
  52. def build_ies(ies, bases, attr_base):
  53. names = []
  54. for ie in sort_ies(ies, bases):
  55. yield build_lazy_ie(ie, ie.__name__, attr_base)
  56. if ie in ies:
  57. names.append(ie.__name__)
  58. yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
  59. def sort_ies(ies, ignored_bases):
  60. """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
  61. classes, returned_classes = ies[:-1], set()
  62. assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
  63. while classes:
  64. for c in classes[:]:
  65. bases = set(c.__bases__) - {object, *ignored_bases}
  66. restart = False
  67. for b in sorted(bases, key=lambda x: x.__name__):
  68. if b not in classes and b not in returned_classes:
  69. assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
  70. classes.insert(0, b)
  71. restart = True
  72. if restart:
  73. break
  74. if bases <= returned_classes:
  75. yield c
  76. returned_classes.add(c)
  77. classes.remove(c)
  78. break
  79. yield ies[-1]
  80. def build_lazy_ie(ie, name, attr_base):
  81. bases = ', '.join({
  82. 'InfoExtractor': 'LazyLoadExtractor',
  83. 'SearchInfoExtractor': 'LazyLoadSearchExtractor',
  84. }.get(base.__name__, base.__name__) for base in ie.__bases__)
  85. s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
  86. return s + '\n'.join(extra_ie_code(ie, attr_base))
# Run the generator only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()