Browse Source

Fix lazy extractor bug introduced in fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa, and add a test

Fixes https://github.com/yt-dlp/yt-dlp/pull/3234#issuecomment-1225347071
pukkandan 2 years ago
parent
commit
e5458d1d88

+ 7 - 4
devscripts/lazy_load_template.py

@@ -11,14 +11,17 @@ from ..utils import (
 
 
 # These bloat the lazy_extractors, so allow them to pass through silently
 # These bloat the lazy_extractors, so allow them to pass through silently
 ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
 ALLOWED_CLASSMETHODS = {'get_testcases', 'extract_from_webpage'}
+_WARNED = False
 
 
 
 
 class LazyLoadMetaClass(type):
 class LazyLoadMetaClass(type):
     def __getattr__(cls, name):
     def __getattr__(cls, name):
-        if '_real_class' not in cls.__dict__ and name not in ALLOWED_CLASSMETHODS:
-            write_string(
-                'WARNING: Falling back to normal extractor since lazy extractor '
-                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+        global _WARNED
+        if ('_real_class' not in cls.__dict__
+                and name not in ALLOWED_CLASSMETHODS and not _WARNED):
+            _WARNED = True
+            write_string('WARNING: Falling back to normal extractor since lazy extractor '
+                         f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
         return getattr(cls.real_class, name)
         return getattr(cls.real_class, name)
 
 
 
 

+ 3 - 1
devscripts/make_lazy_extractors.py

@@ -12,7 +12,9 @@ from inspect import getsource
 from devscripts.utils import get_filename_args, read_file, write_file
 from devscripts.utils import get_filename_args, read_file, write_file
 
 
 NO_ATTR = object()
 NO_ATTR = object()
-STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_NETRC_MACHINE', 'age_limit']
+STATIC_CLASS_PROPERTIES = [
+    'IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_VALID_URL', '_WORKING', '_ENABLED', '_NETRC_MACHINE', 'age_limit'
+]
 CLASS_METHODS = [
 CLASS_METHODS = [
     'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
     'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
 ]
 ]

+ 23 - 18
test/test_execution.py

@@ -11,41 +11,46 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import contextlib
 import contextlib
 import subprocess
 import subprocess
 
 
-from yt_dlp.utils import encodeArgument
+from yt_dlp.utils import Popen
 
 
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'
 
 
 
 
-try:
-    _DEV_NULL = subprocess.DEVNULL
-except AttributeError:
-    _DEV_NULL = open(os.devnull, 'wb')
+class TestExecution(unittest.TestCase):
+    def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
+        stdout, stderr, returncode = Popen.run(
+            [*exe, '--ignore-config', *opts], cwd=rootDir, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        print(stderr, file=sys.stderr)
+        self.assertEqual(returncode, 0)
+        return stdout.strip(), stderr.strip()
 
 
+    def test_main_exec(self):
+        self.run_yt_dlp()
 
 
-class TestExecution(unittest.TestCase):
     def test_import(self):
     def test_import(self):
-        subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir)
+        self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))
 
 
     def test_module_exec(self):
     def test_module_exec(self):
-        subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
-
-    def test_main_exec(self):
-        subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+        self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))
 
 
     def test_cmdline_umlauts(self):
     def test_cmdline_umlauts(self):
-        p = subprocess.Popen(
-            [sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'],
-            cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
-        _, stderr = p.communicate()
+        _, stderr = self.run_yt_dlp(opts=('ä', '--version'))
         self.assertFalse(stderr)
         self.assertFalse(stderr)
 
 
     def test_lazy_extractors(self):
     def test_lazy_extractors(self):
         try:
         try:
-            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL)
-            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL)
+            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
+                                  cwd=rootDir, stdout=subprocess.DEVNULL)
+            self.assertTrue(os.path.exists(LAZY_EXTRACTORS))
+
+            _, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
+            self.assertFalse(stderr)
+
+            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
         finally:
         finally:
             with contextlib.suppress(OSError):
             with contextlib.suppress(OSError):
-                os.remove('yt_dlp/extractor/lazy_extractors.py')
+                os.remove(LAZY_EXTRACTORS)
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':

+ 3 - 1
yt_dlp/extractor/testurl.py

@@ -8,12 +8,14 @@ class TestURLIE(InfoExtractor):
     """ Allows addressing of the test cases as test:yout.*be_1 """
     """ Allows addressing of the test cases as test:yout.*be_1 """
 
 
     IE_DESC = False  # Do not list
     IE_DESC = False  # Do not list
-    _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$'
+    _VALID_URL = r'test(?:url)?:(?P<extractor>.*?)(?:_(?P<num>[0-9]+))?$'
 
 
     def _real_extract(self, url):
     def _real_extract(self, url):
         from . import gen_extractor_classes
         from . import gen_extractor_classes
 
 
         extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
         extractor_id, num = self._match_valid_url(url).group('extractor', 'num')
+        if not extractor_id:
+            return {'id': ':test', 'title': '', 'url': url}
 
 
         rex = re.compile(extractor_id, flags=re.IGNORECASE)
         rex = re.compile(extractor_id, flags=re.IGNORECASE)
         matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]
         matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)]