Browse Source

[networking] Ignore invalid proxies in env (#7704)

Authored by: coletdjnz
coletdjnz 1 year ago
parent
commit
bbeacff7fc
3 changed files with 19 additions and 7 deletions
  1. 5 3
      test/test_networking.py
  2. 7 3
      yt_dlp/networking/common.py
  3. 7 1
      yt_dlp/utils/networking.py

+ 5 - 3
test/test_networking.py

@@ -930,10 +930,10 @@ class TestRequestHandlerValidation:
         run_validation(handler, False, Request('http://', proxies={'http': None}))
         run_validation(handler, False, Request('http://'), proxies={'http': None})
 
-    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1'])
+    @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c'])
     @pytest.mark.parametrize('handler', ['Urllib'], indirect=True)
-    def test_missing_proxy_scheme(self, handler, proxy_url):
-        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': 'example.com'}))
+    def test_invalid_proxy_url(self, handler, proxy_url):
+        run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url}))
 
     @pytest.mark.parametrize('handler,extensions,fail', [
         (handler_tests[0], extensions, fail)
@@ -1126,9 +1126,11 @@ class TestYoutubeDLNetworking:
         ('http', '__noproxy__', None),
         ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'),
         ('https', 'example.com', 'http://example.com'),
+        ('https', '//example.com', 'http://example.com'),
         ('https', 'socks5://example.com', 'socks5h://example.com'),
         ('http', 'socks://example.com', 'socks4://example.com'),
         ('http', 'socks4://example.com', 'socks4://example.com'),
+        ('unrelated', '/bad/proxy', '/bad/proxy'),  # clean_proxies should ignore bad proxies
     ])
     def test_clean_proxy(self, proxy_key, proxy_url, expected):
         # proxies should be cleaned in urlopen()

+ 7 - 3
yt_dlp/networking/common.py

@@ -262,9 +262,13 @@ class RequestHandler(abc.ABC):
                 # Skip proxy scheme checks
                 continue
 
-            # Scheme-less proxies are not supported
-            if urllib.request._parse_proxy(proxy_url)[0] is None:
-                raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+            try:
+                if urllib.request._parse_proxy(proxy_url)[0] is None:
+                    # Scheme-less proxies are not supported
+                    raise UnsupportedRequest(f'Proxy "{proxy_url}" missing scheme')
+            except ValueError as e:
+                # parse_proxy may raise on some invalid proxy urls such as "/a/b/c"
+                raise UnsupportedRequest(f'Invalid proxy url "{proxy_url}": {e}')
 
             scheme = urllib.parse.urlparse(proxy_url).scheme.lower()
             if scheme not in self._SUPPORTED_PROXY_SCHEMES:

+ 7 - 1
yt_dlp/utils/networking.py

@@ -98,7 +98,13 @@ def clean_proxies(proxies: dict, headers: HTTPHeaderDict):
             continue
         if proxy_url is not None:
             # Ensure proxies without a scheme are http.
-            proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+            try:
+                proxy_scheme = urllib.request._parse_proxy(proxy_url)[0]
+            except ValueError:
+                # Ignore invalid proxy URLs. Sometimes these may be introduced through environment
+                # variables unrelated to proxy settings - e.g. Colab `COLAB_LANGUAGE_SERVER_PROXY`.
+                # If the proxy is going to be used, the Request Handler proxy validation will handle it.
+                continue
             if proxy_scheme is None:
                 proxies[proxy_key] = 'http://' + remove_start(proxy_url, '//')