Browse Source

[cookies] Move `YoutubeDLCookieJar` to cookies module (#7091)

Authored by: coletdjnz
coletdjnz 1 year ago
parent
commit
b87e01c123

+ 7 - 1
test/test_YoutubeDLCookieJar.py

@@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 import re
 import tempfile
 
-from yt_dlp.utils import YoutubeDLCookieJar
+from yt_dlp.cookies import YoutubeDLCookieJar
 
 
 class TestYoutubeDLCookieJar(unittest.TestCase):
@@ -47,6 +47,12 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
         # will be ignored
         self.assertFalse(cookiejar._cookies)
 
+    def test_get_cookie_header(self):
+        cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt')
+        cookiejar.load(ignore_discard=True, ignore_expires=True)
+        header = cookiejar.get_cookie_header('https://www.foobar.foobar')
+        self.assertIn('HTTPONLY_COOKIE', header)
+
 
 if __name__ == '__main__':
     unittest.main()

+ 3 - 4
yt_dlp/YoutubeDL.py

@@ -2404,7 +2404,7 @@ class YoutubeDL:
         if 'Youtubedl-No-Compression' in res:  # deprecated
             res.pop('Youtubedl-No-Compression', None)
             res['Accept-Encoding'] = 'identity'
-        cookies = self._calc_cookies(info_dict['url'])
+        cookies = self.cookiejar.get_cookie_header(info_dict['url'])
         if cookies:
             res['Cookie'] = cookies
 
@@ -2416,9 +2416,8 @@ class YoutubeDL:
         return res
 
     def _calc_cookies(self, url):
-        pr = sanitized_Request(url)
-        self.cookiejar.add_cookie_header(pr)
-        return pr.get_header('Cookie')
+        self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version')
+        return self.cookiejar.get_cookie_header(url)
 
     def _sort_thumbnails(self, thumbnails):
         thumbnails.sort(key=lambda t: (

+ 143 - 1
yt_dlp/cookies.py

@@ -1,7 +1,9 @@
 import base64
+import collections
 import contextlib
 import http.cookiejar
 import http.cookies
+import io
 import json
 import os
 import re
@@ -11,6 +13,7 @@ import subprocess
 import sys
 import tempfile
 import time
+import urllib.request
 from datetime import datetime, timedelta, timezone
 from enum import Enum, auto
 from hashlib import pbkdf2_hmac
@@ -29,11 +32,14 @@ from .dependencies import (
 from .minicurses import MultilinePrinter, QuietMultilinePrinter
 from .utils import (
     Popen,
-    YoutubeDLCookieJar,
     error_to_str,
+    escape_url,
     expand_path,
     is_path_like,
+    sanitize_url,
+    str_or_none,
     try_call,
+    write_string,
 )
 
 CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'}
@@ -1091,3 +1097,139 @@ class LenientSimpleCookie(http.cookies.SimpleCookie):
 
             else:
                 morsel = None
+
+
+class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
+    """
+    See [1] for cookie file format.
+
+    1. https://curl.haxx.se/docs/http-cookies.html
+    """
+    _HTTPONLY_PREFIX = '#HttpOnly_'
+    _ENTRY_LEN = 7
+    _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by yt-dlp.  Do not edit.
+
+'''
+    _CookieFileEntry = collections.namedtuple(
+        'CookieFileEntry',
+        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+    def __init__(self, filename=None, *args, **kwargs):
+        super().__init__(None, *args, **kwargs)
+        if is_path_like(filename):
+            filename = os.fspath(filename)
+        self.filename = filename
+
+    @staticmethod
+    def _true_or_false(cndn):
+        return 'TRUE' if cndn else 'FALSE'
+
+    @contextlib.contextmanager
+    def open(self, file, *, write=False):
+        if is_path_like(file):
+            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
+                yield f
+        else:
+            if write:
+                file.truncate(0)
+            yield file
+
+    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
+        now = time.time()
+        for cookie in self:
+            if (not ignore_discard and cookie.discard
+                    or not ignore_expires and cookie.is_expired(now)):
+                continue
+            name, value = cookie.name, cookie.value
+            if value is None:
+                # cookies.txt regards 'Set-Cookie: foo' as a cookie
+                # with no name, whereas http.cookiejar regards it as a
+                # cookie with no value.
+                name, value = '', name
+            f.write('%s\n' % '\t'.join((
+                cookie.domain,
+                self._true_or_false(cookie.domain.startswith('.')),
+                cookie.path,
+                self._true_or_false(cookie.secure),
+                str_or_none(cookie.expires, default=''),
+                name, value
+            )))
+
+    def save(self, filename=None, *args, **kwargs):
+        """
+        Save cookies to a file.
+        Code is taken from CPython 3.6
+        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
+
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        # Store session cookies with `expires` set to 0 instead of an empty string
+        for cookie in self:
+            if cookie.expires is None:
+                cookie.expires = 0
+
+        with self.open(filename, write=True) as f:
+            f.write(self._HEADER)
+            self._really_save(f, *args, **kwargs)
+
+    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+        """Load cookies from a file."""
+        if filename is None:
+            if self.filename is not None:
+                filename = self.filename
+            else:
+                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
+
+        def prepare_line(line):
+            if line.startswith(self._HTTPONLY_PREFIX):
+                line = line[len(self._HTTPONLY_PREFIX):]
+            # comments and empty lines are fine
+            if line.startswith('#') or not line.strip():
+                return line
+            cookie_list = line.split('\t')
+            if len(cookie_list) != self._ENTRY_LEN:
+                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
+            cookie = self._CookieFileEntry(*cookie_list)
+            if cookie.expires_at and not cookie.expires_at.isdigit():
+                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+            return line
+
+        cf = io.StringIO()
+        with self.open(filename) as f:
+            for line in f:
+                try:
+                    cf.write(prepare_line(line))
+                except http.cookiejar.LoadError as e:
+                    if f'{line.strip()} '[0] in '[{"':
+                        raise http.cookiejar.LoadError(
+                            'Cookies file must be Netscape formatted, not JSON. See  '
+                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
+                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
+                    continue
+        cf.seek(0)
+        self._really_load(cf, filename, ignore_discard, ignore_expires)
+        # Session cookies are denoted by either `expires` field set to
+        # an empty string or 0. MozillaCookieJar only recognizes the former
+        # (see [1]). So we need force the latter to be recognized as session
+        # cookies on our own.
+        # Session cookies may be important for cookies-based authentication,
+        # e.g. usually, when user does not check 'Remember me' check box while
+        # logging in on a site, some important cookies are stored as session
+        # cookies so that not recognizing them will result in failed login.
+        # 1. https://bugs.python.org/issue17164
+        for cookie in self:
+            # Treat `expires=0` cookies as session cookies
+            if cookie.expires == 0:
+                cookie.expires = None
+                cookie.discard = True
+
+    def get_cookie_header(self, url):
+        """Generate a Cookie HTTP header for a given url"""
+        cookie_req = urllib.request.Request(escape_url(sanitize_url(url)))
+        self.add_cookie_header(cookie_req)
+        return cookie_req.get_header('Cookie')

+ 1 - 1
yt_dlp/extractor/common.py

@@ -3444,7 +3444,7 @@ class InfoExtractor:
 
     def _get_cookies(self, url):
         """ Return a http.cookies.SimpleCookie with the cookies for the url """
-        return LenientSimpleCookie(self._downloader._calc_cookies(url))
+        return LenientSimpleCookie(self._downloader.cookiejar.get_cookie_header(url))
 
     def _apply_first_set_cookie_header(self, url_handle, cookie):
         """

+ 3 - 0
yt_dlp/utils/_legacy.py

@@ -10,6 +10,9 @@ from ._utils import decode_base_n, preferredencoding
 from .traversal import traverse_obj
 from ..dependencies import certifi, websockets
 
+# isort: split
+from ..cookies import YoutubeDLCookieJar  # noqa: F401
+
 has_certifi = bool(certifi)
 has_websockets = bool(websockets)
 

+ 0 - 130
yt_dlp/utils/_utils.py

@@ -1518,136 +1518,6 @@ def is_path_like(f):
     return isinstance(f, (str, bytes, os.PathLike))
 
 
-class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
-    """
-    See [1] for cookie file format.
-
-    1. https://curl.haxx.se/docs/http-cookies.html
-    """
-    _HTTPONLY_PREFIX = '#HttpOnly_'
-    _ENTRY_LEN = 7
-    _HEADER = '''# Netscape HTTP Cookie File
-# This file is generated by yt-dlp.  Do not edit.
-
-'''
-    _CookieFileEntry = collections.namedtuple(
-        'CookieFileEntry',
-        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
-
-    def __init__(self, filename=None, *args, **kwargs):
-        super().__init__(None, *args, **kwargs)
-        if is_path_like(filename):
-            filename = os.fspath(filename)
-        self.filename = filename
-
-    @staticmethod
-    def _true_or_false(cndn):
-        return 'TRUE' if cndn else 'FALSE'
-
-    @contextlib.contextmanager
-    def open(self, file, *, write=False):
-        if is_path_like(file):
-            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
-                yield f
-        else:
-            if write:
-                file.truncate(0)
-            yield file
-
-    def _really_save(self, f, ignore_discard=False, ignore_expires=False):
-        now = time.time()
-        for cookie in self:
-            if (not ignore_discard and cookie.discard
-                    or not ignore_expires and cookie.is_expired(now)):
-                continue
-            name, value = cookie.name, cookie.value
-            if value is None:
-                # cookies.txt regards 'Set-Cookie: foo' as a cookie
-                # with no name, whereas http.cookiejar regards it as a
-                # cookie with no value.
-                name, value = '', name
-            f.write('%s\n' % '\t'.join((
-                cookie.domain,
-                self._true_or_false(cookie.domain.startswith('.')),
-                cookie.path,
-                self._true_or_false(cookie.secure),
-                str_or_none(cookie.expires, default=''),
-                name, value
-            )))
-
-    def save(self, filename=None, *args, **kwargs):
-        """
-        Save cookies to a file.
-        Code is taken from CPython 3.6
-        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """
-
-        if filename is None:
-            if self.filename is not None:
-                filename = self.filename
-            else:
-                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
-        # Store session cookies with `expires` set to 0 instead of an empty string
-        for cookie in self:
-            if cookie.expires is None:
-                cookie.expires = 0
-
-        with self.open(filename, write=True) as f:
-            f.write(self._HEADER)
-            self._really_save(f, *args, **kwargs)
-
-    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
-        """Load cookies from a file."""
-        if filename is None:
-            if self.filename is not None:
-                filename = self.filename
-            else:
-                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
-
-        def prepare_line(line):
-            if line.startswith(self._HTTPONLY_PREFIX):
-                line = line[len(self._HTTPONLY_PREFIX):]
-            # comments and empty lines are fine
-            if line.startswith('#') or not line.strip():
-                return line
-            cookie_list = line.split('\t')
-            if len(cookie_list) != self._ENTRY_LEN:
-                raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list))
-            cookie = self._CookieFileEntry(*cookie_list)
-            if cookie.expires_at and not cookie.expires_at.isdigit():
-                raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
-            return line
-
-        cf = io.StringIO()
-        with self.open(filename) as f:
-            for line in f:
-                try:
-                    cf.write(prepare_line(line))
-                except http.cookiejar.LoadError as e:
-                    if f'{line.strip()} '[0] in '[{"':
-                        raise http.cookiejar.LoadError(
-                            'Cookies file must be Netscape formatted, not JSON. See  '
-                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
-                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
-                    continue
-        cf.seek(0)
-        self._really_load(cf, filename, ignore_discard, ignore_expires)
-        # Session cookies are denoted by either `expires` field set to
-        # an empty string or 0. MozillaCookieJar only recognizes the former
-        # (see [1]). So we need force the latter to be recognized as session
-        # cookies on our own.
-        # Session cookies may be important for cookies-based authentication,
-        # e.g. usually, when user does not check 'Remember me' check box while
-        # logging in on a site, some important cookies are stored as session
-        # cookies so that not recognizing them will result in failed login.
-        # 1. https://bugs.python.org/issue17164
-        for cookie in self:
-            # Treat `expires=0` cookies as session cookies
-            if cookie.expires == 0:
-                cookie.expires = None
-                cookie.discard = True
-
-
 class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor):
     def __init__(self, cookiejar=None):
         urllib.request.HTTPCookieProcessor.__init__(self, cookiejar)