useragents.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. import re
  2. import typing as t
  3. import warnings
  4. from .user_agent import UserAgent as _BaseUserAgent
  5. if t.TYPE_CHECKING:
  6. from _typeshed.wsgi import WSGIEnvironment
  7. class _UserAgentParser:
  8. platform_rules: t.ClassVar[t.Iterable[t.Tuple[str, str]]] = (
  9. (" cros ", "chromeos"),
  10. ("iphone|ios", "iphone"),
  11. ("ipad", "ipad"),
  12. (r"darwin\b|mac\b|os\s*x", "macos"),
  13. ("win", "windows"),
  14. (r"android", "android"),
  15. ("netbsd", "netbsd"),
  16. ("openbsd", "openbsd"),
  17. ("freebsd", "freebsd"),
  18. ("dragonfly", "dragonflybsd"),
  19. ("(sun|i86)os", "solaris"),
  20. (r"x11\b|lin(\b|ux)?", "linux"),
  21. (r"nintendo\s+wii", "wii"),
  22. ("irix", "irix"),
  23. ("hp-?ux", "hpux"),
  24. ("aix", "aix"),
  25. ("sco|unix_sv", "sco"),
  26. ("bsd", "bsd"),
  27. ("amiga", "amiga"),
  28. ("blackberry|playbook", "blackberry"),
  29. ("symbian", "symbian"),
  30. )
  31. browser_rules: t.ClassVar[t.Iterable[t.Tuple[str, str]]] = (
  32. ("googlebot", "google"),
  33. ("msnbot", "msn"),
  34. ("yahoo", "yahoo"),
  35. ("ask jeeves", "ask"),
  36. (r"aol|america\s+online\s+browser", "aol"),
  37. (r"opera|opr", "opera"),
  38. ("edge|edg", "edge"),
  39. ("chrome|crios", "chrome"),
  40. ("seamonkey", "seamonkey"),
  41. ("firefox|firebird|phoenix|iceweasel", "firefox"),
  42. ("galeon", "galeon"),
  43. ("safari|version", "safari"),
  44. ("webkit", "webkit"),
  45. ("camino", "camino"),
  46. ("konqueror", "konqueror"),
  47. ("k-meleon", "kmeleon"),
  48. ("netscape", "netscape"),
  49. (r"msie|microsoft\s+internet\s+explorer|trident/.+? rv:", "msie"),
  50. ("lynx", "lynx"),
  51. ("links", "links"),
  52. ("Baiduspider", "baidu"),
  53. ("bingbot", "bing"),
  54. ("mozilla", "mozilla"),
  55. )
  56. _browser_version_re = r"(?:{pattern})[/\sa-z(]*(\d+[.\da-z]+)?"
  57. _language_re = re.compile(
  58. r"(?:;\s*|\s+)(\b\w{2}\b(?:-\b\w{2}\b)?)\s*;|"
  59. r"(?:\(|\[|;)\s*(\b\w{2}\b(?:-\b\w{2}\b)?)\s*(?:\]|\)|;)"
  60. )
  61. def __init__(self) -> None:
  62. self.platforms = [(b, re.compile(a, re.I)) for a, b in self.platform_rules]
  63. self.browsers = [
  64. (b, re.compile(self._browser_version_re.format(pattern=a), re.I))
  65. for a, b in self.browser_rules
  66. ]
  67. def __call__(
  68. self, user_agent: str
  69. ) -> t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str], t.Optional[str]]:
  70. platform: t.Optional[str]
  71. browser: t.Optional[str]
  72. version: t.Optional[str]
  73. language: t.Optional[str]
  74. for platform, regex in self.platforms: # noqa: B007
  75. match = regex.search(user_agent)
  76. if match is not None:
  77. break
  78. else:
  79. platform = None
  80. # Except for Trident, all browser key words come after the last ')'
  81. last_closing_paren = 0
  82. if (
  83. not re.compile(r"trident/.+? rv:", re.I).search(user_agent)
  84. and ")" in user_agent
  85. and user_agent[-1] != ")"
  86. ):
  87. last_closing_paren = user_agent.rindex(")")
  88. for browser, regex in self.browsers: # noqa: B007
  89. match = regex.search(user_agent[last_closing_paren:])
  90. if match is not None:
  91. version = match.group(1)
  92. break
  93. else:
  94. browser = version = None
  95. match = self._language_re.search(user_agent)
  96. if match is not None:
  97. language = match.group(1) or match.group(2)
  98. else:
  99. language = None
  100. return platform, browser, version, language
# It wasn't public, but users might have imported it anyway, so show a
# warning whenever a user creates an instance.
  103. class UserAgentParser(_UserAgentParser):
  104. """A simple user agent parser. Used by the `UserAgent`.
  105. .. deprecated:: 2.0
  106. Will be removed in Werkzeug 2.1. Use a dedicated parser library
  107. instead.
  108. """
  109. def __init__(self) -> None:
  110. warnings.warn(
  111. "'UserAgentParser' is deprecated and will be removed in"
  112. " Werkzeug 2.1. Use a dedicated parser library instead.",
  113. DeprecationWarning,
  114. stacklevel=2,
  115. )
  116. super().__init__()
  117. class _deprecated_property(property):
  118. def __init__(self, fget: t.Callable[["_UserAgent"], t.Any]) -> None:
  119. super().__init__(fget)
  120. self.message = (
  121. "The built-in user agent parser is deprecated and will be"
  122. f" removed in Werkzeug 2.1. The {fget.__name__!r} property"
  123. " will be 'None'. Subclass 'werkzeug.user_agent.UserAgent'"
  124. " and set 'Request.user_agent_class' to use a different"
  125. " parser."
  126. )
  127. def __get__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
  128. warnings.warn(self.message, DeprecationWarning, stacklevel=3)
  129. return super().__get__(*args, **kwargs)
# This is what Request.user_agent returns for now. It only shows warnings
# on attribute access, not on creation.
  132. class _UserAgent(_BaseUserAgent):
  133. _parser = _UserAgentParser()
  134. def __init__(self, string: str) -> None:
  135. super().__init__(string)
  136. info = self._parser(string)
  137. self._platform, self._browser, self._version, self._language = info
  138. @_deprecated_property
  139. def platform(self) -> t.Optional[str]: # type: ignore
  140. return self._platform
  141. @_deprecated_property
  142. def browser(self) -> t.Optional[str]: # type: ignore
  143. return self._browser
  144. @_deprecated_property
  145. def version(self) -> t.Optional[str]: # type: ignore
  146. return self._version
  147. @_deprecated_property
  148. def language(self) -> t.Optional[str]: # type: ignore
  149. return self._language
# This is what users might be importing, so show warnings on creation.
  151. class UserAgent(_UserAgent):
  152. """Represents a parsed user agent header value.
  153. This uses a basic parser to try to extract some information from the
  154. header.
  155. :param environ_or_string: The header value to parse, or a WSGI
  156. environ containing the header.
  157. .. deprecated:: 2.0
  158. Will be removed in Werkzeug 2.1. Subclass
  159. :class:`werkzeug.user_agent.UserAgent` (note the new module
  160. name) to use a dedicated parser instead.
  161. .. versionchanged:: 2.0
  162. Passing a WSGI environ is deprecated and will be removed in 2.1.
  163. """
  164. def __init__(self, environ_or_string: "t.Union[str, WSGIEnvironment]") -> None:
  165. if isinstance(environ_or_string, dict):
  166. warnings.warn(
  167. "Passing an environ to 'UserAgent' is deprecated and"
  168. " will be removed in Werkzeug 2.1. Pass the header"
  169. " value string instead.",
  170. DeprecationWarning,
  171. stacklevel=2,
  172. )
  173. string = environ_or_string.get("HTTP_USER_AGENT", "")
  174. else:
  175. string = environ_or_string
  176. warnings.warn(
  177. "The 'werkzeug.useragents' module is deprecated and will be"
  178. " removed in Werkzeug 2.1. The new base API is"
  179. " 'werkzeug.user_agent.UserAgent'.",
  180. DeprecationWarning,
  181. stacklevel=2,
  182. )
  183. super().__init__(string)