metadataparser.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import re
  2. from .common import PostProcessor
  3. from ..utils import Namespace, filter_dict, function_with_repr
  4. class MetadataParserPP(PostProcessor):
  5. def __init__(self, downloader, actions):
  6. super().__init__(downloader)
  7. self._actions = []
  8. for f in actions:
  9. action, *args = f
  10. assert action in self.Actions
  11. self._actions.append(action(self, *args))
  12. @classmethod
  13. def validate_action(cls, action, *data):
  14. """Each action can be:
  15. (Actions.INTERPRET, from, to) OR
  16. (Actions.REPLACE, field, search, replace)
  17. """
  18. if action not in cls.Actions:
  19. raise ValueError(f'{action!r} is not a valid action')
  20. action(cls, *data) # So this can raise error to validate
  21. @staticmethod
  22. def field_to_template(tmpl):
  23. if re.match(r'[a-zA-Z_]+$', tmpl):
  24. return f'%({tmpl})s'
  25. from ..YoutubeDL import YoutubeDL
  26. err = YoutubeDL.validate_outtmpl(tmpl)
  27. if err:
  28. raise err
  29. return tmpl
  30. @staticmethod
  31. def format_to_regex(fmt):
  32. r"""
  33. Converts a string like
  34. '%(title)s - %(artist)s'
  35. to a regex like
  36. '(?P<title>.+)\ \-\ (?P<artist>.+)'
  37. """
  38. if not re.search(r'%\(\w+\)s', fmt):
  39. return fmt
  40. lastpos = 0
  41. regex = ''
  42. # replace %(..)s with regex group and escape other string parts
  43. for match in re.finditer(r'%\((\w+)\)s', fmt):
  44. regex += re.escape(fmt[lastpos:match.start()])
  45. regex += rf'(?P<{match.group(1)}>.+)'
  46. lastpos = match.end()
  47. if lastpos < len(fmt):
  48. regex += re.escape(fmt[lastpos:])
  49. return regex
  50. def run(self, info):
  51. for f in self._actions:
  52. f(info)
  53. return [], info
  54. @function_with_repr
  55. def interpretter(self, inp, out):
  56. def f(info):
  57. data_to_parse = self._downloader.evaluate_outtmpl(template, info)
  58. self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
  59. match = out_re.search(data_to_parse)
  60. if match is None:
  61. self.to_screen(f'Could not interpret {inp!r} as {out!r}')
  62. return
  63. for attribute, value in filter_dict(match.groupdict()).items():
  64. info[attribute] = value
  65. self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')
  66. template = self.field_to_template(inp)
  67. out_re = re.compile(self.format_to_regex(out))
  68. return f
  69. @function_with_repr
  70. def replacer(self, field, search, replace):
  71. def f(info):
  72. val = info.get(field)
  73. if val is None:
  74. self.to_screen(f'Video does not have a {field}')
  75. return
  76. elif not isinstance(val, str):
  77. self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
  78. return
  79. self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
  80. info[field], n = search_re.subn(replace, val)
  81. if n:
  82. self.to_screen(f'Changed {field} to: {info[field]}')
  83. else:
  84. self.to_screen(f'Did not find {search!r} in {field}')
  85. search_re = re.compile(search)
  86. return f
  87. Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
  88. class MetadataFromFieldPP(MetadataParserPP):
  89. @classmethod
  90. def to_action(cls, f):
  91. match = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
  92. if match is None:
  93. raise ValueError(f'it should be FROM:TO, not {f!r}')
  94. return (
  95. cls.Actions.INTERPRET,
  96. match.group('in').replace('\\:', ':'),
  97. match.group('out'),
  98. )
  99. def __init__(self, downloader, formats):
  100. super().__init__(downloader, [self.to_action(f) for f in formats])
  101. # Deprecated
  102. class MetadataFromTitlePP(MetadataParserPP):
  103. def __init__(self, downloader, titleformat):
  104. super().__init__(downloader, [(self.Actions.INTERPRET, 'title', titleformat)])
  105. self.deprecation_warning(
  106. 'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
  107. 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')