make_changelog.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. from __future__ import annotations
  2. # Allow direct execution
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  6. import enum
  7. import itertools
  8. import json
  9. import logging
  10. import re
  11. from collections import defaultdict
  12. from dataclasses import dataclass
  13. from functools import lru_cache
  14. from pathlib import Path
  15. from devscripts.utils import read_file, run_process, write_file
  # Root URL used when building links to repositories, commits, issues and author profiles
  BASE_URL = 'https://github.com'
  # Directory containing this script; the default override and CONTRIBUTORS paths are derived from it
  LOCATION_PATH = Path(__file__).parent
  # Number of leading characters kept when a commit hash is abbreviated
  HASH_LENGTH = 7
  # Module-level logger used for all diagnostics emitted by this file
  logger = logging.getLogger(__name__)
  class CommitGroup(enum.Enum):
      """Changelog sections; each member's value is the heading text.

      The declaration order is the order in which sections are emitted when
      the enum is iterated. ``UPSTREAM`` has no heading (its value is ``None``).
      """

      UPSTREAM = None
      PRIORITY = 'Important'
      CORE = 'Core'
      EXTRACTOR = 'Extractor'
      DOWNLOADER = 'Downloader'
      POSTPROCESSOR = 'Postprocessor'
      MISC = 'Misc.'

      @classmethod
      @lru_cache
      def commit_lookup(cls):
          """Return the cached mapping from commit prefix to its group."""
          # Built inside the method: a dict assigned in the class body
          # would be turned into an enum member.
          prefixes_by_group = {
              cls.PRIORITY: {''},
              cls.UPSTREAM: {'upstream'},
              cls.CORE: {
                  'aes',
                  'cache',
                  'compat_utils',
                  'compat',
                  'cookies',
                  'core',
                  'dependencies',
                  'jsinterp',
                  'outtmpl',
                  'plugins',
                  'update',
                  'utils',
              },
              cls.MISC: {
                  'build',
                  'cleanup',
                  'devscripts',
                  'docs',
                  'misc',
                  'test',
              },
              cls.EXTRACTOR: {'extractor', 'extractors'},
              cls.DOWNLOADER: {'downloader'},
              cls.POSTPROCESSOR: {'postprocessor'},
          }

          lookup = {}
          for member, prefixes in prefixes_by_group.items():
              for prefix in prefixes:
                  lookup[prefix] = member
          return lookup

      @classmethod
      def get(cls, value):
          """Return the group registered for prefix ``value``, or ``None`` if unknown."""
          member = cls.commit_lookup().get(value)
          if not member:
              return member

          logger.debug(f'Mapped {value!r} => {member.name}')
          return member
  @dataclass
  class Commit:
      """A single commit: full hash (if any), subject line and author names."""

      hash: str | None
      short: str
      authors: list[str]

      def __str__(self):
          """Return ``'<subject>' (<short hash>) by <authors>``, omitting empty parts."""
          parts = [repr(self.short)]
          if self.hash:
              parts.append(f'({self.hash[:HASH_LENGTH]})')
          if self.authors:
              parts.append(f'by {", ".join(self.authors)}')
          return ' '.join(parts)
  @dataclass
  class CommitInfo:
      """Parsed pieces of a commit subject plus the commit and its fix commits."""

      details: str | None
      sub_details: tuple[str, ...]
      message: str
      issues: list[str]
      commit: Commit
      fixes: list[Commit]

      def key(self):
          """Return the sort key: lowercased details, then sub-details, then message."""
          folded_details = self.details.lower() if self.details else ''
          return (folded_details, self.sub_details, self.message)
  class Changelog:
      """Render grouped commit information as a markdown changelog.

      ``groups`` is indexed with every ``CommitGroup`` member and must yield a
      (possibly empty) list of ``CommitInfo`` objects for each of them.
      ``repo`` is the ``owner/name`` slug appended to ``BASE_URL`` for links.
      """

      # Cleanup commits whose message matches are folded into one "Miscellaneous" entry
      MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)

      def __init__(self, groups, repo):
          self._groups = groups
          self._repo = repo

      def __str__(self):
          """Return the whole changelog as one markdown string."""
          # NOTE(review): tabs are replaced by a single space here; confirm this
          # is the intended nesting width and not whitespace lost in transit.
          return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ')

      def _format_groups(self, groups):
          """Yield one formatted section per non-empty group, in ``CommitGroup`` order."""
          for item in CommitGroup:
              group = groups[item]
              if group:
                  yield self.format_module(item.value, group)

      def format_module(self, name, group):
          """Return the section for ``group``; the heading is omitted when ``name`` is falsy."""
          result = f'\n#### {name} changes\n' if name else '\n'
          return result + '\n'.join(self._format_group(group))

      @staticmethod
      def _sub_details_key(item):
          """Return the case-insensitive grouping key for an item's sub-details."""
          return tuple(map(str.lower, item.sub_details))

      def _format_group(self, group):
          """Yield the markdown list lines for every entry of one section.

          Entries are grouped by (case-insensitive) details and then by
          (case-insensitive) sub-details. Entries without details are emitted
          at the top level; others are nested under a ``- <details>`` line.
          """
          sorted_group = sorted(group, key=lambda item: item.key())
          detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
          for _, items in detail_groups:
              items = list(items)
              details = items[0].details

              if not details:
                  indent = ''
              else:
                  yield f'- {details}'
                  indent = '\t'

              cleanup_misc_items = None
              if details == 'cleanup':
                  items, cleanup_misc_items = self._filter_cleanup_misc_items(items)

              # groupby() only merges adjacent items, so the list has to be ordered
              # by the very key it is grouped on. The incoming order compares
              # sub-details case-sensitively, which could split e.g. "Foo" and "foo"
              # into separate groups. The sort is stable, so the relative order
              # inside each group is unchanged.
              items.sort(key=self._sub_details_key)
              sub_detail_groups = itertools.groupby(items, self._sub_details_key)
              for sub_details, entries in sub_detail_groups:
                  if not sub_details:
                      for entry in entries:
                          yield f'{indent}- {self.format_single_change(entry)}'
                      continue

                  entries = list(entries)
                  prefix = f'{indent}- {", ".join(entries[0].sub_details)}'
                  if len(entries) == 1:
                      # A lone entry is written on the same line as its sub-details
                      yield f'{prefix}: {self.format_single_change(entries[0])}'
                      continue

                  yield prefix
                  for entry in entries:
                      yield f'{indent}\t- {self.format_single_change(entry)}'

              if cleanup_misc_items:
                  yield from self._format_cleanup_misc_sub_group(cleanup_misc_items)

      def _filter_cleanup_misc_items(self, items):
          """Split ``items`` into regular entries and miscellaneous ones.

          Returns ``(non_misc_items, cleanup_misc_items)`` where the second value
          maps a tuple of author names to the entries matching ``MISC_RE``.
          """
          cleanup_misc_items = defaultdict(list)
          non_misc_items = []
          for item in items:
              if self.MISC_RE.search(item.message):
                  cleanup_misc_items[tuple(item.commit.authors)].append(item)
              else:
                  non_misc_items.append(item)

          return non_misc_items, cleanup_misc_items

      def _format_cleanup_misc_sub_group(self, group):
          """Yield the "Miscellaneous" lines for the author -> entries mapping ``group``."""
          prefix = '\t- Miscellaneous'
          if len(group) == 1:
              yield f'{prefix}: {next(self._format_cleanup_misc_items(group))}'
              return

          yield prefix
          for message in self._format_cleanup_misc_items(group):
              yield f'\t\t- {message}'

      def _format_cleanup_misc_items(self, group):
          """Yield ``<commit links> by <authors>`` for each author tuple in ``group``."""
          for authors, infos in group.items():
              message = ', '.join(
                  self._format_message_link(None, info.commit.hash)
                  for info in sorted(infos, key=lambda item: item.commit.hash or ''))
              yield f'{message} by {self._format_authors(authors)}'

      def format_single_change(self, info):
          """Return the markdown text for one entry: link, issues, authors and fixes."""
          message = self._format_message_link(info.message, info.commit.hash)
          if info.issues:
              message = f'{message} ({self._format_issues(info.issues)})'

          if info.commit.authors:
              message = f'{message} by {self._format_authors(info.commit.authors)}'

          if info.fixes:
              fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

              authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
              # Fix authors are only listed when they differ from the commit's own authors
              if authors != info.commit.authors:
                  fix_message = f'{fix_message} by {self._format_authors(authors)}'

              message = f'{message} (With fixes in {fix_message})'

          return message

      def _format_message_link(self, message, hash):
          """Return ``message`` linked to commit ``hash``.

          Without a message the abbreviated hash is used as link text; without a
          hash the bare message is returned. At least one of them is required.
          """
          assert message or hash, 'Improperly defined commit message or override'
          message = message if message else hash[:HASH_LENGTH]
          return f'[{message}]({self.repo_url}/commit/{hash})' if hash else message

      def _format_issues(self, issues):
          """Return comma-separated markdown links for the issue numbers."""
          return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

      @staticmethod
      def _format_authors(authors):
          """Return comma-separated markdown links to the authors' profiles."""
          return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

      @property
      def repo_url(self):
          """Base URL of the repository the links point to."""
          return f'{BASE_URL}/{self._repo}'
  class CommitRange:
      """Commits between two commitishes, read from ``git log`` and sorted into groups.

      Iterating yields the commits found in the log followed by commits added
      through overrides. Membership tests accept a hash or a ``Commit`` and only
      consider the commits found in the log.
      """

      COMMAND = 'git'
      COMMIT_SEPARATOR = '-----'

      AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
      # Subject layout: "[prefix/details:sub_details] sub_details_alt: message (#issue, #issue)"
      MESSAGE_RE = re.compile(r'''
          (?:\[
              (?P<prefix>[^\]\/:,]+)
              (?:/(?P<details>[^\]:,]+))?
              (?:[:,](?P<sub_details>[^\]]+))?
          \]\ )?
          (?:(?P<sub_details_alt>`?[^:`]+`?): )?
          (?P<message>.+?)
          (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
          ''', re.VERBOSE | re.DOTALL)
      EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
      FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert)\s+([\da-f]{40})')
      UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

      def __init__(self, start, end, default_author=None):
          """Load the commits of ``start..end`` (or everything up to ``end`` if ``start`` is falsy).

          ``default_author`` is credited for commits without an author indicator.
          """
          self._start, self._end = start, end
          self._commits, self._fixes = self._get_commits_and_fixes(default_author)
          self._commits_added = []

      def __iter__(self):
          return iter(itertools.chain(self._commits.values(), self._commits_added))

      def __len__(self):
          return len(self._commits) + len(self._commits_added)

      def __contains__(self, commit):
          if isinstance(commit, Commit):
              if not commit.hash:
                  return False
              commit = commit.hash

          return commit in self._commits

      def _get_commits_and_fixes(self, default_author):
          """Parse the ``git log`` output into commits and their fix commits.

          Returns ``(commits, fixes)``: ``commits`` maps hash -> ``Commit`` and
          ``fixes`` maps the hash named in a fix/revert subject -> list of the
          commits referring to it. Fix commits whose target is part of
          ``commits`` are removed from ``commits``.
          """
          result = run_process(
              self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
              f'{self._start}..{self._end}' if self._start else self._end).stdout

          commits = {}
          fixes = defaultdict(list)
          lines = iter(result.splitlines(False))
          for i, commit_hash in enumerate(lines):
              short = next(lines)
              skip = short.startswith('Release ') or short == '[version] update'

              authors = [default_author] if default_author else []
              # Consume the body lines up to the separator; an author line replaces the default
              for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                  match = self.AUTHOR_INDICATOR_RE.match(line)
                  if match:
                      authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

              commit = Commit(commit_hash, short, authors)
              if skip and (self._start or not i):
                  logger.debug(f'Skipped commit: {commit}')
                  continue
              elif skip:
                  # Without an explicit start, a release commit after the first one ends the range
                  logger.debug(f'Reached Release commit, breaking: {commit}')
                  break

              fix_match = self.FIXES_RE.search(commit.short)
              if fix_match:
                  commitish = fix_match.group(1)
                  fixes[commitish].append(commit)

              commits[commit.hash] = commit

          for commitish, fix_commits in fixes.items():
              if commitish in commits:
                  hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                  logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                  for fix_commit in fix_commits:
                      del commits[fix_commit.hash]
              else:
                  logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

          return commits, fixes

      def apply_overrides(self, overrides):
          """Apply ``add`` / ``remove`` / ``change`` override dicts to the range.

          An override carrying ``when`` is ignored unless that hash is in the
          range or equals the range start. ``remove`` and ``change`` silently do
          nothing when their ``hash`` is not in the range. A missing ``authors``
          entry means "no authors" for both ``add`` and ``change``. Afterwards
          the order of the commits read from the log is reversed.
          """
          for override in overrides:
              when = override.get('when')
              if when and when not in self and when != self._start:
                  logger.debug(f'Ignored {when!r}, not in commits {self._start!r}')
                  continue

              override_hash = override.get('hash')
              if override['action'] == 'add':
                  commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                  logger.info(f'ADD    {commit}')
                  self._commits_added.append(commit)

              elif override['action'] == 'remove':
                  if override_hash in self._commits:
                      logger.info(f'REMOVE {self._commits[override_hash]}')
                      del self._commits[override_hash]

              elif override['action'] == 'change':
                  if override_hash not in self._commits:
                      continue
                  # Same tolerant author handling as 'add'; a plain subscript raised
                  # KeyError for overrides that only change the subject
                  commit = Commit(override_hash, override['short'], override.get('authors') or [])
                  logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                  self._commits[commit.hash] = commit

          self._commits = dict(reversed(self._commits.items()))

      def groups(self):
          """Return a mapping of ``CommitGroup`` -> list of ``CommitInfo`` for all commits.

          Commits whose subject cannot be parsed are logged and left out.
          """
          groups = defaultdict(list)
          for commit in self:
              upstream_re = self.UPSTREAM_MERGE_RE.match(commit.short)
              if upstream_re:
                  commit.short = f'[upstream] Merge up to youtube-dl {upstream_re.group(1)}'

              match = self.MESSAGE_RE.fullmatch(commit.short)
              if not match:
                  logger.error(f'Error parsing short commit message: {commit.short!r}')
                  continue

              prefix, details, sub_details, sub_details_alt, message, issues = match.groups()
              group = None
              if prefix:
                  if prefix == 'priority':
                      # "[priority/<prefix>/<details>]": shift the remaining parts one level up
                      prefix, _, details = (details or '').partition('/')
                      logger.debug(f'Priority: {message!r}')
                      group = CommitGroup.PRIORITY

                  if not details and prefix:
                      if prefix not in ('core', 'downloader', 'extractor', 'misc', 'postprocessor', 'upstream'):
                          logger.debug(f'Replaced details with {prefix!r}')
                          details = prefix or None

                  if details == 'common':
                      details = None

                  if details:
                      details = details.strip()

              else:
                  # Subjects without a "[prefix]" are filed under core
                  group = CommitGroup.CORE

              sub_details = f'{sub_details or ""},{sub_details_alt or ""}'.replace(':', ',')
              sub_details = tuple(filter(None, map(str.strip, sub_details.split(','))))

              # Drop the leading "#" of every issue reference
              issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

              if not group:
                  group = CommitGroup.get(prefix.lower())
                  if not group:
                      if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                          group = CommitGroup.EXTRACTOR
                      else:
                          group = CommitGroup.POSTPROCESSOR
                      logger.warning(f'Failed to map {commit.short!r}, selected {group.name}')

              commit_info = CommitInfo(
                  details, sub_details, message.strip(),
                  issues, commit, self._fixes[commit.hash])
              logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
              groups[group].append(commit_info)

          return groups
  def get_new_contributors(contributors_path, commits):
      """Return commit authors not yet listed in the contributors file.

      Each line of the file is read up to the first `` (``; what precedes it is
      split on ``/`` into names. Names are compared case-insensitively, and the
      result is sorted case-insensitively with each new author listed once.
      A missing file is treated as an empty list of contributors.
      """
      known = set()
      if contributors_path.exists():
          for entry in read_file(contributors_path).splitlines():
              names = entry.strip().partition(' (')[0].split('/')
              known.update(name.casefold() for name in names)

      fresh = set()
      for commit in commits:
          for author in commit.authors:
              folded = author.casefold()
              if folded in known:
                  continue
              # Remember the name so a differently-cased repeat is not added again
              known.add(folded)
              fresh.add(author)

      return sorted(fresh, key=str.casefold)
  if __name__ == '__main__':
      import argparse

      # Command line interface
      cli = argparse.ArgumentParser(
          description='Create a changelog markdown from a git commit range')
      cli.add_argument(
          'commitish', nargs='?', default='HEAD',
          help='The commitish to create the range from (default: %(default)s)')
      cli.add_argument(
          '-v', '--verbosity', default=0, action='count',
          help='increase verbosity (can be used twice)')
      cli.add_argument(
          '-c', '--contributors', action='store_true',
          help='update CONTRIBUTORS file (default: %(default)s)')
      cli.add_argument(
          '--contributors-path', default=LOCATION_PATH.parent / 'CONTRIBUTORS', type=Path,
          help='path to the CONTRIBUTORS file')
      cli.add_argument(
          '--no-override', action='store_true',
          help='skip override json in commit generation (default: %(default)s)')
      cli.add_argument(
          '--override-path', default=LOCATION_PATH / 'changelog_override.json', type=Path,
          help='path to the changelog_override.json file')
      cli.add_argument(
          '--default-author', default='pukkandan',
          help='the author to use without a author indicator (default: %(default)s)')
      cli.add_argument(
          '--repo', default='yt-dlp/yt-dlp',
          help='the github repository to use for the operations (default: %(default)s)')
      options = cli.parse_args()

      # Every -v lowers the threshold by one level: WARNING -> INFO -> DEBUG
      log_level = logging.WARNING - 10 * options.verbosity
      logging.basicConfig(
          stream=sys.stderr, level=log_level, style='{',
          format='{asctime} | {levelname:<8} | {message}', datefmt='%Y-%m-%d %H-%M-%S')

      commit_range = CommitRange(None, options.commitish, options.default_author)

      use_overrides = not options.no_override
      if use_overrides and options.override_path.exists():
          commit_range.apply_overrides(json.loads(read_file(options.override_path)))
      elif use_overrides:
          logger.warning(f'File {options.override_path.as_posix()} does not exist')

      logger.info(f'Loaded {len(commit_range)} commits')

      added_contributors = get_new_contributors(options.contributors_path, commit_range)
      if added_contributors:
          # The file is only appended to when -c/--contributors was given
          if options.contributors:
              write_file(options.contributors_path, '\n'.join(added_contributors) + '\n', mode='a')
          logger.info(f'New contributors: {", ".join(added_contributors)}')

      print(Changelog(commit_range.groups(), options.repo))