format_changelog.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. #!/usr/bin/env python
  2. # Copyright (c) 2014-2019, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. #
  5. # This script reformats a section of the changelog to wrap everything to
  6. # the right width and put blank lines in the right places. Eventually,
  7. # it might include a linter.
  8. #
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
  11. # Future imports for Python 2.7, mandatory in 3.0
  12. from __future__ import division
  13. from __future__ import print_function
  14. from __future__ import unicode_literals
  15. import os
  16. import re
  17. import sys
  18. import optparse
  19. # ==============================
  20. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  21. # algorithm, with totally ad hoc parameters!
  22. #
# We're trying to minimize:
#    UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) on each
#      underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT)
#      on each overflowed line,
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY *
#      (ragged space ** LASTLINE_UNDERFLOW_EXPONENT) on the last line,
#  PLUS
#    ORPHAN_PENALTY if the last line holds a single word,
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
  31. #
  32. # We use an obvious dynamic programming algorithm to sorta approximate this.
  33. # It's not coded right or optimally, but it's fast enough for changelogs
  34. #
  35. # (Code found in an old directory of mine, lightly cleaned. -NM)
  36. NO_HYPHENATE=set("""
  37. pf-divert
  38. tor-resolve
  39. tor-gencert
  40. """.split())
  41. LASTLINE_UNDERFLOW_EXPONENT = 1
  42. LASTLINE_UNDERFLOW_PENALTY = 1
  43. UNDERFLOW_EXPONENT = 3
  44. UNDERFLOW_PENALTY = 1
  45. OVERFLOW_EXPONENT = 4
  46. OVERFLOW_PENALTY = 2000
  47. ORPHAN_PENALTY = 10000
  48. OPENPAREN_PENALTY = 200
  49. def generate_wrapping(words, divisions):
  50. lines = []
  51. last = 0
  52. for i in divisions:
  53. w = words[last:i]
  54. last = i
  55. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  56. lines.append(line.strip())
  57. return lines
  58. def wrapping_quality(words, divisions, width1, width2):
  59. total = 0.0
  60. lines = generate_wrapping(words, divisions)
  61. for line in lines:
  62. length = len(line)
  63. if line is lines[0]:
  64. width = width1
  65. else:
  66. width = width2
  67. if line[0:1] == '(':
  68. total += OPENPAREN_PENALTY
  69. if length > width:
  70. total += OVERFLOW_PENALTY * (
  71. (length - width) ** OVERFLOW_EXPONENT )
  72. else:
  73. if line is lines[-1]:
  74. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  75. if " " not in line:
  76. total += ORPHAN_PENALTY
  77. else:
  78. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  79. total += p * ((width - length) ** e)
  80. return total
  81. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  82. wrapping_after = [ (0,), ]
  83. w1 = width - prefix_len1
  84. w2 = width - prefix_len2
  85. for i in range(1, len(words)+1):
  86. best_so_far = None
  87. best_score = 1e300
  88. for j in range(i):
  89. t = wrapping_after[j]
  90. t1 = t[:-1] + (i,)
  91. t2 = t + (i,)
  92. wq1 = wrapping_quality(words, t1, w1, w2)
  93. wq2 = wrapping_quality(words, t2, w1, w2)
  94. if wq1 < best_score:
  95. best_so_far = t1
  96. best_score = wq1
  97. if wq2 < best_score:
  98. best_so_far = t2
  99. best_score = wq2
  100. wrapping_after.append( best_so_far )
  101. lines = generate_wrapping(words, wrapping_after[-1])
  102. return lines
  103. def hyphenatable(word):
  104. if "--" in word:
  105. return False
  106. if re.match(r'^[^\d\-]\D*-', word):
  107. stripped = re.sub(r'^\W+','',word)
  108. stripped = re.sub(r'\W+$','',word)
  109. return stripped not in NO_HYPHENATE
  110. else:
  111. return False
  112. def split_paragraph(s):
  113. "Split paragraph into words; tuned for Tor."
  114. r = []
  115. for word in s.split():
  116. if hyphenatable(word):
  117. while "-" in word:
  118. a,word = word.split("-",1)
  119. r.append(a+"\xff")
  120. r.append(word)
  121. return r
  122. def fill(text, width, initial_indent, subsequent_indent):
  123. words = split_paragraph(text)
  124. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  125. width)
  126. res = [ initial_indent, lines[0], "\n" ]
  127. for line in lines[1:]:
  128. res.append(subsequent_indent)
  129. res.append(line)
  130. res.append("\n")
  131. return "".join(res)
  132. # ==============================
  133. TP_MAINHEAD = 0
  134. TP_HEADTEXT = 1
  135. TP_BLANK = 2
  136. TP_SECHEAD = 3
  137. TP_ITEMFIRST = 4
  138. TP_ITEMBODY = 5
  139. TP_END = 6
  140. TP_PREHEAD = 7
  141. def head_parser(line):
  142. if re.match(r'^Changes in', line):
  143. return TP_MAINHEAD
  144. elif re.match(r'^[A-Za-z]', line):
  145. return TP_PREHEAD
  146. elif re.match(r'^ o ', line):
  147. return TP_SECHEAD
  148. elif re.match(r'^\s*$', line):
  149. return TP_BLANK
  150. else:
  151. return TP_HEADTEXT
  152. def body_parser(line):
  153. if re.match(r'^ o ', line):
  154. return TP_SECHEAD
  155. elif re.match(r'^ -',line):
  156. return TP_ITEMFIRST
  157. elif re.match(r'^ \S', line):
  158. return TP_ITEMBODY
  159. elif re.match(r'^\s*$', line):
  160. return TP_BLANK
  161. elif re.match(r'^Changes in', line):
  162. return TP_END
  163. elif re.match(r'^\s+\S', line):
  164. return TP_HEADTEXT
  165. else:
  166. print("Weird line %r"%line, file=sys.stderr)
def clean_head(head):
    """Return the section heading `head` unchanged.

       Currently a no-op; collateAndSortSections() passes every heading
       through this function before grouping sections by heading.
    """
    return head
  169. def head_score(s):
  170. m = re.match(r'^ +o (.*)', s)
  171. if not m:
  172. print("Can't score %r"%s, file=sys.stderr)
  173. return 99999
  174. lw = m.group(1).lower()
  175. if lw.startswith("security") and "feature" not in lw:
  176. score = -300
  177. elif lw.startswith("deprecated version"):
  178. score = -200
  179. elif lw.startswith("directory auth"):
  180. score = -150
  181. elif (('new' in lw and 'requirement' in lw) or
  182. ('new' in lw and 'dependenc' in lw) or
  183. ('build' in lw and 'requirement' in lw) or
  184. ('removed' in lw and 'platform' in lw)):
  185. score = -100
  186. elif lw.startswith("major feature"):
  187. score = 00
  188. elif lw.startswith("major bug"):
  189. score = 50
  190. elif lw.startswith("major"):
  191. score = 70
  192. elif lw.startswith("minor feature"):
  193. score = 200
  194. elif lw.startswith("minor bug"):
  195. score = 250
  196. elif lw.startswith("minor"):
  197. score = 270
  198. else:
  199. score = 1000
  200. if 'secur' in lw:
  201. score -= 2
  202. if "(other)" in lw:
  203. score += 2
  204. if '(' not in lw:
  205. score -= 1
  206. return score
  207. class ChangeLog(object):
  208. def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
  209. self.prehead = []
  210. self.mainhead = None
  211. self.headtext = []
  212. self.curgraf = None
  213. self.sections = []
  214. self.cursection = None
  215. self.lineno = 0
  216. self.wrapText = wrapText
  217. self.blogOrder = blogOrder
  218. self.drupalBreak = drupalBreak
  219. def addLine(self, tp, line):
  220. self.lineno += 1
  221. if tp == TP_MAINHEAD:
  222. assert not self.mainhead
  223. self.mainhead = line
  224. elif tp == TP_PREHEAD:
  225. self.prehead.append(line)
  226. elif tp == TP_HEADTEXT:
  227. if self.curgraf is None:
  228. self.curgraf = []
  229. self.headtext.append(self.curgraf)
  230. self.curgraf.append(line)
  231. elif tp == TP_BLANK:
  232. self.curgraf = None
  233. elif tp == TP_SECHEAD:
  234. self.cursection = [ self.lineno, line, [] ]
  235. self.sections.append(self.cursection)
  236. elif tp == TP_ITEMFIRST:
  237. item = ( self.lineno, [ [line] ])
  238. self.curgraf = item[1][0]
  239. self.cursection[2].append(item)
  240. elif tp == TP_ITEMBODY:
  241. if self.curgraf is None:
  242. self.curgraf = []
  243. self.cursection[2][-1][1].append(self.curgraf)
  244. self.curgraf.append(line)
  245. else:
  246. assert False # This should be unreachable.
  247. def lint_head(self, line, head):
  248. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  249. if not m:
  250. print("Weird header format on line %s"%line, file=sys.stderr)
  251. def lint_item(self, line, grafs, head_type):
  252. pass
  253. def lint(self):
  254. self.head_lines = {}
  255. for sec_line, sec_head, items in self.sections:
  256. head_type = self.lint_head(sec_line, sec_head)
  257. for item_line, grafs in items:
  258. self.lint_item(item_line, grafs, head_type)
  259. def dumpGraf(self,par,indent1,indent2=-1):
  260. if not self.wrapText:
  261. for line in par:
  262. print(line)
  263. return
  264. if indent2 == -1:
  265. indent2 = indent1
  266. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  267. sys.stdout.write(fill(text,
  268. width=72,
  269. initial_indent=" "*indent1,
  270. subsequent_indent=" "*indent2))
  271. def dumpPreheader(self, graf):
  272. self.dumpGraf(graf, 0)
  273. print()
  274. def dumpMainhead(self, head):
  275. print(head)
  276. def dumpHeadGraf(self, graf):
  277. self.dumpGraf(graf, 2)
  278. print()
  279. def dumpSectionHeader(self, header):
  280. print(header)
  281. def dumpStartOfSections(self):
  282. pass
  283. def dumpEndOfSections(self):
  284. pass
  285. def dumpEndOfSection(self):
  286. print()
  287. def dumpEndOfChangelog(self):
  288. print()
  289. def dumpDrupalBreak(self):
  290. pass
  291. def dumpItem(self, grafs):
  292. self.dumpGraf(grafs[0],4,6)
  293. for par in grafs[1:]:
  294. print()
  295. self.dumpGraf(par,6,6)
  296. def collateAndSortSections(self):
  297. heads = []
  298. sectionsByHead = { }
  299. for _, head, items in self.sections:
  300. head = clean_head(head)
  301. try:
  302. s = sectionsByHead[head]
  303. except KeyError:
  304. s = sectionsByHead[head] = []
  305. heads.append( (head_score(head), head.lower(), head, s) )
  306. s.extend(items)
  307. heads.sort()
  308. self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
  309. def dump(self):
  310. if self.prehead:
  311. self.dumpPreheader(self.prehead)
  312. if not self.blogOrder:
  313. self.dumpMainhead(self.mainhead)
  314. for par in self.headtext:
  315. self.dumpHeadGraf(par)
  316. if self.blogOrder:
  317. self.dumpMainhead(self.mainhead)
  318. drupalBreakAfter = None
  319. if self.drupalBreak and len(self.sections) > 4:
  320. drupalBreakAfter = self.sections[1][2]
  321. self.dumpStartOfSections()
  322. for _,head,items in self.sections:
  323. if not head.endswith(':'):
  324. print("adding : to %r"%head, file=sys.stderr)
  325. head = head + ":"
  326. self.dumpSectionHeader(head)
  327. for _,grafs in items:
  328. self.dumpItem(grafs)
  329. self.dumpEndOfSection()
  330. if items is drupalBreakAfter:
  331. self.dumpDrupalBreak()
  332. self.dumpEndOfSections()
  333. self.dumpEndOfChangelog()
  334. # Map from issue prefix to pair of (visible prefix, url prefix)
  335. ISSUE_PREFIX_MAP = {
  336. "" : ( "", "tpo/core/tor" ),
  337. "tor#" : ( "", "tpo/core/tor" ),
  338. "chutney#" : ( "chutney#", "tpo/core/chutney" ),
  339. "torspec#" : ( "torspec#", "tpo/core/torspec" ),
  340. "trunnel#" : ( "trunnel#", "tpo/core/trunnel" ),
  341. "torsocks#" : ( "torsocks#", "tpo/core/torsocks"),
  342. }
  343. # Let's turn bugs to html.
  344. BUG_PAT = re.compile('(bug|ticket|issue|feature)\s+([\w/]+#)?(\d{4,6})', re.I)
  345. def bug_html(m):
  346. kind = m.group(1)
  347. prefix = m.group(2) or ""
  348. bugno = m.group(3)
  349. try:
  350. disp_prefix, url_prefix = ISSUE_PREFIX_MAP[prefix]
  351. except KeyError:
  352. print("Can't figure out URL for {}{}".formt(prefix,bugno),
  353. file=sys.stderr)
  354. return "{} {}{}".format(kind, prefix, bugno)
  355. return "{} <a href='https://bugs.torproject.org/{}/{}'>{}{}</a>".format(
  356. kind, url_prefix, bugno, disp_prefix, bugno)
  357. class HTMLChangeLog(ChangeLog):
  358. def __init__(self, *args, **kwargs):
  359. ChangeLog.__init__(self, *args, **kwargs)
  360. def htmlText(self, graf):
  361. output = []
  362. for line in graf:
  363. line = line.rstrip().replace("&","&amp;")
  364. line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
  365. output.append(line.strip())
  366. output = " ".join(output)
  367. output = BUG_PAT.sub(bug_html, output)
  368. sys.stdout.write(output)
  369. def htmlPar(self, graf):
  370. sys.stdout.write("<p>")
  371. self.htmlText(graf)
  372. sys.stdout.write("</p>\n")
  373. def dumpPreheader(self, graf):
  374. self.htmlPar(graf)
  375. def dumpMainhead(self, head):
  376. sys.stdout.write("<h2>%s</h2>"%head)
  377. def dumpHeadGraf(self, graf):
  378. self.htmlPar(graf)
  379. def dumpSectionHeader(self, header):
  380. header = header.replace(" o ", "", 1).lstrip()
  381. sys.stdout.write(" <li>%s\n"%header)
  382. sys.stdout.write(" <ul>\n")
  383. def dumpEndOfSection(self):
  384. sys.stdout.write(" </ul>\n\n")
  385. def dumpEndOfChangelog(self):
  386. pass
  387. def dumpStartOfSections(self):
  388. print("<ul>\n")
  389. def dumpEndOfSections(self):
  390. print("</ul>\n")
  391. def dumpDrupalBreak(self):
  392. print("\n</ul>\n")
  393. print("<p>&nbsp;</p>")
  394. print("\n<!--break-->\n\n")
  395. print("<ul>")
  396. def dumpItem(self, grafs):
  397. grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
  398. sys.stdout.write(" <li>")
  399. if len(grafs) > 1:
  400. for par in grafs:
  401. self.htmlPar(par)
  402. else:
  403. self.htmlText(grafs[0])
  404. print()
  405. op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
  406. op.add_option('-W', '--no-wrap', action='store_false',
  407. dest='wrapText', default=True,
  408. help='Do not re-wrap paragraphs')
  409. op.add_option('-S', '--no-sort', action='store_false',
  410. dest='sort', default=True,
  411. help='Do not sort or collate sections')
  412. op.add_option('-o', '--output', dest='output',
  413. default='-', metavar='FILE', help="write output to FILE")
  414. op.add_option('-H', '--html', action='store_true',
  415. dest='html', default=False,
  416. help="generate an HTML fragment")
  417. op.add_option('-1', '--first', action='store_true',
  418. dest='firstOnly', default=False,
  419. help="write only the first section")
  420. op.add_option('-b', '--blog-header', action='store_true',
  421. dest='blogOrder', default=False,
  422. help="Write the header in blog order")
  423. op.add_option('-B', '--blog', action='store_true',
  424. dest='blogFormat', default=False,
  425. help="Set all other options as appropriate for a blog post")
  426. op.add_option('--inplace', action='store_true',
  427. dest='inplace', default=False,
  428. help="Alter the ChangeLog in place")
  429. op.add_option('--drupal-break', action='store_true',
  430. dest='drupalBreak', default=False,
  431. help='Insert a drupal-friendly <!--break--> as needed')
  432. options,args = op.parse_args()
  433. if options.blogFormat:
  434. options.blogOrder = True
  435. options.html = True
  436. options.sort = False
  437. options.wrapText = False
  438. options.firstOnly = True
  439. options.drupalBreak = True
  440. if len(args) > 1:
  441. op.error("Too many arguments")
  442. elif len(args) == 0:
  443. fname = 'ChangeLog'
  444. else:
  445. fname = args[0]
  446. if options.inplace:
  447. assert options.output == '-'
  448. options.output = fname
  449. if fname != '-':
  450. sys.stdin = open(fname, 'r')
  451. nextline = None
  452. if options.html:
  453. ChangeLogClass = HTMLChangeLog
  454. else:
  455. ChangeLogClass = ChangeLog
  456. CL = ChangeLogClass(wrapText=options.wrapText,
  457. blogOrder=options.blogOrder,
  458. drupalBreak=options.drupalBreak)
  459. parser = head_parser
  460. for line in sys.stdin:
  461. line = line.rstrip()
  462. tp = parser(line)
  463. if tp == TP_SECHEAD:
  464. parser = body_parser
  465. elif tp == TP_END:
  466. nextline = line
  467. break
  468. CL.addLine(tp,line)
  469. CL.lint()
  470. if options.output != '-':
  471. fname_new = options.output+".new"
  472. fname_out = options.output
  473. sys.stdout = open(fname_new, 'w')
  474. else:
  475. fname_new = fname_out = None
  476. if options.sort:
  477. CL.collateAndSortSections()
  478. CL.dump()
  479. if options.firstOnly:
  480. sys.exit(0)
  481. if nextline is not None:
  482. print(nextline)
  483. for line in sys.stdin:
  484. sys.stdout.write(line)
  485. if fname_new is not None:
  486. os.rename(fname_new, fname_out)