annotate_ifdef_directives.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  1. #!/usr/bin/env python
  2. # Copyright (c) 2017-2019, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. r"""
  5. This script iterates over a list of C files. For each file, it looks at the
  6. #if/#else C macros, and annotates them with comments explaining what they
  7. match.
  8. For example, it replaces this kind of input...
  9. >>> INPUT = '''
  10. ... #ifdef HAVE_OCELOT
  11. ... C code here
  12. ... #if MIMSY == BOROGROVE
  13. ... block 1
  14. ... block 1
  15. ... block 1
  16. ... block 1
  17. ... #else
  18. ... block 2
  19. ... block 2
  20. ... block 2
  21. ... block 2
  22. ... #endif
  23. ... #endif
  24. ... '''
  25. With this kind of output:
  26. >>> EXPECTED_OUTPUT = '''
  27. ... #ifdef HAVE_OCELOT
  28. ... C code here
  29. ... #if MIMSY == BOROGROVE
  30. ... block 1
  31. ... block 1
  32. ... block 1
  33. ... block 1
  34. ... #else /* !(MIMSY == BOROGROVE) */
  35. ... block 2
  36. ... block 2
  37. ... block 2
  38. ... block 2
  39. ... #endif /* MIMSY == BOROGROVE */
  40. ... #endif /* defined(HAVE_OCELOT) */
  41. ... '''
  42. Here's how to use it:
  43. >>> import sys
  44. >>> if sys.version_info.major < 3: from cStringIO import StringIO
  45. >>> if sys.version_info.major >= 3: from io import StringIO
  46. >>> OUTPUT = StringIO()
  47. >>> translate(StringIO(INPUT), OUTPUT)
  48. >>> assert OUTPUT.getvalue() == EXPECTED_OUTPUT
  49. Note that only #else and #endif lines are annotated. Existing comments
  50. on those lines are removed.
  51. """
  52. # Future imports for Python 2.7, mandatory in 3.0
  53. from __future__ import division
  54. from __future__ import print_function
  55. from __future__ import unicode_literals
  56. import re
  57. # Any block with fewer than this many lines does not need annotations.
  58. LINE_OBVIOUSNESS_LIMIT = 4
  59. # Maximum line width. This includes a terminating newline character.
  60. #
  61. # (This is the maximum before encoding, so that if the operating system
  62. # uses multiple characters to encode newline, that's still okay.)
  63. LINE_WIDTH=80
  64. class Problem(Exception):
  65. pass
  66. def close_parens_needed(expr):
  67. """Return the number of left-parentheses needed to make 'expr'
  68. balanced.
  69. >>> close_parens_needed("1+2")
  70. 0
  71. >>> close_parens_needed("(1 + 2)")
  72. 0
  73. >>> close_parens_needed("(1 + 2")
  74. 1
  75. >>> close_parens_needed("(1 + (2 *")
  76. 2
  77. >>> close_parens_needed("(1 + (2 * 3) + (4")
  78. 2
  79. """
  80. return expr.count("(") - expr.count(")")
  81. def truncate_expression(expr, new_width):
  82. """Given a parenthesized C expression in 'expr', try to return a new
  83. expression that is similar to 'expr', but no more than 'new_width'
  84. characters long.
  85. Try to return an expression with balanced parentheses.
  86. >>> truncate_expression("1+2+3", 8)
  87. '1+2+3'
  88. >>> truncate_expression("1+2+3+4+5", 8)
  89. '1+2+3...'
  90. >>> truncate_expression("(1+2+3+4)", 8)
  91. '(1+2...)'
  92. >>> truncate_expression("(1+(2+3+4))", 8)
  93. '(1+...)'
  94. >>> truncate_expression("(((((((((", 8)
  95. '((...))'
  96. """
  97. if len(expr) <= new_width:
  98. # The expression is already short enough.
  99. return expr
  100. ellipsis = "..."
  101. # Start this at the minimum that we might truncate.
  102. n_to_remove = len(expr) + len(ellipsis) - new_width
  103. # Try removing characters, one by one, until we get something where
  104. # re-balancing the parentheses still fits within the limit.
  105. while n_to_remove < len(expr):
  106. truncated = expr[:-n_to_remove] + ellipsis
  107. truncated += ")" * close_parens_needed(truncated)
  108. if len(truncated) <= new_width:
  109. return truncated
  110. n_to_remove += 1
  111. return ellipsis
  112. def commented_line(fmt, argument, maxwidth=LINE_WIDTH):
  113. # (This is a raw docstring so that our doctests can use \.)
  114. r"""
  115. Return fmt%argument, for use as a commented line. If the line would
  116. be longer than maxwidth, truncate argument but try to keep its
  117. parentheses balanced.
  118. Requires that fmt%"..." will fit into maxwidth characters.
  119. Requires that fmt ends with a newline.
  120. >>> commented_line("/* %s */\n", "hello world", 32)
  121. '/* hello world */\n'
  122. >>> commented_line("/* %s */\n", "hello world", 15)
  123. '/* hello... */\n'
  124. >>> commented_line("#endif /* %s */\n", "((1+2) && defined(FOO))", 32)
  125. '#endif /* ((1+2) && defi...) */\n'
  126. The default line limit is 80 characters including the newline:
  127. >>> long_argument = "long " * 100
  128. >>> long_line = commented_line("#endif /* %s */\n", long_argument)
  129. >>> len(long_line)
  130. 80
  131. >>> long_line[:40]
  132. '#endif /* long long long long long long '
  133. >>> long_line[40:]
  134. 'long long long long long long lon... */\n'
  135. If a line works out to being 80 characters naturally, it isn't truncated,
  136. and no ellipsis is added.
  137. >>> medium_argument = "a"*66
  138. >>> medium_line = commented_line("#endif /* %s */\n", medium_argument)
  139. >>> len(medium_line)
  140. 80
  141. >>> "..." in medium_line
  142. False
  143. >>> medium_line[:40]
  144. '#endif /* aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
  145. >>> medium_line[40:]
  146. 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa */\n'
  147. """
  148. assert fmt.endswith("\n")
  149. result = fmt % argument
  150. if len(result) <= maxwidth:
  151. return result
  152. else:
  153. # How long can we let the argument be? Try filling in the
  154. # format with an empty argument to find out.
  155. max_arg_width = maxwidth - len(fmt % "")
  156. result = fmt % truncate_expression(argument, max_arg_width)
  157. assert len(result) <= maxwidth
  158. return result
  159. def negate(expr):
  160. """Return a negated version of expr; try to avoid double-negation.
  161. We usually wrap expressions in parentheses and add a "!".
  162. >>> negate("A && B")
  163. '!(A && B)'
  164. But if we recognize the expression as negated, we can restore it.
  165. >>> negate(negate("A && B"))
  166. 'A && B'
  167. The same applies for defined(FOO).
  168. >>> negate("defined(FOO)")
  169. '!defined(FOO)'
  170. >>> negate(negate("defined(FOO)"))
  171. 'defined(FOO)'
  172. Internal parentheses don't confuse us:
  173. >>> negate("!(FOO) && !(BAR)")
  174. '!(!(FOO) && !(BAR))'
  175. """
  176. expr = expr.strip()
  177. # See whether we match !(...), with no intervening close-parens.
  178. m = re.match(r'^!\s*\(([^\)]*)\)$', expr)
  179. if m:
  180. return m.group(1)
  181. # See whether we match !?defined(...), with no intervening close-parens.
  182. m = re.match(r'^(!?)\s*(defined\([^\)]*\))$', expr)
  183. if m:
  184. if m.group(1) == "!":
  185. prefix = ""
  186. else:
  187. prefix = "!"
  188. return prefix + m.group(2)
  189. return "!(%s)" % expr
  190. def uncomment(s):
  191. """
  192. Remove existing trailing comments from an #else or #endif line.
  193. """
  194. s = re.sub(r'//.*','',s)
  195. s = re.sub(r'/\*.*','',s)
  196. return s.strip()
  197. def translate(f_in, f_out):
  198. """
  199. Read a file from f_in, and write its annotated version to f_out.
  200. """
  201. # A stack listing our current if/else state. Each member of the stack
  202. # is a list of directives. Each directive is a 3-tuple of
  203. # (command, rest, lineno)
  204. # where "command" is one of if/ifdef/ifndef/else/elif, and where
  205. # "rest" is an expression in a format suitable for use with #if, and where
  206. # lineno is the line number where the directive occurred.
  207. stack = []
  208. # the stack element corresponding to the top level of the file.
  209. whole_file = []
  210. cur_level = whole_file
  211. lineno = 0
  212. for line in f_in:
  213. lineno += 1
  214. m = re.match(r'\s*#\s*(if|ifdef|ifndef|else|endif|elif)\b\s*(.*)',
  215. line)
  216. if not m:
  217. # no directive, so we can just write it out.
  218. f_out.write(line)
  219. continue
  220. command,rest = m.groups()
  221. if command in ("if", "ifdef", "ifndef"):
  222. # The #if directive pushes us one level lower on the stack.
  223. if command == 'ifdef':
  224. rest = "defined(%s)"%uncomment(rest)
  225. elif command == 'ifndef':
  226. rest = "!defined(%s)"%uncomment(rest)
  227. elif rest.endswith("\\"):
  228. rest = rest[:-1]+"..."
  229. rest = uncomment(rest)
  230. new_level = [ (command, rest, lineno) ]
  231. stack.append(cur_level)
  232. cur_level = new_level
  233. f_out.write(line)
  234. elif command in ("else", "elif"):
  235. # We stay at the same level on the stack. If we have an #else,
  236. # we comment it.
  237. if len(cur_level) == 0 or cur_level[-1][0] == 'else':
  238. raise Problem("Unexpected #%s on %d"% (command,lineno))
  239. if (len(cur_level) == 1 and command == 'else' and
  240. lineno > cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT):
  241. f_out.write(commented_line("#else /* %s */\n",
  242. negate(cur_level[0][1])))
  243. else:
  244. f_out.write(line)
  245. cur_level.append((command, rest, lineno))
  246. else:
  247. # We pop one element on the stack, and comment an endif.
  248. assert command == 'endif'
  249. if len(stack) == 0:
  250. raise Problem("Unmatched #%s on %s"% (command,lineno))
  251. if lineno <= cur_level[0][2] + LINE_OBVIOUSNESS_LIMIT:
  252. f_out.write(line)
  253. elif len(cur_level) == 1 or (
  254. len(cur_level) == 2 and cur_level[1][0] == 'else'):
  255. f_out.write(commented_line("#endif /* %s */\n",
  256. cur_level[0][1]))
  257. else:
  258. f_out.write(commented_line("#endif /* %s || ... */\n",
  259. cur_level[0][1]))
  260. cur_level = stack.pop()
  261. if len(stack) or cur_level != whole_file:
  262. raise Problem("Missing #endif")
  263. if __name__ == '__main__':
  264. import sys,os
  265. if sys.argv[1] == "--self-test":
  266. import doctest
  267. doctest.testmod()
  268. sys.exit(0)
  269. for fn in sys.argv[1:]:
  270. with open(fn+"_OUT", 'w') as output_file:
  271. translate(open(fn, 'r'), output_file)
  272. os.rename(fn+"_OUT", fn)