codetool.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. #!/usr/bin/env python3
  2. # Copyright (c) 2020, The Tor Project, Inc.
  3. # See LICENSE for licensing information.
  4. #
  5. # DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
  6. #
  7. # WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
  8. # ALONG WITH THE TOOLS THAT ACHIEVE IT.
  9. # (12 Feb 2020)
  10. #
  11. """
  12. This program uses a set of pluggable filters to inspect and transform
  13. our C code.
  14. """
  15. import os
  16. import re
  17. import sys
  18. class Filter:
  19. """A Filter transforms a string containing a C program."""
  20. def __init__(self):
  21. pass
  22. def transform(self, s):
  23. return s
  24. class CompoundFilt(Filter):
  25. """A CompoundFilt runs another set of filters, in sequence."""
  26. def __init__(self, items=()):
  27. super().__init__()
  28. self._filters = list(items)
  29. def add(self, filt):
  30. self._filters.append(filt)
  31. return self
  32. def transform(self, s):
  33. for f in self._filters:
  34. s = f.transform(s)
  35. return s
  36. class SplitError(Exception):
  37. """Exception: raised if split_comments() can't understand a C file."""
  38. pass
  39. def split_comments(s):
  40. r"""Iterate over the C code in 's', and yield a sequence of (code,
  41. comment) pairs. Each pair will contain either a nonempty piece
  42. of code, a nonempty comment, or both.
  43. >>> list(split_comments("hello // world\n"))
  44. [('hello ', '// world'), ('\n', '')]
  45. >>> list(split_comments("a /* b cd */ efg // hi"))
  46. [('a ', '/* b cd */'), (' efg ', '// hi')]
  47. """
  48. # Matches a block of code without any comments.
  49. PAT_CODE = re.compile(r'''^(?: [^/"']+ |
  50. "(?:[^\\"]+|\\.)*" |
  51. '(?:[^\\']+|\\.)*' |
  52. /[^/*]
  53. )*''', re.VERBOSE|re.DOTALL)
  54. # Matches a C99 "//" comment.
  55. PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE)
  56. # Matches a C "/* */" comment.
  57. PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)
  58. while True:
  59. # Find some non-comment code at the start of the string.
  60. m = PAT_CODE.match(s)
  61. # If we found some code here, save it and advance the string.
  62. # Otherwise set 'code' to "".
  63. if m:
  64. code = m.group(0)
  65. s = s[m.end():]
  66. else:
  67. code = ""
  68. # Now we have a comment, or the end of the string. Find out which
  69. # one, and how long it is.
  70. if s.startswith("//"):
  71. m = PAT_C99_COMMENT.match(s)
  72. else:
  73. m = PAT_C_COMMENT.match(s)
  74. # If we got a comment, save it and advance the string. Otherwise
  75. # set 'comment' to "".
  76. if m:
  77. comment = m.group(0)
  78. s = s[m.end():]
  79. else:
  80. comment = ""
  81. # If we found no code and no comment, we should be at the end of
  82. # the string...
  83. if code == "" and comment == "":
  84. if s:
  85. # But in case we *aren't* at the end of the string, raise
  86. # an error.
  87. raise SplitError()
  88. # ... all is well, we're done scanning the code.
  89. return
  90. yield (code, comment)
  91. class IgnoreCommentsFilt(Filter):
  92. """Wrapper: applies another filter to C code only, excluding comments.
  93. """
  94. def __init__(self, filt):
  95. super().__init__()
  96. self._filt = filt
  97. def transform(self, s):
  98. result = []
  99. for code, comment in split_comments(s):
  100. result.append(self._filt.transform(code))
  101. result.append(comment)
  102. return "".join(result)
  103. class RegexFilt(Filter):
  104. """A regex filter applies a regular expression to some C code."""
  105. def __init__(self, pat, replacement, flags=0):
  106. super().__init__()
  107. self._pat = re.compile(pat, flags)
  108. self._replacement = replacement
  109. def transform(self, s):
  110. s, _ = self._pat.subn(self._replacement, s)
  111. return s
  112. def revise(fname, filt):
  113. """Run 'filt' on the contents of the file in 'fname'. If any
  114. changes are made, then replace the file with its new contents.
  115. Otherwise, leave the file alone.
  116. """
  117. contents = open(fname, 'r').read()
  118. result = filt.transform(contents)
  119. if result == contents:
  120. return
  121. tmpname = "{}_codetool_tmp".format(fname)
  122. try:
  123. with open(tmpname, 'w') as f:
  124. f.write(result)
  125. os.rename(tmpname, fname)
  126. except:
  127. os.unlink(tmpname)
  128. raise
  129. ##############################
  130. # Filtering rules.
  131. ##############################
  132. # Make sure that there is a newline after the first comma in a MOCK_IMPL()
  133. BREAK_MOCK_IMPL = RegexFilt(
  134. r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
  135. r'MOCK_IMPL(\1,\n\2',
  136. re.MULTILINE)
  137. # Make sure there is no newline between } and a loop iteration terminator.
  138. RESTORE_SMARTLIST_END = RegexFilt(
  139. r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
  140. r'} \1_FOREACH_END (',
  141. re.MULTILINE)
  142. F = CompoundFilt()
  143. F.add(IgnoreCommentsFilt(CompoundFilt([
  144. RESTORE_SMARTLIST_END,
  145. BREAK_MOCK_IMPL])))
  146. if __name__ == '__main__':
  147. for fname in sys.argv[1:]:
  148. revise(fname, F)