123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- #!/usr/bin/env python3
- # Copyright (c) 2020, The Tor Project, Inc.
- # See LICENSE for licensing information.
- #
- # DO NOT COMMIT OR MERGE CODE THAT IS RUN THROUGH THIS TOOL YET.
- #
- # WE ARE STILL DISCUSSING OUR DESIRED STYLE AND ITERATING ON IT,
- # ALONG WITH THE TOOLS THAT ACHIEVE IT.
- # (12 Feb 2020)
- #
- """
- This program uses a set of pluggable filters to inspect and transform
- our C code.
- """
- import os
- import re
- import sys
class Filter:
    """Base class for filters: a Filter maps a string holding a C program
       to another string.

       The base implementation is the identity transformation; subclasses
       override transform() to do real work.
    """

    def __init__(self):
        """Set up the filter.  The base class keeps no state."""

    def transform(self, s):
        """Return the transformed version of the C source text 's'.

           This default implementation hands 's' back unchanged.
        """
        return s
class CompoundFilt(Filter):
    """A filter made by chaining other filters, one after another.

       Each contained filter receives the output of the one before it.
    """

    def __init__(self, items=()):
        """Create a chain that initially holds the filters in 'items'."""
        super().__init__()
        self._filters = [*items]

    def add(self, filt):
        """Append 'filt' to the end of the chain.

           Returns this CompoundFilt, so that calls can be chained.
        """
        self._filters.append(filt)
        return self

    def transform(self, s):
        """Feed 's' through every filter in order; return the final text."""
        text = s
        for stage in self._filters:
            text = stage.transform(text)
        return text
class SplitError(Exception):
    """Raised by split_comments() when it cannot make sense of a C file."""
def split_comments(s):
    r"""Iterate over the C code in 's', and yield a sequence of (code,
       comment) pairs.  Each pair will contain either a nonempty piece
       of code, a nonempty comment, or both.

       String and character literals count as code, so comment markers
       inside them do not start a comment.

       This is a generator: SplitError is raised during iteration if a
       point is reached where the remaining text is neither code nor a
       complete comment (for example, an unterminated comment or literal).

       >>> list(split_comments("hello // world\n"))
       [('hello ', '// world'), ('\n', '')]
       >>> list(split_comments("a /* b cd */ efg // hi"))
       [('a ', '/* b cd */'), (' efg ', '// hi')]
       >>> list(split_comments("x = y/'*' /* z */"))
       [("x = y/'*' ", '/* z */')]
    """

    # Matches a block of code without any comments.  The alternatives are:
    # a run of ordinary characters; a string literal; a character literal;
    # a '/' that does not begin a comment.
    #
    # The literals are written as "plain run, then (escape, plain run)*"
    # so that there is only one way to match any given text: the simpler
    # '(?:[^\\"]+|\\.)*' backtracks exponentially when the closing quote
    # is missing.
    #
    # The slash uses a lookahead rather than consuming the character
    # after it, so that a quote directly following a '/' still opens a
    # literal.
    PAT_CODE = re.compile(r'''^(?: [^/"']+ |
                                 "[^\\"]*(?:\\.[^\\"]*)*" |
                                 '[^\\']*(?:\\.[^\\']*)*' |
                                 /(?![/*])
                              )*''', re.VERBOSE|re.DOTALL)

    # Matches a C99 "//" comment.  DOTALL is not set, so '.' stops at the
    # end of the line and the newline is left for the following code.
    PAT_C99_COMMENT = re.compile(r'^//.*$', re.MULTILINE)

    # Matches a C "/* */" comment.
    PAT_C_COMMENT = re.compile(r'^/\*(?:[^*]|\*+[^*/])*\*+/', re.DOTALL)

    while True:
        # Find some non-comment code at the start of the string, and
        # advance past it.  PAT_CODE can match the empty string, so the
        # match always succeeds; 'code' is "" when there is no code here.
        code = PAT_CODE.match(s).group(0)
        s = s[len(code):]

        # Now we have a comment, or the end of the string (or something
        # we can't parse).  Find out which, and how long it is.
        if s.startswith("//"):
            m = PAT_C99_COMMENT.match(s)
        else:
            m = PAT_C_COMMENT.match(s)

        # If we got a comment, save it and advance the string.  Otherwise
        # set 'comment' to "".
        comment = m.group(0) if m else ""
        s = s[len(comment):]

        # If we found no code and no comment, we should be at the end of
        # the string...
        if not code and not comment:
            if s:
                # But in case we *aren't* at the end of the string, raise
                # an error that shows where we got stuck.
                raise SplitError(
                    "can't parse C source starting at {!r}".format(s[:40]))
            # ... all is well, we're done scanning the code.
            return

        yield (code, comment)
class IgnoreCommentsFilt(Filter):
    """Adapter: runs another filter over the code in a C program while
       leaving every comment exactly as it was.
    """

    def __init__(self, filt):
        """Wrap 'filt', the filter to apply to the non-comment text."""
        super().__init__()
        self._filt = filt

    def transform(self, s):
        """Return 's' with the wrapped filter applied to each stretch of
           code found by split_comments(); the comments themselves are
           copied through untouched.
        """
        inner = self._filt
        return "".join(
            inner.transform(code) + comment
            for code, comment in split_comments(s))
class RegexFilt(Filter):
    """Filter that performs a regular-expression substitution on C code."""

    def __init__(self, pat, replacement, flags=0):
        """Compile 'pat' with the 're' module flags in 'flags', and
           remember 'replacement' as the text to substitute for each match.
        """
        super().__init__()
        self._replacement = replacement
        self._pat = re.compile(pat, flags)

    def transform(self, s):
        """Return 's' with every match of the pattern replaced."""
        return self._pat.sub(self._replacement, s)
def revise(fname, filt):
    """Run 'filt' on the contents of the file in 'fname'.  If any
       changes are made, then replace the file with its new contents.
       Otherwise, leave the file alone.

       'filt' is any object with a transform(str) -> str method.  The
       file is read and written in text mode with the default encoding.

       The new contents are first written to "<fname>_codetool_tmp" and
       then moved over the original, so 'fname' never holds a partially
       written result.  If writing or moving fails, the temporary file
       is removed and the original exception propagates.
    """
    # Use a context manager so the input file is closed promptly.
    with open(fname, 'r') as f:
        contents = f.read()

    result = filt.transform(contents)
    if result == contents:
        return

    tmpname = "{}_codetool_tmp".format(fname)
    try:
        with open(tmpname, 'w') as f:
            f.write(result)
        # Unlike os.rename(), os.replace() overwrites an existing
        # destination on every platform.
        os.replace(tmpname, fname)
    except BaseException:
        # Clean up after any failure -- an interrupt included -- and then
        # re-raise it.  The temporary file may not exist (for instance if
        # it could not be created); a failure to remove it must not hide
        # the error that brought us here.
        try:
            os.unlink(tmpname)
        except OSError:
            pass
        raise
- ##############################
- # Filtering rules.
- ##############################
# Rule: in a MOCK_IMPL() that begins a line, whatever follows the first
# comma is placed on a line of its own.
BREAK_MOCK_IMPL = RegexFilt(pat=r'^MOCK_IMPL\(([^,]+),\s*(\S+)',
                            replacement=r'MOCK_IMPL(\1,\n\2',
                            flags=re.MULTILINE)
# Rule: a loop iteration terminator (one of the *_FOREACH_END macros) stays
# on the same line as the } before it.  Any whitespace between the two, and
# between the macro name and its open parenthesis, becomes a single space.
RESTORE_SMARTLIST_END = RegexFilt(
    pat=r'}\s*(SMARTLIST|DIGESTMAP|DIGEST256MAP|STRMAP|MAP)_FOREACH_END\s*\(',
    replacement=r'} \1_FOREACH_END (',
    flags=re.MULTILINE)
# The filter that this tool applies to each file: the rules above, run in
# this order, on code only (comments are passed through untouched).
F = CompoundFilt([
    IgnoreCommentsFilt(CompoundFilt([
        RESTORE_SMARTLIST_END,
        BREAK_MOCK_IMPL,
    ])),
])
if __name__ == '__main__':
    # Every command-line argument names a file to filter in place.
    targets = sys.argv[1:]
    for fname in targets:
        revise(fname, F)
|