coverage_test.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2015 Google Inc. All Rights Reserved.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. """Routines for checking character coverage of Roboto fonts.
  17. This script takes the name of the directory where the fonts are and checks
  18. that they cover all characters required in the Roboto extension contract.
  19. The data is in res/char_requirements.tsv.
  20. """
  21. __author__ = (
  22. "roozbeh@google.com (Roozbeh Pournader) and "
  23. "cibu@google.com (Cibu Johny)")
  24. import sys
  25. import glob
  26. from fontTools import ttLib
  27. from nototools import coverage
  28. from nototools import font_data
  29. from nototools import unicode_data
  30. def load_fonts():
  31. """Load all fonts built for Android."""
  32. all_fonts = (glob.glob('out/RobotoTTF/*.ttf')
  33. + glob.glob('out/RobotoCondensedTTF/*.ttf'))
  34. all_fonts = [ttLib.TTFont(font) for font in all_fonts]
  35. return all_fonts
  36. def _character_name(code):
  37. """Returns the printable name of a character."""
  38. return unicode_data.name(unichr(code), '<Unassigned>')
  39. def _print_char(code, additional_info=None):
  40. """Print a Unicode character as code and name and perhaps extra info."""
  41. sys.stdout.write('U+%04X %s' % (code, _character_name(code)))
  42. if additional_info is not None:
  43. sys.stdout.write('\t' + additional_info)
  44. sys.stdout.write('\n')
  45. def _range_string_to_set(range_str):
  46. """Convert a range encoding in a string to a set."""
  47. if '..' in range_str:
  48. range_start, range_end = range_str.split('..')
  49. range_start = int(range_start, 16)
  50. range_end = int(range_end, 16)
  51. return set(range(range_start, range_end+1))
  52. else:
  53. return {int(range_str, 16)}
  54. def _multiple_range_string_to_set(ranges_str):
  55. """Convert a string of multiple ranges to a set."""
  56. char_set = set()
  57. for range_str in ranges_str.split(', '):
  58. if range_str.startswith('and '):
  59. range_str = range_str[4:] # drop the 'and '
  60. char_set.update(_range_string_to_set(range_str))
  61. return char_set
  62. def _defined_characters_in_range(range_str):
  63. """Given a range string, returns defined Unicode characters in the range."""
  64. characters = set()
  65. for code in _range_string_to_set(range_str):
  66. if unicode_data.is_defined(code) and unicode_data.age(code) is not None:
  67. characters.add(code)
  68. return characters
  69. _EXCEPTION_STARTER = 'Everything except '
  70. def _find_required_chars(block_range, full_coverage_required, exceptions):
  71. """Finds required coverage based on a row of the spreadsheet."""
  72. chars_defined_in_block = _defined_characters_in_range(block_range)
  73. if full_coverage_required:
  74. return chars_defined_in_block
  75. else:
  76. if not exceptions:
  77. return set()
  78. if exceptions.startswith(_EXCEPTION_STARTER):
  79. exceptions = exceptions[len(_EXCEPTION_STARTER):]
  80. chars_to_exclude = _multiple_range_string_to_set(exceptions)
  81. return chars_defined_in_block - chars_to_exclude
  82. else:
  83. chars_to_limit_to = _multiple_range_string_to_set(exceptions)
  84. return chars_defined_in_block & chars_to_limit_to
  85. def main():
  86. """Checkes the coverage of all Roboto fonts."""
  87. with open('res/char_requirements.tsv') as char_reqs_file:
  88. char_reqs_data = char_reqs_file.read()
  89. # The format of the data to be parsed is like the following:
  90. # General Punctuation\t2000..206F\t111\t35\t54\t0\tEverything except 2028..202E, 2060..2064, and 2066..206F
  91. # Currency Symbols\t20A0..20CF\t29\t5\t24\t1\t
  92. required_set = set()
  93. for line in char_reqs_data.split('\n'):
  94. if line.startswith('#'): # Skip comment lines
  95. continue
  96. line = line.split('\t')
  97. if not line[0]:
  98. continue # Skip the first line and empty lines
  99. block_range = line[1]
  100. full_coverage_required = (line[5] == '1')
  101. exceptions = line[6]
  102. required_set.update(
  103. _find_required_chars(block_range,
  104. full_coverage_required,
  105. exceptions))
  106. # Skip Unicode 8.0 characters
  107. required_set = {ch for ch in required_set
  108. if float(unicode_data.age(ch)) <= 7.0}
  109. # Skip ASCII and C1 controls
  110. required_set -= set(range(0, 0x20) + range(0x7F, 0xA0))
  111. missing_char_found = False
  112. for font in load_fonts():
  113. font_coverage = coverage.character_set(font)
  114. missing_chars = required_set - font_coverage
  115. if missing_chars:
  116. missing_char_found = True
  117. font_name = font_data.font_name(font)
  118. print 'Characters missing from %s:' % font_name
  119. for char in sorted(missing_chars):
  120. _print_char(char)
  121. print
  122. if missing_char_found:
  123. sys.exit(1)
# Run the coverage check only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == '__main__':
    main()