supported_languages.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. #!/usr/bin/env python3
  2. #
  3. # Copyright 2022 The Google Fonts Tools Authors.
  4. # Copyright 2017,2022 Google LLC All Rights Reserved.
  5. #
  6. # Licensed under the Apache License, Version 2.0 (the "License");
  7. # you may not use this file except in compliance with the License.
  8. # You may obtain a copy of the License at
  9. #
  10. # http://www.apache.org/licenses/LICENSE-2.0
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS-IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. # =======================================================================
  19. # ======= This code-snippet uses hyperglot, which is licensed =======
  20. # ======= under the GNU GPLv3. So, the resulting license for =======
  21. # ======= any program using this snippet will also have to be =======
  22. # ======= the GNU GPLv3. =======
  23. # =======================================================================
  24. from gflanguages import LoadLanguages
  25. from hyperglot import parse as hyperglot_parse
  26. def _ParseFontChars(path):
  27. """
  28. Open the provided font path and extract the codepoints encoded in the font
  29. @return list of characters
  30. """
  31. from fontTools.ttLib import TTFont
  32. font = TTFont(path, lazy=True)
  33. cmap = font["cmap"].getBestCmap()
  34. font.close()
  35. # The cmap keys are int codepoints
  36. return [chr(c) for c in cmap.keys()]
  37. def SupportedLanguages(font_path, languages=None):
  38. """
  39. Get languages supported by given font file.
  40. Languages are pulled from the given set. Based on whether exemplar character
  41. sets are present in the given font.
  42. Logic based on Hyperglot:
  43. https://github.com/rosettatype/hyperglot/blob/3172061ca05a62c0ff330eb802a17d4fad8b1a4d/lib/hyperglot/language.py#L273-L301
  44. """
  45. if languages is None:
  46. languages = LoadLanguages()
  47. chars = _ParseFontChars(font_path)
  48. supported = []
  49. for lang in languages.values():
  50. if not lang.HasField('exemplar_chars') or \
  51. not lang.exemplar_chars.HasField('base'):
  52. continue
  53. base = hyperglot_parse.parse_chars(lang.exemplar_chars.base,
  54. decompose=False,
  55. retainDecomposed=False)
  56. if set(base).issubset(chars):
  57. supported.append(lang)
  58. return supported
  59. def portable_path(p):
  60. import os
  61. return os.path.join(*p.split('/'))
  62. def TEST_FILE(f):
  63. return portable_path("data/test/" + f)
  64. def test_SupportedLanguages():
  65. font = TEST_FILE('nunito/Nunito-Regular.ttf')
  66. supported = SupportedLanguages(font)
  67. langs = [supported[i].name for i, _ in enumerate(supported)]
  68. assert len(langs) == 225
  69. assert 'Lithuanian' in langs