__init__.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
#!/usr/bin/env python3
#
# Copyright 2022 The Google Fonts Tools Authors.
# Copyright 2017,2022 Google LLC All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Helper API for interaction with languages/regions/scripts
data on the Google Fonts collection.
"""
  22. import glob
  23. import os
  24. import unicodedata
  25. import sys
  26. from gflanguages import languages_public_pb2
  27. from google.protobuf import text_format
  28. if sys.version_info < (3, 10):
  29. from importlib_resources import files
  30. else:
  31. from importlib.resources import files
  32. try:
  33. from ._version import version as __version__ # type: ignore
  34. except ImportError:
  35. __version__ = "0.0.0+unknown"
  36. def _load_thing(thing_type, proto_class, base_dir=None):
  37. things = {}
  38. def read_a_thing(contents):
  39. proto = proto_class()
  40. thing = text_format.Parse(contents, proto)
  41. assert thing.id not in things, f"Duplicate {thing_type} id: {thing.id}"
  42. things[thing.id] = thing
  43. if base_dir is not None:
  44. thing_dir = os.path.join(base_dir, thing_type)
  45. for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")):
  46. with open(textproto_file, "r", encoding="utf-8") as f:
  47. read_a_thing(f.read())
  48. else:
  49. for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir():
  50. if not textproto_file.name.endswith(".textproto"):
  51. continue
  52. read_a_thing(textproto_file.read_text(encoding="utf-8"))
  53. return things
  54. def LoadLanguages(base_dir=None):
  55. return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir)
  56. def LoadScripts(base_dir=None):
  57. return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir)
  58. def LoadRegions(base_dir=None):
  59. return _load_thing("regions", languages_public_pb2.RegionProto, base_dir)
  60. def parse(exemplars: str):
  61. """Parses a list of exemplar characters into a set of codepoints."""
  62. codepoints = set()
  63. for chars in exemplars.split():
  64. if len(chars) > 1:
  65. chars = chars.lstrip("{").rstrip("}")
  66. normalized_chars = unicodedata.normalize("NFC", chars)
  67. if normalized_chars != chars:
  68. for char in normalized_chars:
  69. codepoints.add(char)
  70. for char in chars:
  71. codepoints.add(char)
  72. return codepoints