123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687 |
- #!/usr/bin/env python3
- #
- # Copyright 2022 The Google Fonts Tools Authors.
- # Copyright 2017,2022 Google LLC All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS-IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- """
- Helper API for interaction with languages/regions/scripts
- data on the Google Fonts collection.
- """
- import glob
- import os
- import unicodedata
- import sys
- from gflanguages import languages_public_pb2
- from google.protobuf import text_format
- if sys.version_info < (3, 10):
- from importlib_resources import files
- else:
- from importlib.resources import files
- try:
- from ._version import version as __version__ # type: ignore
- except ImportError:
- __version__ = "0.0.0+unknown"
def _load_thing(thing_type, proto_class, base_dir=None):
    """Load every ``*.textproto`` record of one kind into a dict keyed by id.

    Args:
        thing_type: Name of the data subdirectory to read (the public loaders
            in this module pass "languages", "scripts" and "regions"). It is
            also used in the duplicate-id error message.
        proto_class: Zero-argument callable producing the empty message that
            ``text_format.Parse`` fills in for each file.
        base_dir: Optional directory containing a ``thing_type``
            subdirectory. When ``None``, the entries under ``thing_type`` in
            the ``gflanguages.data`` package resources are read instead.

    Returns:
        A dict mapping each parsed message's ``id`` to the message.

    Raises:
        AssertionError: If two files declare the same ``id``.
    """
    things = {}

    def read_a_thing(contents):
        thing = text_format.Parse(contents, proto_class())
        if thing.id in things:
            # Raised explicitly instead of via ``assert`` so the duplicate
            # check still runs under ``python -O``; the exception type and
            # message are the same as before.
            raise AssertionError(f"Duplicate {thing_type} id: {thing.id}")
        things[thing.id] = thing

    if base_dir is not None:
        pattern = os.path.join(base_dir, thing_type, "*.textproto")
        for textproto_file in glob.iglob(pattern):
            with open(textproto_file, "r", encoding="utf-8") as f:
                read_a_thing(f.read())
    else:
        data_dir = files("gflanguages.data").joinpath(thing_type)
        for textproto_file in data_dir.iterdir():
            # Package resources may contain non-data entries; skip them.
            if not textproto_file.name.endswith(".textproto"):
                continue
            read_a_thing(textproto_file.read_text(encoding="utf-8"))
    return things
def LoadLanguages(base_dir=None):
    """Return the language records as a dict keyed by id.

    Delegates to ``_load_thing`` for the "languages" data, parsing each file
    into a ``LanguageProto``. ``base_dir`` is forwarded unchanged.
    """
    proto_class = languages_public_pb2.LanguageProto
    return _load_thing("languages", proto_class, base_dir)
def LoadScripts(base_dir=None):
    """Return the script records as a dict keyed by id.

    Delegates to ``_load_thing`` for the "scripts" data, parsing each file
    into a ``ScriptProto``. ``base_dir`` is forwarded unchanged.
    """
    proto_class = languages_public_pb2.ScriptProto
    return _load_thing("scripts", proto_class, base_dir)
def LoadRegions(base_dir=None):
    """Return the region records as a dict keyed by id.

    Delegates to ``_load_thing`` for the "regions" data, parsing each file
    into a ``RegionProto``. ``base_dir`` is forwarded unchanged.
    """
    proto_class = languages_public_pb2.RegionProto
    return _load_thing("regions", proto_class, base_dir)
def parse(exemplars: str) -> set:
    """Collect the individual characters named by an exemplar string.

    ``exemplars`` is split on whitespace. A token longer than one character
    has its leading ``{`` and trailing ``}`` characters stripped; a
    single-character token is kept verbatim, so a lone brace counts as
    itself. Every character of each token is collected, together with every
    character of the token's NFC-normalized form, so a decomposed sequence
    contributes both its parts and its precomposed equivalent.

    Args:
        exemplars: Whitespace-separated exemplar tokens.

    Returns:
        A set of one-character strings (not integer codepoints).
    """
    codepoints = set()
    for token in exemplars.split():
        if len(token) > 1:
            token = token.lstrip("{").rstrip("}")
        # Adding the NFC form unconditionally is equivalent to adding it
        # only when it differs: when it is identical, the second update
        # contributes the same characters.
        codepoints.update(unicodedata.normalize("NFC", token))
        codepoints.update(token)
    return codepoints
|