#!/usr/bin/env python3
#
# Copyright 2022 The Google Fonts Tools Authors.
# Copyright 2017,2022 Google LLC All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Helper API for interaction with languages/regions/scripts
data on the Google Fonts collection.
"""

import glob
import os
import unicodedata
import sys

from gflanguages import languages_public_pb2
from google.protobuf import text_format

if sys.version_info < (3, 10):
    from importlib_resources import files
else:
    from importlib.resources import files

try:
    from ._version import version as __version__  # type: ignore
except ImportError:
    __version__ = "0.0.0+unknown"


def _load_thing(thing_type, proto_class, base_dir=None):
    """Load every ``*.textproto`` record of one kind into a dict keyed by id.

    Args:
        thing_type: Name of the data subdirectory to read; this module
            passes "languages", "scripts" and "regions".
        proto_class: Protobuf message class instantiated once per file and
            filled in by ``text_format.Parse``.
        base_dir: Optional directory that contains a ``thing_type``
            subdirectory. When ``None``, records are read from the
            ``gflanguages.data`` package resources instead.

    Returns:
        A dict mapping each parsed message's ``id`` to the message.

    Raises:
        AssertionError: If two records share the same ``id``.
    """
    things = {}

    def read_a_thing(contents):
        # Parse one textproto payload and register it under its id.
        thing = text_format.Parse(contents, proto_class())
        if thing.id in things:
            # Raised explicitly rather than via an ``assert`` statement so
            # the duplicate check still runs under ``python -O``; the
            # exception type and message are what callers already see.
            raise AssertionError(f"Duplicate {thing_type} id: {thing.id}")
        things[thing.id] = thing

    if base_dir is not None:
        # Caller-supplied data tree on the local filesystem.
        thing_dir = os.path.join(base_dir, thing_type)
        for textproto_file in glob.iglob(os.path.join(thing_dir, "*.textproto")):
            with open(textproto_file, "r", encoding="utf-8") as f:
                read_a_thing(f.read())
    else:
        # Data bundled with the installed package.
        for textproto_file in files("gflanguages.data").joinpath(thing_type).iterdir():
            if not textproto_file.name.endswith(".textproto"):
                continue
            read_a_thing(textproto_file.read_text(encoding="utf-8"))
    return things


def LoadLanguages(base_dir=None):
    """Return a dict of ``LanguageProto`` messages keyed by id.

    Args:
        base_dir: Optional data root forwarded to ``_load_thing``; ``None``
            selects the packaged data.
    """
    return _load_thing("languages", languages_public_pb2.LanguageProto, base_dir)


def LoadScripts(base_dir=None):
    """Return a dict of ``ScriptProto`` messages keyed by id.

    Args:
        base_dir: Optional data root forwarded to ``_load_thing``; ``None``
            selects the packaged data.
    """
    return _load_thing("scripts", languages_public_pb2.ScriptProto, base_dir)


def LoadRegions(base_dir=None):
    """Return a dict of ``RegionProto`` messages keyed by id.

    Args:
        base_dir: Optional data root forwarded to ``_load_thing``; ``None``
            selects the packaged data.
    """
    return _load_thing("regions", languages_public_pb2.RegionProto, base_dir)


def parse(exemplars: str):
    """Parses a list of exemplar characters into a set of codepoints.

    The input is split on whitespace. Tokens longer than one character have
    any leading ``{`` and trailing ``}`` removed; a single-character token is
    kept verbatim, so a lone brace counts as a character.

    Args:
        exemplars: Whitespace-separated exemplar tokens.

    Returns:
        A set of one-character strings containing every character of each
        token, both as written and in its NFC-normalized form.
    """
    codepoints = set()
    for chars in exemplars.split():
        if len(chars) > 1:
            chars = chars.lstrip("{").rstrip("}")
        # Keep both spellings so callers match either the composed or the
        # decomposed form; when they are equal the second update is a no-op.
        codepoints.update(unicodedata.normalize("NFC", chars))
        codepoints.update(chars)
    return codepoints