#!/usr/bin/env python3
#
# Copyright 2022 The Google Fonts Tools Authors.
# Copyright 2017,2022 Google LLC All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Helper API for interaction with languages/regions/scripts data on the Google Fonts collection.
"""

import glob
import os
import unicodedata

from gflanguages import languages_public_pb2
from google.protobuf import text_format
from pkg_resources import resource_filename

try:
    from ._version import version as __version__  # type: ignore
except ImportError:
    __version__ = "0.0.0+unknown"

# Directory holding the packaged "languages", "scripts" and "regions"
# subdirectories of *.textproto files.
DATA_DIR = resource_filename("gflanguages", "data")


def _load_textprotos(base_dir, subdir, proto_class):
    """Parse every ``*.textproto`` file in ``base_dir/subdir`` into a dict.

    Args:
        base_dir: Directory containing ``subdir``. ``None`` falls back to
            ``DATA_DIR``.
        subdir: Name of the subdirectory to scan.
        proto_class: Message class to instantiate for each file; the parsed
            message must expose an ``id`` field.

    Returns:
        A dict mapping each parsed message's ``id`` to the message.
    """
    if base_dir is None:
        base_dir = DATA_DIR
    pattern = os.path.join(base_dir, subdir, "*.textproto")
    messages = {}
    # glob yields paths in arbitrary order; sorting makes the resulting
    # dict's iteration order reproducible across runs and platforms.
    for textproto_file in sorted(glob.iglob(pattern)):
        with open(textproto_file, "r", encoding="utf-8") as f:
            message = text_format.Parse(f.read(), proto_class())
        messages[message.id] = message
    return messages


def LoadLanguages(base_dir=DATA_DIR):
    """Load all language definitions found under ``base_dir/languages``.

    Args:
        base_dir: Data directory to read from. ``None`` falls back to
            ``DATA_DIR``.

    Returns:
        A dict mapping language id to its parsed ``LanguageProto``.
    """
    return _load_textprotos(
        base_dir, "languages", languages_public_pb2.LanguageProto
    )


def LoadScripts(base_dir=DATA_DIR):
    """Load all script definitions found under ``base_dir/scripts``.

    Args:
        base_dir: Data directory to read from. ``None`` falls back to
            ``DATA_DIR``.

    Returns:
        A dict mapping script id to its parsed ``ScriptProto``.
    """
    return _load_textprotos(
        base_dir, "scripts", languages_public_pb2.ScriptProto
    )


def LoadRegions(base_dir=DATA_DIR):
    """Load all region definitions found under ``base_dir/regions``.

    Args:
        base_dir: Data directory to read from. ``None`` falls back to
            ``DATA_DIR``.

    Returns:
        A dict mapping region id to its parsed ``RegionProto``.
    """
    return _load_textprotos(
        base_dir, "regions", languages_public_pb2.RegionProto
    )


def parse(exemplars: str) -> set:
    """Parse a whitespace-separated exemplar list into a set of characters.

    Tokens longer than one character have leading ``{`` and trailing ``}``
    removed, so a sequence written as ``{ch}`` contributes ``c`` and ``h``,
    while a lone ``{`` or ``}`` token is kept as a character. Every
    character of each token is added, together with every character of the
    token's NFC-normalized form.

    Args:
        exemplars: Exemplar tokens separated by whitespace.

    Returns:
        A set of one-character strings.
    """
    codepoints = set()
    for chars in exemplars.split():
        if len(chars) > 1:
            chars = chars.lstrip("{").rstrip("}")
        codepoints.update(chars)
        # When the token is already NFC this adds nothing new; otherwise it
        # contributes the composed characters alongside the originals.
        codepoints.update(unicodedata.normalize("NFC", chars))
    return codepoints