Browse Source

Merge commit 'e75e874ca3efc62d8389e8ba47d001d7e1314ec7' into lang-v0.6.0

Simon Cozens 10 months ago
parent
commit
ca36943762

+ 5 - 5
lang/.github/workflows/publish-release.yml

@@ -10,12 +10,12 @@ jobs:
     name: Build distribution
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
         with:
           submodules: recursive
           fetch-depth: 0
       - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v5
         with:
           python-version: '3.x'
 
@@ -50,7 +50,7 @@ jobs:
       - name: Build a binary wheel and a source tarball
         run: python3 -m build
       - name: Store the distribution packages
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: python-package-distributions
           path: dist/
@@ -69,12 +69,12 @@ jobs:
       id-token: write  # IMPORTANT: mandatory for trusted publishing
     steps:
       - name: Download all the dists
-        uses: actions/download-artifact@v3
+        uses: actions/download-artifact@v4
         with:
           name: python-package-distributions
           path: dist/
       - name: Publish distribution 📦 to PyPI
-        uses: pypa/gh-action-pypi-publish@v1.8.11
+        uses: pypa/gh-action-pypi-publish@v1.8.14
         with:
           # repository-url: https://test.pypi.org/legacy/ # for testing purposes
           verify-metadata: false # twine previously didn't verify metadata when uploading

+ 38 - 0
lang/.github/workflows/test.yml

@@ -0,0 +1,38 @@
+name: Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+env:
+  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+jobs:
+  build:
+    runs-on: ${{ matrix.platform }}
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        platform: [ubuntu-latest, windows-latest]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install packages
+        run: |
+          pip install '.[dev]'
+          pip install black pylint
+      - name: Check formatting
+        run: |
+          black --diff --check Lib --exclude ".*_pb2.*|_version.py"
+      - name: Run Tests
+        run: |
+          pytest tests

+ 0 - 29
lang/.github/workflows/tox.yml

@@ -1,29 +0,0 @@
-name: Tests & linting with tox
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      max-parallel: 5
-      matrix:
-        python-version: [3.7, 3.8, 3.9]
-
-    steps:
-    - uses: actions/checkout@v1
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install tox tox-gh-actions
-
-    - name: Test with tox
-      run: tox

+ 16 - 0
lang/Lib/gflanguages/__init__.py

@@ -21,6 +21,7 @@ data on the Google Fonts collection.
 """
 import glob
 import os
+import unicodedata
 
 from gflanguages import languages_public_pb2
 from google.protobuf import text_format
@@ -71,3 +72,18 @@ def LoadRegions(base_dir=DATA_DIR):
             region = text_format.Parse(f.read(), languages_public_pb2.RegionProto())
             regions[region.id] = region
     return regions
+
+
+def parse(exemplars: str):
+    """Parses a list of exemplar characters into a set of codepoints."""
+    codepoints = set()
+    for chars in exemplars.split():
+        if len(chars) > 1:
+            chars = chars.lstrip("{").rstrip("}")
+        normalized_chars = unicodedata.normalize("NFC", chars)
+        if normalized_chars != chars:
+            for char in normalized_chars:
+                codepoints.add(char)
+        for char in chars:
+            codepoints.add(char)
+    return codepoints

+ 12 - 0
lang/Lib/gflanguages/data/languages/ati_Latn.textproto

@@ -0,0 +1,12 @@
+id: "ati_Latn"
+language: "ati"
+script: "Latn"
+region: "CI"
+name: "Attié"
+population: 739000
+exemplar_chars {
+  base: "a A b B c C d D {dzh} {DZH} e E ë Ë ɛ Ɛ f F g G {gb} {GB} h H i I j J k K {kp} {KP} l L m M n N o O ö Ö ɔ Ɔ p P r R s S {sh} {SH} t T {ts} {TS} {tsh} {TSH} u U v V w W y Y z Z ˗ ʼ ˮ "
+  marks: "◌̈"
+  auxiliary: "q Q x X"
+}
+source: "Firmin Ahoua & Patrice Achie Brouh, Parlons akyé bodin, Paris, L’Harmattan, 2009"

+ 6 - 2
lang/Lib/gflanguages/data/languages/dow_Latn.textproto

@@ -5,6 +5,10 @@ name: "Doyayo"
 population: 15000
 region: "CM"
 exemplar_chars {
-  base: "a A b B ɓ Ɓ d D ɗ Ɗ ɛ Ɛ e E f F g G h H i I k K l L m M n N ŋ Ŋ ɔ Ɔ o O p P r R s S t T u U v V w W y Y z Z"
+  base: "a A á Á à À {a̧} {A̧} {á̧} {Á̧} {à̧} {À̧} {ā̧} {Ā̧} {a̧̍} {A̧̍} ā Ā {a̍} {A̍} b B ɓ Ɓ d D ɗ Ɗ e E é É è È ē Ē ɛ Ɛ {ɛ́} {Ɛ́} {ɛ̀} {Ɛ̀} {ɛ̧} {Ɛ̧} {ɛ̧̄} {Ɛ̧̄} {ɛ̄} {Ɛ̄} {ɛ̍} {Ɛ̍} f F g G h H i I í Í ì Ì ī Ī {i̧} {I̧} {í̧} {Í̧} {ì̧} {Ì̧} {ī̧} {Ī̧} k K l L m M n N ŋ Ŋ o O ó Ó ò Ò ō Ō {o̍} {O̍} ɔ Ɔ {ɔ́} {Ɔ́} {ɔ̀} {Ɔ̀} {ɔ̧} {Ɔ̧} {ɔ̧́} {Ɔ̧́} {ɔ̧̀} {Ɔ̧̀} {ɔ̧̄} {Ɔ̧̄} {ɔ̄} {Ɔ̄} {ɔ̍} {Ɔ̍} p P r R s S t T u U ú Ú ù Ù {u̧} {U̧} {ū̧} {Ū̧} ū Ū v V w W y Y z Z"
+  marks: "◌̀ ◌́ ◌̄ ◌̍ ◌̧"
   auxiliary: "c C j J q Q x X"
-}
+}
+source: "Djataou Pascal, Dictionnaire Doyayo-Français, SIL International, 2012"
+source: "Doyayo Nouveau Testament = Nouveau Testament doyayo, Alliance biblique du Cameroun, 1991"
+note: "Alliance biblique du Cameroun 1991 uses top accents as tone marks on some words."

File diff suppressed because it is too large
+ 0 - 0
lang/Lib/gflanguages/data/languages/en_Dsrt.textproto


+ 15 - 0
lang/Lib/gflanguages/data/languages/eto_Latn.textproto

@@ -0,0 +1,15 @@
+id: "eto_Latn"
+language: "eto"
+script: "Latn"
+name: "Eton (Cameroon)"
+population: 400000
+region: "CM"
+exemplar_chars {
+    base: "a A à À á Á b B c C d D e E é É è È ə Ə {ə̀} {Ə̀} {ə́} {Ə́} ɛ Ɛ {ɛ̀} {Ɛ̀} {ɛ́} {Ɛ́} g G {gb} {GB} {mgb} {MGB} h H i I ì Ì í Í j J {nj} {NJ} k K {kp} {KP} l L m M {mb} {MB} ǹ Ǹ ń Ń {nd} {ND} {ny} {NY} ŋ Ŋ {ŋ̀} {Ŋ̀} {ŋ́} {Ŋ́} {ŋm} {ŊM} o O ò Ò ó Ó ɔ Ɔ {ɔ̀} {Ɔ̀} {ɔ́} {Ɔ́} p P r R s S t T u U ù Ù ú Ú v V w W y Y z Z"
+    marks: "◌̀ ◌́"
+    auxiliary: "ɲ Ɲ {ɲ̀} {Ɲ̀} {ɲ́} {Ɲ́}"
+}
+note: "Van de Velde orthography uses ɲ."
+source: "Hubert Fernand Nkoumou, Dictionnaire français-eton = Káládà bìbúg pùlàsí-ítón, Cameroun, ACAPLA, 2007"
+source: "Mark Van de Velde, A description of Eton: phonology, morphology, basic syntax and lexicon, 2006"
+source: "Mark Van de Velde et Kisito Essele Essele, Dictionnaire éton-français, 2022"

+ 13 - 0
lang/Lib/gflanguages/data/languages/giz_Latn.textproto

@@ -0,0 +1,13 @@
+id: "giz_Latn"
+language: "giz"
+script: "Latn"
+name: "Southern Giziga"
+preferred_name: "Giziga"
+population: 211000
+region: "CM"
+exemplar_chars {
+  base: "a A i I u U e E b B ɓ Ɓ c C d D ɗ Ɗ f F g G {gb} {GB} {gw} {GW} h H j J {kp} {KP} {kw} {KW} l L m M {mb} {MB} {nd} {ND} {ng} {NG} {ngb} {NGB} {ngw} {NGW} {nj} {NJ} ŋ Ŋ p P r R s S {sl} {SL} v V {vb} {VB} w W y Y {zl} {ZL} ˀ"
+}
+note: "Southern Giziga (ISO 639-3: giz) and Northern Giziga (ISO 639-3: gis) are distinct languages in Barreteau et al. (1984) but are considered variants of the same language in Lukas 1970, Barreteau & Dieu (2000), Shay 2021.\nU+02C0 ˀ is used in Alliance Biblique du Cameroun 2011 and some other works."
+source: "Bible en langue Guiziga, Alliance Biblique du Cameroun, 2011"
+source: "Erin Shay, A Grammar of Giziga: A Chadic Language of Far North Cameroon, Leiden, Brill, 2021"

+ 4 - 2
lang/Lib/gflanguages/data/languages/gnd_Latn.textproto

@@ -5,6 +5,8 @@ name: "Zulgo-Gemzek"
 population: 26000
 region: "CM"
 exemplar_chars {
-  base: "a A b B ɓ Ɓ d D ɗ Ɗ e E ə Ə f F g G h H i I k K l L m M n N ŋ Ŋ p P r R s S t T u U v V w W y Y z Z"
+  base: "a A à À b B ɓ Ɓ d D ɗ Ɗ e E è È ə Ə {ə̀} {Ə̀} f F g G h H i I ì Ì k K l L m M n N ŋ Ŋ p P r R s S t T u U ù Ù v V w W y Y z Z"
+  marks: "◌̀"
   auxiliary: "c C j J o O q Q x X"
-}
+}
+source: "Zulgo Bible, Wycliffe, 1988"

Some files were not shown because too many files changed in this diff