Browse Source

Update contrib/libs/utf8proc to 2.7.0

ref:e9f684e9e9c1d13cd51d1547614f405b8a6b5620
robot-contrib 2 years ago
parent
commit
f403b8fac5

+ 17 - 17
contrib/libs/utf8proc/.yandex_meta/devtools.copyrights.report

@@ -29,23 +29,21 @@
 # FILE_INCLUDE - include all file data into licenses text file
 # =======================
 
-KEEP     COPYRIGHT_SERVICE_LABEL 1d50473f143812371fcfe0d38cbb5764
+KEEP     COPYRIGHT_SERVICE_LABEL 1bf0d41af40e40f004ad052d60870e01
 BELONGS ya.make
     License text:
-         *  Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
-         *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
+        *Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
     Scancode info:
         Original SPDX id: COPYRIGHT_SERVICE_LABEL
         Score           : 100.00
         Match type      : COPYRIGHT
     Files with this license:
-        utf8proc.c [3:4]
-        utf8proc.h [2:3]
+        LICENSE.md [10:10]
 
-KEEP     COPYRIGHT_SERVICE_LABEL 4342f5b409956a6777adfa764b09dcdd
+KEEP     COPYRIGHT_SERVICE_LABEL 1d50473f143812371fcfe0d38cbb5764
 BELONGS ya.make
     License text:
-         *  Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+         *  Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
          *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
     Scancode info:
         Original SPDX id: COPYRIGHT_SERVICE_LABEL
@@ -55,36 +53,38 @@ BELONGS ya.make
         utf8proc.c [3:4]
         utf8proc.h [2:3]
 
-KEEP     COPYRIGHT_SERVICE_LABEL 6426e7d9aab211c64c5141f673c67fff
+KEEP     COPYRIGHT_SERVICE_LABEL a46e434110f608cf0cfb9b71e70f5e1c
 BELONGS ya.make
     License text:
-        *Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
+        *Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
+        under the Terms of Use in http://www.unicode.org/copyright.html.*
     Scancode info:
         Original SPDX id: COPYRIGHT_SERVICE_LABEL
         Score           : 100.00
         Match type      : COPYRIGHT
     Files with this license:
-        LICENSE.md [10:10]
+        LICENSE.md [59:60]
 
-KEEP     COPYRIGHT_SERVICE_LABEL a46e434110f608cf0cfb9b71e70f5e1c
+KEEP     COPYRIGHT_SERVICE_LABEL c19e908094e2721a7c300de68dc25809
 BELONGS ya.make
     License text:
-        *Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
-        under the Terms of Use in http://www.unicode.org/copyright.html.*
+        *Copyright (c) 2009, 2013 Public Software Group e. V., Berlin, Germany*
     Scancode info:
         Original SPDX id: COPYRIGHT_SERVICE_LABEL
         Score           : 100.00
         Match type      : COPYRIGHT
     Files with this license:
-        LICENSE.md [59:60]
+        LICENSE.md [32:32]
 
-KEEP     COPYRIGHT_SERVICE_LABEL c19e908094e2721a7c300de68dc25809
+KEEP     COPYRIGHT_SERVICE_LABEL cd60ad4acc98c56e6197736c3447f8de
 BELONGS ya.make
     License text:
-        *Copyright (c) 2009, 2013 Public Software Group e. V., Berlin, Germany*
+         *  Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+         *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
     Scancode info:
         Original SPDX id: COPYRIGHT_SERVICE_LABEL
         Score           : 100.00
         Match type      : COPYRIGHT
     Files with this license:
-        LICENSE.md [32:32]
+        utf8proc.c [3:4]
+        utf8proc.h [2:3]

+ 5 - 5
contrib/libs/utf8proc/.yandex_meta/devtools.licenses.report

@@ -84,7 +84,7 @@ BELONGS ya.make
         Match type      : REFERENCE
         Links           : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT
     Files with this license:
-        NEWS.md [211:211]
+        NEWS.md [299:299]
 
 KEEP     MIT                  b9f647ef7e29973cc8b999af88006590
 BELONGS ya.make
@@ -111,9 +111,9 @@ BELONGS ya.make
     Files with this license:
         utf8proc.h [28:28]
 
-KEEP     MIT AND Unicode      f1f18913b6e9a1006771a56a06d41a82
+KEEP     MIT AND Unicode      e494dba55af130e3b0d56ff7a74377e3
 BELONGS ya.make
-FILE_INCLUDE LICENSE.md found in files: README.md at line 31
+FILE_INCLUDE LICENSE.md found in files: README.md at line 30
     Note: matched license text is too long. Read it in the source files.
     Scancode info:
         Original SPDX id: MIT
@@ -121,11 +121,11 @@ FILE_INCLUDE LICENSE.md found in files: README.md at line 31
         Match type      : NOTICE
         Links           : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT
     Files with this license:
-        README.md [26:31]
+        README.md [25:30]
     Scancode info:
         Original SPDX id: LicenseRef-scancode-unicode
         Score           : 100.00
         Match type      : NOTICE
         Links           : http://unicode.org/, http://unicode.org/copyright.html, https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unicode.LICENSE
     Files with this license:
-        README.md [26:31]
+        README.md [25:30]

+ 4 - 4
contrib/libs/utf8proc/.yandex_meta/licenses.list.txt

@@ -1,10 +1,10 @@
 ====================COPYRIGHT====================
- *  Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+ *  Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
  *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
 
 
 ====================COPYRIGHT====================
-*Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
+*Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
 
 
 ====================COPYRIGHT====================
@@ -26,7 +26,7 @@ whose copyright and license statements are reproduced below, all new
 work on the utf8proc library is licensed under the [MIT "expat"
 license](http://opensource.org/licenses/MIT):
 
-*Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
+*Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
@@ -70,7 +70,7 @@ DEALINGS IN THE SOFTWARE.
 
 ## Unicode data license ##
 
-This software distribution contains derived data from a modified version of
+This software contains data (`utf8proc_data.c`) derived from processing
 the Unicode data files. The following license applies to that data:
 
 **COPYRIGHT AND PERMISSION NOTICE**

+ 0 - 3
contrib/libs/utf8proc/CMakeLists.txt

@@ -8,9 +8,6 @@
 
 
 add_library(contrib-libs-utf8proc)
-target_compile_options(contrib-libs-utf8proc PUBLIC
-  -DUTF8PROC_STATIC
-)
 target_sources(contrib-libs-utf8proc PRIVATE
   ${CMAKE_SOURCE_DIR}/contrib/libs/utf8proc/utf8proc.c
 )

+ 2 - 2
contrib/libs/utf8proc/LICENSE.md

@@ -7,7 +7,7 @@ whose copyright and license statements are reproduced below, all new
 work on the utf8proc library is licensed under the [MIT "expat"
 license](http://opensource.org/licenses/MIT):
 
-*Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
+*Copyright © 2014-2021 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
 
 Permission is hereby granted, free of charge, to any person obtaining a
 copy of this software and associated documentation files (the "Software"),
@@ -51,7 +51,7 @@ DEALINGS IN THE SOFTWARE.
 
 ## Unicode data license ##
 
-This software distribution contains derived data from a modified version of
+This software contains data (`utf8proc_data.c`) derived from processing
 the Unicode data files. The following license applies to that data:
 
 **COPYRIGHT AND PERMISSION NOTICE**

+ 150 - 35
contrib/libs/utf8proc/NEWS.md

@@ -1,5 +1,93 @@
 # utf8proc release history #
 
+## Version 2.7.0 ##
+
+2021-12-16
+
+ - Unicode 14 support ([#233]).
+
+ - Support `GNUInstallDirs` in CMake build ([#159]).
+
+ - `cmake` build now installs `pkg-config` file ([#224]).
+
+ - Various build and portability improvements.
+
+## Version 2.6.1 ##
+
+2020-12-15
+
+ - Bugfix in `utf8proc_grapheme_break_stateful` for `NULL` state argument, which
+   also broke `utf8proc_grapheme_break`.
+
+## Version 2.6 ##
+
+2020-11-23
+
+ - New `utf8proc_islower` and `utf8proc_isupper` functions ([#196]).
+
+ - Bugfix for manual calls to `grapheme_break_extended` for initial characters ([#205]).
+
+ - Various build and portability improvements.
+
+## Version 2.5 ##
+
+2020-03-27
+
+- Unicode 13 support ([#179]).
+
+- No longer report zero width for category Sk ([#167]).
+
+- `cmake` support improvements ([#173]).
+
+## Version 2.4 ##
+
+2019-05-10
+
+- Unicode 12.1 support ([#156]).
+
+- New `-DUTF8PROC_INSTALL=No` option for `cmake` builds to disable installation ([#152]).
+
+- Better `make` support for HP-UX ([#154]).
+
+- Fixed incorrect `UTF8PROC_VERSION_MINOR` version number in header and bumped shared-library version.
+
+## Version 2.3 ##
+
+2019-03-30
+
+- Unicode 12 support ([#148]).
+
+- New function `utf8proc_unicode_version` to return the supported Unicode version ([#151]).
+
+- Simpler character-width computation that no longer uses GNU Unifont metrics: East-Asian wide
+  characters have width 2, and all other printable characters have width 1 ([#150]).
+
+- Fix `CHARBOUND` option for `utf8proc_map` to preserve U+FFFE and U+FFFF non-characters ([#149]).
+
+- Various build-system improvements ([#141], [#142], [#147]).
+
+## Version 2.2 ##
+
+2018-07-24
+
+- Unicode 11 support ([#132] and [#140]).
+
+- `utf8proc_NFKC_Casefold` convenience function for `NFKC_Casefold`
+  normalization ([#133]).
+
+- `UTF8PROC_STRIPNA` option to strip unassigned codepoints ([#133]).
+
+- Support building static libraries on Windows (callers need to
+  `#define UTF8PROC_STATIC`) ([#123]).
+
+- `cmake` fix to avoid defining `UTF8PROC_EXPORTS` globally ([#121]).
+
+- `toupper` of ß (U+00df) now yields ẞ (U+1E9E) ([#134]), similar to musl;
+  case-folding still yields the standard "ss" mapping.
+
+- `utf8proc_charwidth` now returns `1` for U+00AD (soft hyphen) and
+  for unassigned/PUA codepoints ([#135]).
+
 ## Version 2.1.1 ##
 
 2018-04-27
@@ -279,38 +367,65 @@ Release of version 1.0.1
 
 2006-06-02: initial release of version 0.1
 
-[#6]: https://github.com/JuliaLang/utf8proc/issues/6
-[#13]: https://github.com/JuliaLang/utf8proc/issues/13
-[#17]: https://github.com/JuliaLang/utf8proc/issues/17
-[#20]: https://github.com/JuliaLang/utf8proc/issues/20
-[#22]: https://github.com/JuliaLang/utf8proc/issues/22
-[#24]: https://github.com/JuliaLang/utf8proc/issues/24
-[#27]: https://github.com/JuliaLang/utf8proc/issues/27
-[#28]: https://github.com/JuliaLang/utf8proc/issues/28
-[#29]: https://github.com/JuliaLang/utf8proc/issues/29
-[#32]: https://github.com/JuliaLang/utf8proc/issues/32
-[#35]: https://github.com/JuliaLang/utf8proc/issues/35
-[#40]: https://github.com/JuliaLang/utf8proc/issues/40
-[#43]: https://github.com/JuliaLang/utf8proc/issues/43
-[#45]: https://github.com/JuliaLang/utf8proc/issues/45
-[#47]: https://github.com/JuliaLang/utf8proc/issues/47
-[#51]: https://github.com/JuliaLang/utf8proc/issues/51
-[#55]: https://github.com/JuliaLang/utf8proc/issues/55
-[#58]: https://github.com/JuliaLang/utf8proc/issues/58
-[#62]: https://github.com/JuliaLang/utf8proc/issues/62
-[#66]: https://github.com/JuliaLang/utf8proc/issues/66
-[#68]: https://github.com/JuliaLang/utf8proc/issues/68
-[#70]: https://github.com/JuliaLang/utf8proc/issues/70
-[#77]: https://github.com/JuliaLang/utf8proc/issues/77
-[#78]: https://github.com/JuliaLang/utf8proc/issues/78
-[#79]: https://github.com/JuliaLang/utf8proc/issues/79
-[#80]: https://github.com/JuliaLang/utf8proc/issues/80
-[#84]: https://github.com/JuliaLang/utf8proc/issues/84
-[#88]: https://github.com/JuliaLang/utf8proc/issues/88
-[#89]: https://github.com/JuliaLang/utf8proc/issues/89
-[#90]: https://github.com/JuliaLang/utf8proc/issues/90
-[#94]: https://github.com/JuliaLang/utf8proc/issues/94
-[#99]: https://github.com/JuliaLang/utf8proc/issues/99
-[#113]: https://github.com/JuliaLang/utf8proc/issues/113
-[#125]: https://github.com/JuliaLang/utf8proc/issues/125
-[#128]: https://github.com/JuliaLang/utf8proc/issues/128
+<!--- generated by NEWS-update.jl: -->
+
+[#6]: https://github.com/JuliaStrings/utf8proc/issues/6
+[#13]: https://github.com/JuliaStrings/utf8proc/issues/13
+[#17]: https://github.com/JuliaStrings/utf8proc/issues/17
+[#20]: https://github.com/JuliaStrings/utf8proc/issues/20
+[#22]: https://github.com/JuliaStrings/utf8proc/issues/22
+[#24]: https://github.com/JuliaStrings/utf8proc/issues/24
+[#27]: https://github.com/JuliaStrings/utf8proc/issues/27
+[#28]: https://github.com/JuliaStrings/utf8proc/issues/28
+[#29]: https://github.com/JuliaStrings/utf8proc/issues/29
+[#32]: https://github.com/JuliaStrings/utf8proc/issues/32
+[#35]: https://github.com/JuliaStrings/utf8proc/issues/35
+[#40]: https://github.com/JuliaStrings/utf8proc/issues/40
+[#43]: https://github.com/JuliaStrings/utf8proc/issues/43
+[#45]: https://github.com/JuliaStrings/utf8proc/issues/45
+[#47]: https://github.com/JuliaStrings/utf8proc/issues/47
+[#51]: https://github.com/JuliaStrings/utf8proc/issues/51
+[#55]: https://github.com/JuliaStrings/utf8proc/issues/55
+[#58]: https://github.com/JuliaStrings/utf8proc/issues/58
+[#62]: https://github.com/JuliaStrings/utf8proc/issues/62
+[#66]: https://github.com/JuliaStrings/utf8proc/issues/66
+[#68]: https://github.com/JuliaStrings/utf8proc/issues/68
+[#70]: https://github.com/JuliaStrings/utf8proc/issues/70
+[#77]: https://github.com/JuliaStrings/utf8proc/issues/77
+[#78]: https://github.com/JuliaStrings/utf8proc/issues/78
+[#79]: https://github.com/JuliaStrings/utf8proc/issues/79
+[#80]: https://github.com/JuliaStrings/utf8proc/issues/80
+[#84]: https://github.com/JuliaStrings/utf8proc/issues/84
+[#88]: https://github.com/JuliaStrings/utf8proc/issues/88
+[#89]: https://github.com/JuliaStrings/utf8proc/issues/89
+[#90]: https://github.com/JuliaStrings/utf8proc/issues/90
+[#94]: https://github.com/JuliaStrings/utf8proc/issues/94
+[#99]: https://github.com/JuliaStrings/utf8proc/issues/99
+[#113]: https://github.com/JuliaStrings/utf8proc/issues/113
+[#121]: https://github.com/JuliaStrings/utf8proc/issues/121
+[#123]: https://github.com/JuliaStrings/utf8proc/issues/123
+[#125]: https://github.com/JuliaStrings/utf8proc/issues/125
+[#128]: https://github.com/JuliaStrings/utf8proc/issues/128
+[#132]: https://github.com/JuliaStrings/utf8proc/issues/132
+[#133]: https://github.com/JuliaStrings/utf8proc/issues/133
+[#134]: https://github.com/JuliaStrings/utf8proc/issues/134
+[#135]: https://github.com/JuliaStrings/utf8proc/issues/135
+[#140]: https://github.com/JuliaStrings/utf8proc/issues/140
+[#141]: https://github.com/JuliaStrings/utf8proc/issues/141
+[#142]: https://github.com/JuliaStrings/utf8proc/issues/142
+[#147]: https://github.com/JuliaStrings/utf8proc/issues/147
+[#148]: https://github.com/JuliaStrings/utf8proc/issues/148
+[#149]: https://github.com/JuliaStrings/utf8proc/issues/149
+[#150]: https://github.com/JuliaStrings/utf8proc/issues/150
+[#151]: https://github.com/JuliaStrings/utf8proc/issues/151
+[#152]: https://github.com/JuliaStrings/utf8proc/issues/152
+[#154]: https://github.com/JuliaStrings/utf8proc/issues/154
+[#156]: https://github.com/JuliaStrings/utf8proc/issues/156
+[#159]: https://github.com/JuliaStrings/utf8proc/issues/159
+[#167]: https://github.com/JuliaStrings/utf8proc/issues/167
+[#173]: https://github.com/JuliaStrings/utf8proc/issues/173
+[#179]: https://github.com/JuliaStrings/utf8proc/issues/179
+[#196]: https://github.com/JuliaStrings/utf8proc/issues/196
+[#205]: https://github.com/JuliaStrings/utf8proc/issues/205
+[#224]: https://github.com/JuliaStrings/utf8proc/issues/224
+[#233]: https://github.com/JuliaStrings/utf8proc/issues/233

+ 26 - 6
contrib/libs/utf8proc/README.md

@@ -1,9 +1,8 @@
 # utf8proc
-[![Travis CI Status](https://travis-ci.org/JuliaLang/utf8proc.png)](https://travis-ci.org/JuliaLang/utf8proc)
-[![AppVeyor Status](https://ci.appveyor.com/api/projects/status/aou20lfkyhj8xbwq/branch/master?svg=true)](https://ci.appveyor.com/project/tkelman/utf8proc/branch/master)
+[![CI](https://github.com/JuliaStrings/utf8proc/actions/workflows/build-ci.yml/badge.svg)](https://github.com/JuliaStrings/utf8proc/actions/workflows/build-ci.yml)
+[![AppVeyor status](https://ci.appveyor.com/api/projects/status/ivaa0v6ikxrmm5r6?svg=true)](https://ci.appveyor.com/project/StevenGJohnson/utf8proc)
 
-
-[utf8proc](http://julialang.org/utf8proc/) is a small, clean C
+[utf8proc](http://juliastrings.github.io/utf8proc/) is a small, clean C
 library that provides Unicode normalization, case-folding, and other
 operations for data in the [UTF-8
 encoding](http://en.wikipedia.org/wiki/UTF-8).  It was [initially
@@ -32,7 +31,28 @@ the included `LICENSE.md` file for more detailed information.
 
 ## Quick Start
 
-For compilation of the C library run `make`.
+Typical users should download a [utf8proc release](http://juliastrings.github.io/utf8proc/releases/) rather than cloning directly from github.
+
+For compilation of the C library, run `make`.  You can also install the library and header file with `make install` (by default into `/usr/local/lib` and `/usr/local/include`, but this can be changed by `make prefix=/some/dir`).  `make check` runs some tests, and `make clean` deletes all of the generated files.
+
+Alternatively, you can compile with `cmake`, e.g. by
+```sh
+mkdir build
+cd build
+cmake ..
+make
+```
+
+### Using other compilers
+The included `Makefile` supports GNU/Linux flavors and MacOS with `gcc`-like compilers; Windows users will typically use `cmake`.
+
+For other Unix-like systems and other compilers, you may need to pass modified settings to `make` in order to use the correct compilation flags for building shared libraries on your system.
+
+For HP-UX with HP's `aCC` compiler and GNU Make (installed as `gmake`), you can compile with
+```
+gmake CC=/opt/aCC/bin/aCC CFLAGS="+O2" PICFLAG="+z" C99FLAG="-Ae" WCFLAGS="+w" LDFLAG_SHARED="-b" SOFLAG="-Wl,+h"
+```
+To run `gmake install` you will need GNU coreutils for the `install` command, and you may want to pass `prefix=/opt libdir=/opt/lib/hpux32` or similar to change the installation location.
 
 ## General Information
 
@@ -40,7 +60,7 @@ The C library is found in this directory after successful compilation
 and is named `libutf8proc.a` (for the static library) and
 `libutf8proc.so` (for the dynamic library).
 
-The Unicode version supported is 9.0.0.
+The Unicode version supported is 14.0.0.
 
 For Unicode normalizations, the following options are used:
 

+ 77 - 39
contrib/libs/utf8proc/utf8proc.c

@@ -1,6 +1,6 @@
 /* -*- mode: c; c-basic-offset: 2; tab-width: 2; indent-tabs-mode: nil -*- */
 /*
- *  Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+ *  Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
  *  Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
  *
  *  Permission is hereby granted, free of charge, to any person obtaining a
@@ -27,7 +27,7 @@
  *  Unicode data files.
  *
  *  The original data files are available at
- *  http://www.unicode.org/Public/UNIDATA/
+ *  https://www.unicode.org/Public/UNIDATA/
  *
  *  Please notice the copyright statement in the file "utf8proc_data.c".
  */
@@ -42,6 +42,14 @@
 
 
 #include "utf8proc.h"
+
+#ifndef SSIZE_MAX
+#define SSIZE_MAX ((size_t)SIZE_MAX/2)
+#endif
+#ifndef UINT16_MAX
+#  define UINT16_MAX 65535U
+#endif
+
 #include "utf8proc_data.c"
 
 
@@ -92,6 +100,10 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) {
   return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) "";
 }
 
+UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void) {
+  return "14.0.0";
+}
+
 UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
   switch (errcode) {
     case UTF8PROC_ERROR_NOMEM:
@@ -113,7 +125,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) {
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
   const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
 ) {
-  utf8proc_uint32_t uc;
+  utf8proc_int32_t uc;
   const utf8proc_uint8_t *end;
 
   *dst = -1;
@@ -125,7 +137,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
     return 1;
   }
   // Must be between 0xc2 and 0xf4 inclusive to be valid
-  if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
+  if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
   if (uc < 0xe0) {         // 2-byte sequence
      // Must have valid continuation character
      if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
@@ -188,9 +200,13 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut
   } else return 0;
 }
 
-/* internal "unsafe" version that does not check whether uc is in range */
-static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
+/* internal version used for inserting 0xff bytes between graphemes */
+static utf8proc_ssize_t charbound_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t *dst) {
    if (uc < 0x00) {
+      if (uc == -1) { /* internal value used for grapheme breaks */
+        dst[0] = (utf8proc_uint8_t)0xFF;
+        return 1;
+      }
       return 0;
    } else if (uc < 0x80) {
       dst[0] = (utf8proc_uint8_t)uc;
@@ -199,12 +215,6 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t
       dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6));
       dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F));
       return 2;
-   } else if (uc == 0xFFFF) {
-       dst[0] = (utf8proc_uint8_t)0xFF;
-       return 1;
-   } else if (uc == 0xFFFE) {
-       dst[0] = (utf8proc_uint8_t)0xFE;
-       return 1;
    } else if (uc < 0x10000) {
       dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12));
       dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F));
@@ -271,12 +281,8 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
      tbc == UTF8PROC_BOUNDCLASS_ZWJ ||                // ---
      tbc == UTF8PROC_BOUNDCLASS_SPACINGMARK ||        // GB9a
      lbc == UTF8PROC_BOUNDCLASS_PREPEND) ? false :    // GB9b
-    ((lbc == UTF8PROC_BOUNDCLASS_E_BASE ||            // GB10 (requires additional handling below)
-      lbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&       // ----
-     tbc == UTF8PROC_BOUNDCLASS_E_MODIFIER) ? false : // ----
-    (lbc == UTF8PROC_BOUNDCLASS_ZWJ &&                         // GB11
-     (tbc == UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ ||             // ----
-      tbc == UTF8PROC_BOUNDCLASS_E_BASE_GAZ)) ? false :        // ----
+    (lbc == UTF8PROC_BOUNDCLASS_E_ZWG &&              // GB11 (requires additional handling below)
+     tbc == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) ? false : // ----
     (lbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR &&          // GB12/13 (requires additional handling below)
      tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR) ? false :  // ----
     true; // GB999
@@ -284,10 +290,14 @@ static utf8proc_bool grapheme_break_simple(int lbc, int tbc) {
 
 static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
 {
-  int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
-                      ? *state : lbc);
-  utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
   if (state) {
+    int lbc_override;
+    if (*state == UTF8PROC_BOUNDCLASS_START)
+      *state = lbc_override = lbc;
+    else
+      lbc_override = *state;
+    utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
+
     // Special support for GB 12/13 made possible by GB999. After two RI
     // class codepoints we want to force a break. Do this by resetting the
     // second RI's bound class to UTF8PROC_BOUNDCLASS_OTHER, to force a break
@@ -295,16 +305,22 @@ static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t
     // forbidden by a different rule such as GB9).
     if (*state == tbc && tbc == UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR)
       *state = UTF8PROC_BOUNDCLASS_OTHER;
-    // Special support for GB10. Fold any EXTEND codepoints into the previous
-    // boundclass if we're dealing with an emoji base boundclass.
-    else if ((*state == UTF8PROC_BOUNDCLASS_E_BASE      ||
-              *state == UTF8PROC_BOUNDCLASS_E_BASE_GAZ) &&
-             tbc == UTF8PROC_BOUNDCLASS_EXTEND)
-      *state = UTF8PROC_BOUNDCLASS_E_BASE;
+    // Special support for GB11 (emoji extend* zwj / emoji)
+    else if (*state == UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC) {
+      if (tbc == UTF8PROC_BOUNDCLASS_EXTEND) // fold EXTEND codepoints into emoji
+        *state = UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC;
+      else if (tbc == UTF8PROC_BOUNDCLASS_ZWJ)
+        *state = UTF8PROC_BOUNDCLASS_E_ZWG; // state to record emoji+zwg combo
+      else
+        *state = tbc;
+    }
     else
       *state = tbc;
+
+    return break_permitted;
   }
-  return break_permitted;
+  else
+    return grapheme_break_simple(lbc, tbc);
 }
 
 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
@@ -340,9 +356,9 @@ static utf8proc_int32_t seqindex_decode_index(const utf8proc_uint32_t seqindex)
 
 static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqindex, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
   utf8proc_ssize_t written = 0;
-  const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x1FFF];
-  int len = seqindex >> 13;
-  if (len >= 7) {
+  const utf8proc_uint16_t *entry = &utf8proc_sequences[seqindex & 0x3FFF];
+  int len = seqindex >> 14;
+  if (len >= 3) {
     len = *entry;
     entry++;
   }
@@ -360,19 +376,31 @@ static utf8proc_ssize_t seqindex_write_char_decomposed(utf8proc_uint16_t seqinde
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
 {
   utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
-  return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
+  return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
 }
 
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
 {
   utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
-  return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+  return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
 }
 
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
 {
   utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
-  return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+  return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
+}
+
+UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
+{
+  const utf8proc_property_t *p = utf8proc_get_property(c);
+  return p->lowercase_seqindex != p->uppercase_seqindex && p->lowercase_seqindex == UINT16_MAX;
+}
+
+UTF8PROC_DLLEXPORT int utf8proc_isupper(utf8proc_int32_t c)
+{
+  const utf8proc_property_t *p = utf8proc_get_property(c);
+  return p->lowercase_seqindex != p->uppercase_seqindex && p->uppercase_seqindex == UINT16_MAX && p->category != UTF8PROC_CATEGORY_LT;
 }
 
 /* return a character width analogous to wcwidth (except portable and
@@ -382,7 +410,7 @@ UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) {
 }
 
 UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) {
-  return utf8proc_get_property(c)->category;
+  return (utf8proc_category_t) utf8proc_get_property(c)->category;
 }
 
 UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
@@ -392,7 +420,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) {
 
 #define utf8proc_decompose_lump(replacement_uc) \
   return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
-  options & ~UTF8PROC_LUMP, last_boundclass)
+  options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
 
 UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
   const utf8proc_property_t *property;
@@ -423,6 +451,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
   if (options & UTF8PROC_IGNORE) {
     if (property->ignorable) return 0;
   }
+  if (options & UTF8PROC_STRIPNA) {
+    if (!category) return 0;
+  }
   if (options & UTF8PROC_LUMP) {
     if (category == UTF8PROC_CATEGORY_ZS) utf8proc_decompose_lump(0x0020);
     if (uc == 0x2018 || uc == 0x2019 || uc == 0x02BC || uc == 0x02C8)
@@ -470,7 +501,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc,
     int tbc = property->boundclass;
     boundary = grapheme_break_extended(*last_boundclass, tbc, last_boundclass);
     if (boundary) {
-      if (bufsize >= 1) dst[0] = 0xFFFF;
+      if (bufsize >= 1) dst[0] = -1; /* sentinel value for grapheme break */
       if (bufsize >= 2) dst[1] = uc;
       return 2;
     }
@@ -676,7 +707,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
     if (options & UTF8PROC_CHARBOUND) {
         for (rpos = 0; rpos < length; rpos++) {
             uc = buffer[rpos];
-            wpos += unsafe_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
+            wpos += charbound_encode_char(uc, ((utf8proc_uint8_t *)buffer) + wpos);
         }
     } else {
         for (rpos = 0; rpos < length; rpos++) {
@@ -704,7 +735,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
   *dstptr = NULL;
   result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
   if (result < 0) return result;
-  buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
+  buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
   if (!buffer) return UTF8PROC_ERROR_NOMEM;
   result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
   if (result < 0) {
@@ -752,3 +783,10 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str)
     UTF8PROC_COMPOSE | UTF8PROC_COMPAT);
   return retval;
 }
+
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str) {
+  utf8proc_uint8_t *retval;
+  utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE |
+    UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE);
+  return retval;
+}

+ 49 - 33
contrib/libs/utf8proc/utf8proc.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
+ * Copyright (c) 2014-2021 Steven G. Johnson, Jiahao Chen, Peter Colberg, Tony Kelman, Scott P. Jones, and other contributors.
  * Copyright (c) 2009 Public Software Group e. V., Berlin, Germany
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -27,8 +27,8 @@
  *
  * utf8proc is a free/open-source (MIT/expat licensed) C library
  * providing Unicode normalization, case-folding, and other operations
- * for strings in the UTF-8 encoding, supporting Unicode version
- * 9.0.0.  See the utf8proc home page (http://julialang.org/utf8proc/)
+ * for strings in the UTF-8 encoding, supporting up-to-date Unicode versions.
+ * See the utf8proc home page (http://julialang.org/utf8proc/)
  * for downloads and other information, or the source code on github
  * (https://github.com/JuliaLang/utf8proc).
  *
@@ -71,9 +71,9 @@
 /** The MAJOR version number (increased when backwards API compatibility is broken). */
 #define UTF8PROC_VERSION_MAJOR 2
 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
-#define UTF8PROC_VERSION_MINOR 1
+#define UTF8PROC_VERSION_MINOR 7
 /** The PATCH version (increased for fixes that do not change the API). */
-#define UTF8PROC_VERSION_PATCH 1
+#define UTF8PROC_VERSION_PATCH 0
 /** @} */
 
 #include <stdlib.h>
@@ -120,34 +120,12 @@ typedef bool utf8proc_bool;
 #endif
 #include <limits.h>
 
-#ifdef UTF8PROC_STATIC
-#  define UTF8PROC_DLLEXPORT
-#else
-#  ifdef _WIN32
-#    ifdef UTF8PROC_EXPORTS
-#      define UTF8PROC_DLLEXPORT __declspec(dllexport)
-#    else
-#      define UTF8PROC_DLLEXPORT __declspec(dllimport)
-#    endif
-#  elif __GNUC__ >= 4
-#    define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default")))
-#  else
-#    define UTF8PROC_DLLEXPORT
-#  endif
-#endif
+#define UTF8PROC_DLLEXPORT
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#ifndef SSIZE_MAX
-#define SSIZE_MAX ((size_t)SIZE_MAX/2)
-#endif
-
-#ifndef UINT16_MAX
-#  define UINT16_MAX 65535U
-#endif
-
 /**
  * Option flags used by several functions in the library.
  */
@@ -213,6 +191,10 @@ typedef enum {
    *       @ref UTF8PROC_DECOMPOSE
    */
   UTF8PROC_STRIPMARK = (1<<13),
+  /**
+   * Strip unassigned codepoints.
+   */
+  UTF8PROC_STRIPNA    = (1<<14),
 } utf8proc_option_t;
 
 /** @name Error codes
@@ -378,10 +360,18 @@ typedef enum {
   UTF8PROC_BOUNDCLASS_SPACINGMARK        = 12, /**< Spacingmark */
   UTF8PROC_BOUNDCLASS_PREPEND            = 13, /**< Prepend */
   UTF8PROC_BOUNDCLASS_ZWJ                = 14, /**< Zero Width Joiner */
+
+  /* the following are no longer used in Unicode 11, but we keep
+     the constants here for backward compatibility */
   UTF8PROC_BOUNDCLASS_E_BASE             = 15, /**< Emoji Base */
   UTF8PROC_BOUNDCLASS_E_MODIFIER         = 16, /**< Emoji Modifier */
   UTF8PROC_BOUNDCLASS_GLUE_AFTER_ZWJ     = 17, /**< Glue_After_ZWJ */
   UTF8PROC_BOUNDCLASS_E_BASE_GAZ         = 18, /**< E_BASE + GLUE_AFTER_ZWJ */
+
+  /* the Extended_Pictographic property is used in the Unicode 11
+     grapheme-boundary rules, so we store it in the boundclass field */
+  UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC = 19,
+  UTF8PROC_BOUNDCLASS_E_ZWG = 20, /* UTF8PROC_BOUNDCLASS_EXTENDED_PICTOGRAPHIC + ZWJ */
 } utf8proc_boundclass_t;
 
 /**
@@ -404,6 +394,11 @@ UTF8PROC_DLLEXPORT extern const utf8proc_int8_t utf8proc_utf8class[256];
  */
 UTF8PROC_DLLEXPORT const char *utf8proc_version(void);
 
+/**
+ * Returns the utf8proc supported Unicode version as a string MAJOR.MINOR.PATCH.
+ */
+UTF8PROC_DLLEXPORT const char *utf8proc_unicode_version(void);
+
 /**
  * Returns an informative error string for the given utf8proc error code
  * (e.g. the error codes returned by @ref utf8proc_map).
@@ -469,6 +464,7 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(utf8proc_int
  * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster
  * - @ref UTF8PROC_LUMP      - lump certain different codepoints together
  * - @ref UTF8PROC_STRIPMARK - remove all character marks
+ * - @ref UTF8PROC_STRIPNA   - remove unassigned codepoints
  * @param last_boundclass
  * Pointer to an integer variable containing
  * the previous codepoint's boundary class if the @ref UTF8PROC_CHARBOUND
@@ -492,7 +488,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(
  * string and orders the decomposed sequences correctly.
  *
  * If the @ref UTF8PROC_NULLTERM flag in `options` is set, processing
- * will be stopped, when a NULL byte is encounted, otherwise `strlen`
+ * will be stopped, when a NULL byte is encountered, otherwise `strlen`
  * bytes are processed.  The result (in the form of 32-bit unicode
  * codepoints) is written into the buffer being pointed to by
  * `buffer` (which must contain at least `bufsize` entries).  In case of
@@ -580,6 +576,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
  * Given a pair of consecutive codepoints, return whether a grapheme break is
  * permitted between them (as defined by the extended grapheme clusters in UAX#29).
  *
+ * @param codepoint1 The first codepoint.
+ * @param codepoint2 The second codepoint, occurring consecutively after `codepoint1`.
  * @param state Beginning with Version 29 (Unicode 9.0.0), this algorithm requires
  *              state to break graphemes. This state can be passed in as a pointer
  *              in the `state` argument and should initially be set to 0. If the
@@ -588,7 +586,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer,
  *              matching the rules in Unicode 8.0.0.
  *
  * @warning If the state parameter is used, `utf8proc_grapheme_break_stateful` must
- *          be called IN ORDER on ALL potential breaks in a string.
+ *          be called IN ORDER on ALL potential breaks in a string.  However, it
+ *          is safe to reset the state to zero after a grapheme break.
  */
 UTF8PROC_DLLEXPORT utf8proc_bool utf8proc_grapheme_break_stateful(
     utf8proc_int32_t codepoint1, utf8proc_int32_t codepoint2, utf8proc_int32_t *state);
@@ -622,6 +621,18 @@ UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c);
  */
 UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c);
 
+/**
+ * Given a codepoint `c`, return `1` if the codepoint corresponds to a lower-case character
+ * and `0` otherwise.
+ */
+UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c);
+
+/**
+ * Given a codepoint `c`, return `1` if the codepoint corresponds to an upper-case character
+ * and `0` otherwise.
+ */
+UTF8PROC_DLLEXPORT int utf8proc_isupper(utf8proc_int32_t c);
+
 /**
  * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`,
  * except that a width of 0 is returned for non-printable codepoints
@@ -655,7 +666,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
  * contain NULL characters with the string if `str` contained NULL
  * characters). Other flags in the `options` field are passed to the
  * functions defined above, and regarded as described.  See also
- * @ref utfproc_map_custom to supply a custom codepoint transformation.
+ * @ref utf8proc_map_custom to supply a custom codepoint transformation.
  *
  * In case of success the length of the new string is returned,
  * otherwise a negative error code is returned.
@@ -680,8 +691,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
 
 /** @name Unicode normalization
  *
- * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC
- * normalized version of the null-terminated string `str`.  These
+ * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD, NFKC or
+ * NFKC_Casefold normalized version of the null-terminated string `str`.  These
  * are shortcuts to calling @ref utf8proc_map with @ref UTF8PROC_NULLTERM
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
  */
@@ -694,6 +705,11 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str);
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str);
 /** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
+/**
+ * NFKC_Casefold normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT
+ * and @ref UTF8PROC_CASEFOLD and @ref UTF8PROC_IGNORE).
+ **/
+UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
 /** @} */
 
 #ifdef __cplusplus

File diff suppressed because it is too large
+ 753 - 714
contrib/libs/utf8proc/utf8proc_data.c


Some files were not shown because too many files changed in this diff