#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===- llvm/Support/Unicode.h - Unicode character properties -*- C++ -*-=====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines functions that allow querying certain properties of Unicode
// characters.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_UNICODE_H
#define LLVM_SUPPORT_UNICODE_H

namespace llvm {
// Forward declaration only: this header passes StringRef by value in a
// declaration, which does not require the complete type.
class StringRef;

namespace sys {
namespace unicode {

/// Negative sentinel values reported in place of a column width.
///
/// Both enumerators are negative, so they cannot be confused with the widths
/// (0, 1 or 2 per character) documented for columnWidthUTF8.
enum ColumnWidthErrors {
  /// NOTE(review): not listed in the columnWidthUTF8 return contract below;
  /// presumably reported when the input is not well-formed UTF-8 -- confirm
  /// against the implementation.
  ErrorInvalidUTF8 = -2,
  /// The text contains a character that isPrintable rejects.
  ErrorNonPrintableCharacter = -1
};

/// Determines if a character is likely to be displayed correctly on the
/// terminal. An exact implementation would have to depend on the specific
/// terminal, so we define semantics that should be suitable for the generic
/// case of a terminal capable of outputting Unicode characters.
///
/// All characters from the Unicode code point range are considered printable
/// except for:
///   * C0 and C1 control character ranges;
///   * default ignorable code points as per 5.21 of
///     http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
///     except for U+00AD SOFT HYPHEN, as it's actually displayed on most
///     terminals;
///   * format characters (category = Cf);
///   * surrogates (category = Cs);
///   * unassigned characters (category = Cn).
///
/// \param UCS the character (Unicode code point) to classify.
/// \return true if the character is considered printable.
bool isPrintable(int UCS);

/// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
/// when output on a terminal ("character width"). This depends on the
/// implementation of the terminal, and there's no standard definition of
/// character width.
///
/// The implementation defines it in a way that is expected to be compatible
/// with a generic Unicode-capable terminal.
///
/// \param Text the UTF8-encoded text to measure.
/// \return Character width:
///   * ErrorNonPrintableCharacter (-1) if \p Text contains non-printable
///     characters (as identified by isPrintable);
///   * 0 for each non-spacing and enclosing combining mark;
///   * 2 for each CJK character excluding halfwidth forms;
///   * 1 for each of the remaining characters.
///
/// NOTE(review): ColumnWidthErrors also declares ErrorInvalidUTF8 (-2), which
/// this contract does not mention; presumably it is returned for malformed
/// UTF-8 -- confirm against the implementation and document it here.
int columnWidthUTF8(StringRef Text);

/// Folds the input Unicode character according to the Simple Unicode case
/// folding rules.
///
/// \param C the character to fold.
/// \return the result of folding \p C.
int foldCharSimple(int C);

} // namespace unicode
} // namespace sys
} // namespace llvm

#endif // LLVM_SUPPORT_UNICODE_H
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif