123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081 |
- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===- llvm/Support/Unicode.h - Unicode character properties -*- C++ -*-=====//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines functions that allow querying certain properties of Unicode
- // characters.
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_SUPPORT_UNICODE_H
- #define LLVM_SUPPORT_UNICODE_H
- namespace llvm {
- class StringRef;
- namespace sys {
- namespace unicode {
- enum ColumnWidthErrors {
- ErrorInvalidUTF8 = -2,
- ErrorNonPrintableCharacter = -1
- };
- /// Determines if a character is likely to be displayed correctly on the
- /// terminal. Exact implementation would have to depend on the specific
- /// terminal, so we define the semantic that should be suitable for generic case
- /// of a terminal capable to output Unicode characters.
- ///
- /// All characters from the Unicode code point range are considered printable
- /// except for:
- /// * C0 and C1 control character ranges;
- /// * default ignorable code points as per 5.21 of
- /// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
- /// except for U+00AD SOFT HYPHEN, as it's actually displayed on most
- /// terminals;
- /// * format characters (category = Cf);
- /// * surrogates (category = Cs);
- /// * unassigned characters (category = Cn).
- /// \return true if the character is considered printable.
- bool isPrintable(int UCS);
- /// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
- /// when output on a terminal ("character width"). This depends on the
- /// implementation of the terminal, and there's no standard definition of
- /// character width.
- ///
- /// The implementation defines it in a way that is expected to be compatible
- /// with a generic Unicode-capable terminal.
- ///
- /// \return Character width:
- /// * ErrorNonPrintableCharacter (-1) if \p Text contains non-printable
- /// characters (as identified by isPrintable);
- /// * 0 for each non-spacing and enclosing combining mark;
- /// * 2 for each CJK character excluding halfwidth forms;
- /// * 1 for each of the remaining characters.
- int columnWidthUTF8(StringRef Text);
- /// Fold input unicode character according the Simple unicode case folding
- /// rules.
- int foldCharSimple(int C);
- } // namespace unicode
- } // namespace sys
- } // namespace llvm
- #endif
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif
|