LiteralSupport.h 10 KB


  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the NumericLiteralParser, CharLiteralParser, and
  15. // StringLiteralParser interfaces.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
  19. #define LLVM_CLANG_LEX_LITERALSUPPORT_H
  20. #include "clang/Basic/CharInfo.h"
  21. #include "clang/Basic/LLVM.h"
  22. #include "clang/Basic/TokenKinds.h"
  23. #include "llvm/ADT/APFloat.h"
  24. #include "llvm/ADT/ArrayRef.h"
  25. #include "llvm/ADT/SmallString.h"
  26. #include "llvm/ADT/StringRef.h"
  27. #include "llvm/Support/DataTypes.h"
  28. namespace clang {
  29. class DiagnosticsEngine;
  30. class Preprocessor;
  31. class Token;
  32. class SourceLocation;
  33. class TargetInfo;
  34. class SourceManager;
  35. class LangOptions;
  36. /// Copy the characters of Input into Buf, expanding any universal character names (UCNs) found in Input.
  37. void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
  38. /// NumericLiteralParser - This performs strict semantic analysis of the content
  39. /// of a ppnumber, classifying it as either integer, floating, or erroneous,
  40. /// determines the radix of the value and can convert it to a useful value.
  41. class NumericLiteralParser {
  42. const SourceManager &SM;
  43. const LangOptions &LangOpts;
  44. DiagnosticsEngine &Diags;
  45. const char *const ThisTokBegin;
  46. const char *const ThisTokEnd;
  47. const char *DigitsBegin, *SuffixBegin; // markers
  48. const char *s; // cursor
  49. unsigned radix;
  50. bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
  51. SmallString<32> UDSuffixBuf;
  52. public:
  53. NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
  54. const SourceManager &SM, const LangOptions &LangOpts,
  55. const TargetInfo &Target, DiagnosticsEngine &Diags);
  56. bool hadError : 1;
  57. bool isUnsigned : 1;
  58. bool isLong : 1; // This is *not* set for long long.
  59. bool isLongLong : 1;
  60. bool isSizeT : 1; // 1z, 1uz (C++2b)
  61. bool isHalf : 1; // 1.0h
  62. bool isFloat : 1; // 1.0f
  63. bool isImaginary : 1; // 1.0i
  64. bool isFloat16 : 1; // 1.0f16
  65. bool isFloat128 : 1; // 1.0q
  66. uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
  67. bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
  68. bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
  69. bool isFixedPointLiteral() const {
  70. return (saw_period || saw_exponent) && saw_fixed_point_suffix;
  71. }
  72. bool isIntegerLiteral() const {
  73. return !saw_period && !saw_exponent && !isFixedPointLiteral();
  74. }
  75. bool isFloatingLiteral() const {
  76. return (saw_period || saw_exponent) && !isFixedPointLiteral();
  77. }
  78. bool hasUDSuffix() const {
  79. return saw_ud_suffix;
  80. }
  81. StringRef getUDSuffix() const {
  82. assert(saw_ud_suffix);
  83. return UDSuffixBuf;
  84. }
  85. unsigned getUDSuffixOffset() const {
  86. assert(saw_ud_suffix);
  87. return SuffixBegin - ThisTokBegin;
  88. }
  89. static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
  90. unsigned getRadix() const { return radix; }
  91. /// GetIntegerValue - Convert this numeric literal value to an APInt that
  92. /// matches Val's input width. If there is an overflow (i.e., if the unsigned
  93. /// value read is larger than the APInt's bits will hold), set Val to the low
  94. /// bits of the result and return true. Otherwise, return false.
  95. bool GetIntegerValue(llvm::APInt &Val);
  96. /// GetFloatValue - Convert this numeric literal to a floating value, using
  97. /// the specified APFloat fltSemantics (specifying float, double, etc).
  98. /// The optional bool isExact (passed-by-reference) has its value
  99. /// set to true if the returned APFloat can represent the number in the
  100. /// literal exactly, and false otherwise.
  101. llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
  102. /// GetFixedPointValue - Convert this numeric literal value into a
  103. /// scaled integer that represents this value. Returns true if an overflow
  104. /// occurred when calculating the integral part of the scaled integer or
  105. /// calculating the digit sequence of the exponent.
  106. bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
  107. private:
  108. void ParseNumberStartingWithZero(SourceLocation TokLoc);
  109. void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
  110. static bool isDigitSeparator(char C) { return C == '\''; }
  111. /// Determine whether the sequence of characters [Start, End) contains
  112. /// any real digits (not digit separators).
  113. bool containsDigits(const char *Start, const char *End) {
  114. return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
  115. }
  116. enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
  117. /// Ensure that we don't have a digit separator here.
  118. void checkSeparator(SourceLocation TokLoc, const char *Pos,
  119. CheckSeparatorKind IsAfterDigits);
  120. /// SkipHexDigits - Read and skip over any hex digits, up to End.
  121. /// Return a pointer to the first non-hex digit or End.
  122. const char *SkipHexDigits(const char *ptr) {
  123. while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
  124. ptr++;
  125. return ptr;
  126. }
  127. /// SkipOctalDigits - Read and skip over any octal digits, up to End.
  128. /// Return a pointer to the first non-hex digit or End.
  129. const char *SkipOctalDigits(const char *ptr) {
  130. while (ptr != ThisTokEnd &&
  131. ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
  132. ptr++;
  133. return ptr;
  134. }
  135. /// SkipDigits - Read and skip over any digits, up to End.
  136. /// Return a pointer to the first non-hex digit or End.
  137. const char *SkipDigits(const char *ptr) {
  138. while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
  139. ptr++;
  140. return ptr;
  141. }
  142. /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
  143. /// Return a pointer to the first non-binary digit or End.
  144. const char *SkipBinaryDigits(const char *ptr) {
  145. while (ptr != ThisTokEnd &&
  146. (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
  147. ptr++;
  148. return ptr;
  149. }
  150. };
  151. /// CharLiteralParser - Perform interpretation and semantic analysis of a
  152. /// character literal.
  153. class CharLiteralParser {
  154. uint64_t Value;
  155. tok::TokenKind Kind;
  156. bool IsMultiChar;
  157. bool HadError;
  158. SmallString<32> UDSuffixBuf;
  159. unsigned UDSuffixOffset;
  160. public:
  161. CharLiteralParser(const char *begin, const char *end,
  162. SourceLocation Loc, Preprocessor &PP,
  163. tok::TokenKind kind);
  164. bool hadError() const { return HadError; }
  165. bool isAscii() const { return Kind == tok::char_constant; }
  166. bool isWide() const { return Kind == tok::wide_char_constant; }
  167. bool isUTF8() const { return Kind == tok::utf8_char_constant; }
  168. bool isUTF16() const { return Kind == tok::utf16_char_constant; }
  169. bool isUTF32() const { return Kind == tok::utf32_char_constant; }
  170. bool isMultiChar() const { return IsMultiChar; }
  171. uint64_t getValue() const { return Value; }
  172. StringRef getUDSuffix() const { return UDSuffixBuf; }
  173. unsigned getUDSuffixOffset() const {
  174. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  175. return UDSuffixOffset;
  176. }
  177. };
  178. /// StringLiteralParser - This decodes string escape characters and performs
  179. /// wide string analysis and Translation Phase #6 (concatenation of string
  180. /// literals) (C99 5.1.1.2p1).
  181. class StringLiteralParser {
  182. const SourceManager &SM;
  183. const LangOptions &Features;
  184. const TargetInfo &Target;
  185. DiagnosticsEngine *Diags;
  186. unsigned MaxTokenLength;
  187. unsigned SizeBound;
  188. unsigned CharByteWidth;
  189. tok::TokenKind Kind;
  190. SmallString<512> ResultBuf;
  191. char *ResultPtr; // cursor
  192. SmallString<32> UDSuffixBuf;
  193. unsigned UDSuffixToken;
  194. unsigned UDSuffixOffset;
  195. public:
  196. StringLiteralParser(ArrayRef<Token> StringToks,
  197. Preprocessor &PP);
  198. StringLiteralParser(ArrayRef<Token> StringToks,
  199. const SourceManager &sm, const LangOptions &features,
  200. const TargetInfo &target,
  201. DiagnosticsEngine *diags = nullptr)
  202. : SM(sm), Features(features), Target(target), Diags(diags),
  203. MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
  204. ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
  205. init(StringToks);
  206. }
  207. bool hadError;
  208. bool Pascal;
  209. StringRef GetString() const {
  210. return StringRef(ResultBuf.data(), GetStringLength());
  211. }
  212. unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
  213. unsigned GetNumStringChars() const {
  214. return GetStringLength() / CharByteWidth;
  215. }
  216. /// getOffsetOfStringByte - This function returns the offset of the
  217. /// specified byte of the string data represented by Token. This handles
  218. /// advancing over escape sequences in the string.
  219. ///
  220. /// If the Diagnostics pointer is non-null, then this will do semantic
  221. /// checking of the string literal and emit errors and warnings.
  222. unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
  223. bool isAscii() const { return Kind == tok::string_literal; }
  224. bool isWide() const { return Kind == tok::wide_string_literal; }
  225. bool isUTF8() const { return Kind == tok::utf8_string_literal; }
  226. bool isUTF16() const { return Kind == tok::utf16_string_literal; }
  227. bool isUTF32() const { return Kind == tok::utf32_string_literal; }
  228. bool isPascal() const { return Pascal; }
  229. StringRef getUDSuffix() const { return UDSuffixBuf; }
  230. /// Get the index of a token containing a ud-suffix.
  231. unsigned getUDSuffixToken() const {
  232. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  233. return UDSuffixToken;
  234. }
  235. /// Get the spelling offset of the first byte of the ud-suffix.
  236. unsigned getUDSuffixOffset() const {
  237. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  238. return UDSuffixOffset;
  239. }
  240. static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
  241. private:
  242. void init(ArrayRef<Token> StringToks);
  243. bool CopyStringFragment(const Token &Tok, const char *TokBegin,
  244. StringRef Fragment);
  245. void DiagnoseLexingError(SourceLocation Loc);
  246. };
  247. } // end namespace clang
  248. #endif
  249. #ifdef __GNUC__
  250. #pragma GCC diagnostic pop
  251. #endif