LiteralSupport.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the NumericLiteralParser, CharLiteralParser, and
  15. // StringLiteralParser interfaces.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
  19. #define LLVM_CLANG_LEX_LITERALSUPPORT_H
  20. #include "clang/Basic/CharInfo.h"
  21. #include "clang/Basic/LLVM.h"
  22. #include "clang/Basic/TokenKinds.h"
  23. #include "llvm/ADT/APFloat.h"
  24. #include "llvm/ADT/ArrayRef.h"
  25. #include "llvm/ADT/SmallString.h"
  26. #include "llvm/ADT/StringRef.h"
  27. #include "llvm/Support/DataTypes.h"
  28. namespace clang {
  29. class DiagnosticsEngine;
  30. class Preprocessor;
  31. class Token;
  32. class SourceLocation;
  33. class TargetInfo;
  34. class SourceManager;
  35. class LangOptions;
  36. /// Copy characters from Input to Buf, expanding any UCNs (universal character names) encountered.
  37. void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
  38. /// NumericLiteralParser - This performs strict semantic analysis of the content
  39. /// of a ppnumber, classifying it as either integer, floating, or erroneous,
  40. /// determines the radix of the value and can convert it to a useful value.
  41. class NumericLiteralParser {
  42. const SourceManager &SM;
  43. const LangOptions &LangOpts;
  44. DiagnosticsEngine &Diags;
  45. const char *const ThisTokBegin;
  46. const char *const ThisTokEnd;
  47. const char *DigitsBegin, *SuffixBegin; // markers
  48. const char *s; // cursor
  49. unsigned radix;
  50. bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix;
  51. SmallString<32> UDSuffixBuf;
  52. public:
  53. NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc,
  54. const SourceManager &SM, const LangOptions &LangOpts,
  55. const TargetInfo &Target, DiagnosticsEngine &Diags);
  56. bool hadError : 1;
  57. bool isUnsigned : 1;
  58. bool isLong : 1; // This is *not* set for long long.
  59. bool isLongLong : 1;
  60. bool isSizeT : 1; // 1z, 1uz (C++2b)
  61. bool isHalf : 1; // 1.0h
  62. bool isFloat : 1; // 1.0f
  63. bool isImaginary : 1; // 1.0i
  64. bool isFloat16 : 1; // 1.0f16
  65. bool isFloat128 : 1; // 1.0q
  66. bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr
  67. bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk
  68. bool isBitInt : 1; // 1wb, 1uwb (C2x)
  69. uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
  70. bool isFixedPointLiteral() const {
  71. return (saw_period || saw_exponent) && saw_fixed_point_suffix;
  72. }
  73. bool isIntegerLiteral() const {
  74. return !saw_period && !saw_exponent && !isFixedPointLiteral();
  75. }
  76. bool isFloatingLiteral() const {
  77. return (saw_period || saw_exponent) && !isFixedPointLiteral();
  78. }
  79. bool hasUDSuffix() const {
  80. return saw_ud_suffix;
  81. }
  82. StringRef getUDSuffix() const {
  83. assert(saw_ud_suffix);
  84. return UDSuffixBuf;
  85. }
  86. unsigned getUDSuffixOffset() const {
  87. assert(saw_ud_suffix);
  88. return SuffixBegin - ThisTokBegin;
  89. }
  90. static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
  91. unsigned getRadix() const { return radix; }
  92. /// GetIntegerValue - Convert this numeric literal value to an APInt that
  93. /// matches Val's input width. If there is an overflow (i.e., if the unsigned
  94. /// value read is larger than the APInt's bits will hold), set Val to the low
  95. /// bits of the result and return true. Otherwise, return false.
  96. bool GetIntegerValue(llvm::APInt &Val);
  97. /// GetFloatValue - Convert this numeric literal to a floating value, using
  98. /// the specified APFloat fltSemantics (specifying float, double, etc).
  99. /// The optional bool isExact (passed-by-reference) has its value
  100. /// set to true if the returned APFloat can represent the number in the
  101. /// literal exactly, and false otherwise.
  102. llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
  103. /// GetFixedPointValue - Convert this numeric literal value into a
  104. /// scaled integer that represents this value. Returns true if an overflow
  105. /// occurred when calculating the integral part of the scaled integer or
  106. /// calculating the digit sequence of the exponent.
  107. bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale);
  108. /// Get the digits that comprise the literal. This excludes any prefix or
  109. /// suffix associated with the literal.
  110. StringRef getLiteralDigits() const {
  111. assert(!hadError && "cannot reliably get the literal digits with an error");
  112. return StringRef(DigitsBegin, SuffixBegin - DigitsBegin);
  113. }
  114. private:
  115. void ParseNumberStartingWithZero(SourceLocation TokLoc);
  116. void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
  117. static bool isDigitSeparator(char C) { return C == '\''; }
  118. /// Determine whether the sequence of characters [Start, End) contains
  119. /// any real digits (not digit separators).
  120. bool containsDigits(const char *Start, const char *End) {
  121. return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
  122. }
  123. enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
  124. /// Ensure that we don't have a digit separator here.
  125. void checkSeparator(SourceLocation TokLoc, const char *Pos,
  126. CheckSeparatorKind IsAfterDigits);
  127. /// SkipHexDigits - Read and skip over any hex digits, up to End.
  128. /// Return a pointer to the first non-hex digit or End.
  129. const char *SkipHexDigits(const char *ptr) {
  130. while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
  131. ptr++;
  132. return ptr;
  133. }
  134. /// SkipOctalDigits - Read and skip over any octal digits, up to End.
  135. /// Return a pointer to the first non-hex digit or End.
  136. const char *SkipOctalDigits(const char *ptr) {
  137. while (ptr != ThisTokEnd &&
  138. ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
  139. ptr++;
  140. return ptr;
  141. }
  142. /// SkipDigits - Read and skip over any digits, up to End.
  143. /// Return a pointer to the first non-hex digit or End.
  144. const char *SkipDigits(const char *ptr) {
  145. while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
  146. ptr++;
  147. return ptr;
  148. }
  149. /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
  150. /// Return a pointer to the first non-binary digit or End.
  151. const char *SkipBinaryDigits(const char *ptr) {
  152. while (ptr != ThisTokEnd &&
  153. (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
  154. ptr++;
  155. return ptr;
  156. }
  157. };
  158. /// CharLiteralParser - Perform interpretation and semantic analysis of a
  159. /// character literal.
  160. class CharLiteralParser {
  161. uint64_t Value;
  162. tok::TokenKind Kind;
  163. bool IsMultiChar;
  164. bool HadError;
  165. SmallString<32> UDSuffixBuf;
  166. unsigned UDSuffixOffset;
  167. public:
  168. CharLiteralParser(const char *begin, const char *end,
  169. SourceLocation Loc, Preprocessor &PP,
  170. tok::TokenKind kind);
  171. bool hadError() const { return HadError; }
  172. bool isOrdinary() const { return Kind == tok::char_constant; }
  173. bool isWide() const { return Kind == tok::wide_char_constant; }
  174. bool isUTF8() const { return Kind == tok::utf8_char_constant; }
  175. bool isUTF16() const { return Kind == tok::utf16_char_constant; }
  176. bool isUTF32() const { return Kind == tok::utf32_char_constant; }
  177. bool isMultiChar() const { return IsMultiChar; }
  178. uint64_t getValue() const { return Value; }
  179. StringRef getUDSuffix() const { return UDSuffixBuf; }
  180. unsigned getUDSuffixOffset() const {
  181. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  182. return UDSuffixOffset;
  183. }
  184. };
  185. /// StringLiteralParser - This decodes string escape characters and performs
  186. /// wide string analysis and Translation Phase #6 (concatenation of string
  187. /// literals) (C99 5.1.1.2p1).
  188. class StringLiteralParser {
  189. const SourceManager &SM;
  190. const LangOptions &Features;
  191. const TargetInfo &Target;
  192. DiagnosticsEngine *Diags;
  193. unsigned MaxTokenLength;
  194. unsigned SizeBound;
  195. unsigned CharByteWidth;
  196. tok::TokenKind Kind;
  197. SmallString<512> ResultBuf;
  198. char *ResultPtr; // cursor
  199. SmallString<32> UDSuffixBuf;
  200. unsigned UDSuffixToken;
  201. unsigned UDSuffixOffset;
  202. public:
  203. StringLiteralParser(ArrayRef<Token> StringToks,
  204. Preprocessor &PP);
  205. StringLiteralParser(ArrayRef<Token> StringToks,
  206. const SourceManager &sm, const LangOptions &features,
  207. const TargetInfo &target,
  208. DiagnosticsEngine *diags = nullptr)
  209. : SM(sm), Features(features), Target(target), Diags(diags),
  210. MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
  211. ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
  212. init(StringToks);
  213. }
  214. bool hadError;
  215. bool Pascal;
  216. StringRef GetString() const {
  217. return StringRef(ResultBuf.data(), GetStringLength());
  218. }
  219. unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
  220. unsigned GetNumStringChars() const {
  221. return GetStringLength() / CharByteWidth;
  222. }
  223. /// getOffsetOfStringByte - This function returns the offset of the
  224. /// specified byte of the string data represented by Token. This handles
  225. /// advancing over escape sequences in the string.
  226. ///
  227. /// If the Diagnostics pointer is non-null, then this will do semantic
  228. /// checking of the string literal and emit errors and warnings.
  229. unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
  230. bool isOrdinary() const { return Kind == tok::string_literal; }
  231. bool isWide() const { return Kind == tok::wide_string_literal; }
  232. bool isUTF8() const { return Kind == tok::utf8_string_literal; }
  233. bool isUTF16() const { return Kind == tok::utf16_string_literal; }
  234. bool isUTF32() const { return Kind == tok::utf32_string_literal; }
  235. bool isPascal() const { return Pascal; }
  236. StringRef getUDSuffix() const { return UDSuffixBuf; }
  237. /// Get the index of a token containing a ud-suffix.
  238. unsigned getUDSuffixToken() const {
  239. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  240. return UDSuffixToken;
  241. }
  242. /// Get the spelling offset of the first byte of the ud-suffix.
  243. unsigned getUDSuffixOffset() const {
  244. assert(!UDSuffixBuf.empty() && "no ud-suffix");
  245. return UDSuffixOffset;
  246. }
  247. static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
  248. private:
  249. void init(ArrayRef<Token> StringToks);
  250. bool CopyStringFragment(const Token &Tok, const char *TokBegin,
  251. StringRef Fragment);
  252. void DiagnoseLexingError(SourceLocation Loc);
  253. };
  254. } // end namespace clang
  255. #endif
  256. #ifdef __GNUC__
  257. #pragma GCC diagnostic pop
  258. #endif