Preprocessor.h 95 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. /// \file
  15. /// Defines the clang::Preprocessor interface.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
  19. #define LLVM_CLANG_LEX_PREPROCESSOR_H
  20. #include "clang/Basic/Diagnostic.h"
  21. #include "clang/Basic/DiagnosticIDs.h"
  22. #include "clang/Basic/IdentifierTable.h"
  23. #include "clang/Basic/LLVM.h"
  24. #include "clang/Basic/LangOptions.h"
  25. #include "clang/Basic/Module.h"
  26. #include "clang/Basic/SourceLocation.h"
  27. #include "clang/Basic/SourceManager.h"
  28. #include "clang/Basic/TokenKinds.h"
  29. #include "clang/Lex/Lexer.h"
  30. #include "clang/Lex/MacroInfo.h"
  31. #include "clang/Lex/ModuleLoader.h"
  32. #include "clang/Lex/ModuleMap.h"
  33. #include "clang/Lex/PPCallbacks.h"
  34. #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
  35. #include "clang/Lex/Token.h"
  36. #include "clang/Lex/TokenLexer.h"
  37. #include "llvm/ADT/ArrayRef.h"
  38. #include "llvm/ADT/DenseMap.h"
  39. #include "llvm/ADT/FoldingSet.h"
  40. #include "llvm/ADT/FunctionExtras.h"
  41. #include "llvm/ADT/None.h"
  42. #include "llvm/ADT/Optional.h"
  43. #include "llvm/ADT/PointerUnion.h"
  44. #include "llvm/ADT/STLExtras.h"
  45. #include "llvm/ADT/SmallPtrSet.h"
  46. #include "llvm/ADT/SmallVector.h"
  47. #include "llvm/ADT/StringRef.h"
  48. #include "llvm/ADT/TinyPtrVector.h"
  49. #include "llvm/ADT/iterator_range.h"
  50. #include "llvm/Support/Allocator.h"
  51. #include "llvm/Support/Casting.h"
  52. #include "llvm/Support/Registry.h"
  53. #include <cassert>
  54. #include <cstddef>
  55. #include <cstdint>
  56. #include <map>
  57. #include <memory>
  58. #include <string>
  59. #include <utility>
  60. #include <vector>
  61. namespace llvm {
  62. template<unsigned InternalLen> class SmallString;
  63. } // namespace llvm
  64. namespace clang {
  65. class CodeCompletionHandler;
  66. class CommentHandler;
  67. class DirectoryEntry;
  68. class DirectoryLookup;
  69. class EmptylineHandler;
  70. class ExternalPreprocessorSource;
  71. class FileEntry;
  72. class FileManager;
  73. class HeaderSearch;
  74. class MacroArgs;
  75. class PragmaHandler;
  76. class PragmaNamespace;
  77. class PreprocessingRecord;
  78. class PreprocessorLexer;
  79. class PreprocessorOptions;
  80. class ScratchBuffer;
  81. class TargetInfo;
  82. namespace Builtin {
  83. class Context;
  84. }
  85. /// Stores token information for comparing actual tokens with
  86. /// predefined values. Only handles simple tokens and identifiers.
  87. class TokenValue {
  88. tok::TokenKind Kind;
  89. IdentifierInfo *II;
  90. public:
  91. TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
  92. assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
  93. assert(Kind != tok::identifier &&
  94. "Identifiers should be created by TokenValue(IdentifierInfo *)");
  95. assert(!tok::isLiteral(Kind) && "Literals are not supported.");
  96. assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
  97. }
  98. TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
  99. bool operator==(const Token &Tok) const {
  100. return Tok.getKind() == Kind &&
  101. (!II || II == Tok.getIdentifierInfo());
  102. }
  103. };
  /// The directive context in which a macro name is being used.
  enum MacroUse {
    /// Any use other than in \#define or \#undef (value 0).
    MU_Other = 0,
    /// The macro name specified in a \#define (value 1).
    MU_Define,
    /// The macro name specified in an \#undef (value 2).
    MU_Undef
  };
  113. /// Engages in a tight little dance with the lexer to efficiently
  114. /// preprocess tokens.
  115. ///
  116. /// Lexers know only about tokens within a single source file, and don't
  117. /// know anything about preprocessor-level issues like the \#include stack,
  118. /// token expansion, etc.
  119. class Preprocessor {
  120. friend class VAOptDefinitionContext;
  121. friend class VariadicMacroScopeGuard;
  122. llvm::unique_function<void(const clang::Token &)> OnToken;
  123. std::shared_ptr<PreprocessorOptions> PPOpts;
  124. DiagnosticsEngine *Diags;
  125. LangOptions &LangOpts;
  126. const TargetInfo *Target = nullptr;
  127. const TargetInfo *AuxTarget = nullptr;
  128. FileManager &FileMgr;
  129. SourceManager &SourceMgr;
  130. std::unique_ptr<ScratchBuffer> ScratchBuf;
  131. HeaderSearch &HeaderInfo;
  132. ModuleLoader &TheModuleLoader;
  133. /// External source of macros.
  134. ExternalPreprocessorSource *ExternalSource;
  135. /// A BumpPtrAllocator object used to quickly allocate and release
  136. /// objects internal to the Preprocessor.
  137. llvm::BumpPtrAllocator BP;
  138. /// Identifiers for builtin macros and other builtins.
  139. IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
  140. IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
  141. IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
  142. IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
  143. IdentifierInfo *Ident__FILE_NAME__; // __FILE_NAME__
  144. IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
  145. IdentifierInfo *Ident__COUNTER__; // __COUNTER__
  146. IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
  147. IdentifierInfo *Ident__identifier; // __identifier
  148. IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
  149. IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
  150. IdentifierInfo *Ident__has_feature; // __has_feature
  151. IdentifierInfo *Ident__has_extension; // __has_extension
  152. IdentifierInfo *Ident__has_builtin; // __has_builtin
  153. IdentifierInfo *Ident__has_attribute; // __has_attribute
  154. IdentifierInfo *Ident__has_include; // __has_include
  155. IdentifierInfo *Ident__has_include_next; // __has_include_next
  156. IdentifierInfo *Ident__has_warning; // __has_warning
  157. IdentifierInfo *Ident__is_identifier; // __is_identifier
  158. IdentifierInfo *Ident__building_module; // __building_module
  159. IdentifierInfo *Ident__MODULE__; // __MODULE__
  160. IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
  161. IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
  162. IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
  163. IdentifierInfo *Ident__is_target_arch; // __is_target_arch
  164. IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
  165. IdentifierInfo *Ident__is_target_os; // __is_target_os
  166. IdentifierInfo *Ident__is_target_environment; // __is_target_environment
  167. // Weak, only valid (and set) while InMacroArgs is true.
  168. Token* ArgMacro;
  169. SourceLocation DATELoc, TIMELoc;
  170. // Next __COUNTER__ value, starts at 0.
  171. unsigned CounterValue = 0;
  172. enum {
  173. /// Maximum depth of \#includes.
  174. MaxAllowedIncludeStackDepth = 200
  175. };
  176. // State that is set before the preprocessor begins.
  177. bool KeepComments : 1;
  178. bool KeepMacroComments : 1;
  179. bool SuppressIncludeNotFoundError : 1;
  180. // State that changes while the preprocessor runs:
  181. bool InMacroArgs : 1; // True if parsing fn macro invocation args.
  182. /// Whether the preprocessor owns the header search object.
  183. bool OwnsHeaderSearch : 1;
  184. /// True if macro expansion is disabled.
  185. bool DisableMacroExpansion : 1;
  186. /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
  187. /// when parsing preprocessor directives.
  188. bool MacroExpansionInDirectivesOverride : 1;
  189. class ResetMacroExpansionHelper;
  190. /// Whether we have already loaded macros from the external source.
  191. mutable bool ReadMacrosFromExternalSource : 1;
  192. /// True if pragmas are enabled.
  193. bool PragmasEnabled : 1;
  194. /// True if the current build action is a preprocessing action.
  195. bool PreprocessedOutput : 1;
  196. /// True if we are currently preprocessing a #if or #elif directive
  197. bool ParsingIfOrElifDirective;
  198. /// True if we are pre-expanding macro arguments.
  199. bool InMacroArgPreExpansion;
  200. /// Mapping/lookup information for all identifiers in
  201. /// the program, including program keywords.
  202. mutable IdentifierTable Identifiers;
  203. /// This table contains all the selectors in the program.
  204. ///
  205. /// Unlike IdentifierTable above, this table *isn't* populated by the
  206. /// preprocessor. It is declared/expanded here because its role/lifetime is
  207. /// conceptually similar to the IdentifierTable. In addition, the current
  208. /// control flow (in clang::ParseAST()) makes it convenient to put it here.
  209. ///
  210. /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
  211. /// the lifetime of the preprocessor.
  212. SelectorTable Selectors;
  213. /// Information about builtins.
  214. std::unique_ptr<Builtin::Context> BuiltinInfo;
  215. /// Tracks all of the pragmas that the client registered
  216. /// with this preprocessor.
  217. std::unique_ptr<PragmaNamespace> PragmaHandlers;
  218. /// Pragma handlers of the original source are stored here during the
  219. /// parsing of a model file.
  220. std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
  221. /// Tracks all of the comment handlers that the client registered
  222. /// with this preprocessor.
  223. std::vector<CommentHandler *> CommentHandlers;
  224. /// Empty line handler.
  225. EmptylineHandler *Emptyline = nullptr;
  226. /// True if we want to ignore EOF token and continue later on (thus
  227. /// avoid tearing the Lexer and etc. down).
  228. bool IncrementalProcessing = false;
  229. public:
  230. /// The kind of translation unit we are processing.
  231. const TranslationUnitKind TUKind;
  232. private:
  233. /// The code-completion handler.
  234. CodeCompletionHandler *CodeComplete = nullptr;
  235. /// The file that we're performing code-completion for, if any.
  236. const FileEntry *CodeCompletionFile = nullptr;
  237. /// The offset in file for the code-completion point.
  238. unsigned CodeCompletionOffset = 0;
  239. /// The location for the code-completion point. This gets instantiated
  240. /// when the CodeCompletionFile gets \#include'ed for preprocessing.
  241. SourceLocation CodeCompletionLoc;
  242. /// The start location for the file of the code-completion point.
  243. ///
  244. /// This gets instantiated when the CodeCompletionFile gets \#include'ed
  245. /// for preprocessing.
  246. SourceLocation CodeCompletionFileLoc;
  247. /// The source location of the \c import contextual keyword we just
  248. /// lexed, if any.
  249. SourceLocation ModuleImportLoc;
  250. /// The module import path that we're currently processing.
  251. SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
  252. /// Whether the last token we lexed was an '@'.
  253. bool LastTokenWasAt = false;
  /// A position within a C++20 import-seq.
  ///
  /// The position is a small state machine driven by the handle*() callbacks
  /// below. A positive state counts unclosed brackets; zero and the negative
  /// states all count as "top level" (see atTopLevel()).
  class ImportSeq {
  public:
    enum State : int {
      // Positive values represent a number of unclosed brackets.
      AtTopLevel = 0,
      AfterTopLevelTokenSeq = -1,
      AfterExport = -2,
      AfterImportSeq = -3,
    };

    /// Starts the sequence in state \p S. Intentionally implicit so that a
    /// State value can initialize an ImportSeq directly.
    ImportSeq(State S) : S(S) {}

    /// Saw any kind of open bracket.
    ///
    /// Any top-level (non-positive) state is treated as depth 0 before the
    /// depth is incremented.
    void handleOpenBracket() {
      S = static_cast<State>(std::max<int>(S, 0) + 1);
    }

    /// Saw any kind of close bracket other than '}'.
    ///
    /// The depth is decremented but never drops below AtTopLevel; a negative
    /// state therefore becomes AtTopLevel.
    void handleCloseBracket() {
      S = static_cast<State>(std::max<int>(S, 1) - 1);
    }

    /// Saw a close brace.
    ///
    /// Behaves like any other close bracket, and additionally ends the
    /// top-level token sequence when the brace returns us to the top level,
    /// unless we are after a header-name.
    void handleCloseBrace() {
      handleCloseBracket();
      if (S == AtTopLevel && !AfterHeaderName)
        S = AfterTopLevelTokenSeq;
    }

    /// Saw a semicolon.
    ///
    /// At top level this ends the current token sequence and clears the
    /// header-name flag; inside brackets it has no effect.
    void handleSemi() {
      if (atTopLevel()) {
        S = AfterTopLevelTokenSeq;
        AfterHeaderName = false;
      }
    }

    /// Saw an 'export' identifier.
    void handleExport() {
      if (S == AfterTopLevelTokenSeq)
        S = AfterExport;
      else if (S <= 0)
        S = AtTopLevel;
    }

    /// Saw an 'import' identifier.
    void handleImport() {
      if (S == AfterTopLevelTokenSeq || S == AfterExport)
        S = AfterImportSeq;
      else if (S <= 0)
        S = AtTopLevel;
    }

    /// Saw a 'header-name' token; do not recognize any more 'import' tokens
    /// until we reach a top-level semicolon.
    void handleHeaderName() {
      if (S == AfterImportSeq)
        AfterHeaderName = true;
      handleMisc();
    }

    /// Saw any other token.
    ///
    /// Collapses every top-level state to AtTopLevel; bracket depth is left
    /// untouched.
    void handleMisc() {
      if (S <= 0)
        S = AtTopLevel;
    }

    /// True when no brackets are open (the state is zero or negative).
    bool atTopLevel() const { return S <= 0; }

    /// True when the state is exactly AfterImportSeq.
    bool afterImportSeq() const { return S == AfterImportSeq; }

  private:
    State S;
    /// Whether we're in the pp-import-suffix following the header-name in a
    /// pp-import. If so, a close-brace is not sufficient to end the
    /// top-level-token-seq of an import-seq.
    bool AfterHeaderName = false;
  };
  321. /// Our current position within a C++20 import-seq.
  322. ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
  323. /// Whether the module import expects an identifier next. Otherwise,
  324. /// it expects a '.' or ';'.
  325. bool ModuleImportExpectsIdentifier = false;
  326. /// The identifier and source location of the currently-active
  327. /// \#pragma clang arc_cf_code_audited begin.
  328. std::pair<IdentifierInfo *, SourceLocation> PragmaARCCFCodeAuditedInfo;
  329. /// The source location of the currently-active
  330. /// \#pragma clang assume_nonnull begin.
  331. SourceLocation PragmaAssumeNonNullLoc;
  332. /// True if we hit the code-completion point.
  333. bool CodeCompletionReached = false;
  334. /// The code completion token containing the information
  335. /// on the stem that is to be code completed.
  336. IdentifierInfo *CodeCompletionII = nullptr;
  337. /// Range for the code completion token.
  338. SourceRange CodeCompletionTokenRange;
  339. /// The directory that the main file should be considered to occupy,
  340. /// if it does not correspond to a real file (as happens when building a
  341. /// module).
  342. const DirectoryEntry *MainFileDir = nullptr;
  343. /// The number of bytes that we will initially skip when entering the
  344. /// main file, along with a flag that indicates whether skipping this number
  345. /// of bytes will place the lexer at the start of a line.
  346. ///
  347. /// This is used when loading a precompiled preamble.
  348. std::pair<int, bool> SkipMainFilePreamble;
  349. /// Whether we hit an error due to reaching max allowed include depth. This
  350. /// lets us avoid hitting the same error over and over again.
  351. bool HasReachedMaxIncludeDepth = false;
  352. /// The number of currently-active calls to Lex.
  353. ///
  354. /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
  355. /// require asking for multiple additional tokens. This counter makes it
  356. /// possible for Lex to detect whether it's producing a token for the end
  357. /// of phase 4 of translation or for some other situation.
  358. unsigned LexLevel = 0;
  359. /// The number of (LexLevel 0) preprocessor tokens.
  360. unsigned TokenCount = 0;
  361. /// Preprocess every token regardless of LexLevel.
  362. bool PreprocessToken = false;
  363. /// The maximum number of (LexLevel 0) tokens before issuing a -Wmax-tokens
  364. /// warning, or zero for unlimited.
  365. unsigned MaxTokens = 0;
  366. SourceLocation MaxTokensOverrideLoc;
  367. public:
  368. struct PreambleSkipInfo {
  369. SourceLocation HashTokenLoc;
  370. SourceLocation IfTokenLoc;
  371. bool FoundNonSkipPortion;
  372. bool FoundElse;
  373. SourceLocation ElseLoc;
  374. PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
  375. bool FoundNonSkipPortion, bool FoundElse,
  376. SourceLocation ElseLoc)
  377. : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
  378. FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
  379. ElseLoc(ElseLoc) {}
  380. };
  381. using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
  382. private:
  383. friend class ASTReader;
  384. friend class MacroArgs;
  385. class PreambleConditionalStackStore {
  386. enum State {
  387. Off = 0,
  388. Recording = 1,
  389. Replaying = 2,
  390. };
  391. public:
  392. PreambleConditionalStackStore() = default;
  393. void startRecording() { ConditionalStackState = Recording; }
  394. void startReplaying() { ConditionalStackState = Replaying; }
  395. bool isRecording() const { return ConditionalStackState == Recording; }
  396. bool isReplaying() const { return ConditionalStackState == Replaying; }
  397. ArrayRef<PPConditionalInfo> getStack() const {
  398. return ConditionalStack;
  399. }
  400. void doneReplaying() {
  401. ConditionalStack.clear();
  402. ConditionalStackState = Off;
  403. }
  404. void setStack(ArrayRef<PPConditionalInfo> s) {
  405. if (!isRecording() && !isReplaying())
  406. return;
  407. ConditionalStack.clear();
  408. ConditionalStack.append(s.begin(), s.end());
  409. }
  410. bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
  411. bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
  412. void clearSkipInfo() { SkipInfo.reset(); }
  413. llvm::Optional<PreambleSkipInfo> SkipInfo;
  414. private:
  415. SmallVector<PPConditionalInfo, 4> ConditionalStack;
  416. State ConditionalStackState = Off;
  417. } PreambleConditionalStack;
  418. /// The current top of the stack that we're lexing from if
  419. /// not expanding a macro and we are lexing directly from source code.
  420. ///
  421. /// Only one of CurLexer, or CurTokenLexer will be non-null.
  422. std::unique_ptr<Lexer> CurLexer;
  423. /// The current top of the stack that we're lexing from
  424. /// if not expanding a macro.
  425. ///
  426. /// This is an alias for CurLexer.
  427. PreprocessorLexer *CurPPLexer = nullptr;
  428. /// Used to find the current FileEntry, if CurLexer is non-null
  429. /// and if applicable.
  430. ///
  431. /// This allows us to implement \#include_next and find directory-specific
  432. /// properties.
  433. const DirectoryLookup *CurDirLookup = nullptr;
  434. /// The current macro we are expanding, if we are expanding a macro.
  435. ///
  436. /// One of CurLexer and CurTokenLexer must be null.
  437. std::unique_ptr<TokenLexer> CurTokenLexer;
  438. /// The kind of lexer we're currently working with.
  439. enum CurLexerKind {
  440. CLK_Lexer,
  441. CLK_TokenLexer,
  442. CLK_CachingLexer,
  443. CLK_LexAfterModuleImport
  444. } CurLexerKind = CLK_Lexer;
  445. /// If the current lexer is for a submodule that is being built, this
  446. /// is that submodule.
  447. Module *CurLexerSubmodule = nullptr;
  448. /// Keeps track of the stack of files currently
  449. /// \#included, and macros currently being expanded from, not counting
  450. /// CurLexer/CurTokenLexer.
  451. struct IncludeStackInfo {
  452. enum CurLexerKind CurLexerKind;
  453. Module *TheSubmodule;
  454. std::unique_ptr<Lexer> TheLexer;
  455. PreprocessorLexer *ThePPLexer;
  456. std::unique_ptr<TokenLexer> TheTokenLexer;
  457. const DirectoryLookup *TheDirLookup;
  458. // The following constructors are completely useless copies of the default
  459. // versions, only needed to pacify MSVC.
  460. IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
  461. std::unique_ptr<Lexer> &&TheLexer,
  462. PreprocessorLexer *ThePPLexer,
  463. std::unique_ptr<TokenLexer> &&TheTokenLexer,
  464. const DirectoryLookup *TheDirLookup)
  465. : CurLexerKind(std::move(CurLexerKind)),
  466. TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
  467. ThePPLexer(std::move(ThePPLexer)),
  468. TheTokenLexer(std::move(TheTokenLexer)),
  469. TheDirLookup(std::move(TheDirLookup)) {}
  470. };
  471. std::vector<IncludeStackInfo> IncludeMacroStack;
  472. /// Actions invoked when some preprocessor activity is
  473. /// encountered (e.g. a file is \#included, etc).
  474. std::unique_ptr<PPCallbacks> Callbacks;
  475. struct MacroExpandsInfo {
  476. Token Tok;
  477. MacroDefinition MD;
  478. SourceRange Range;
  479. MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
  480. : Tok(Tok), MD(MD), Range(Range) {}
  481. };
  482. SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
  483. /// Information about a name that has been used to define a module macro.
  484. struct ModuleMacroInfo {
  485. /// The most recent macro directive for this identifier.
  486. MacroDirective *MD;
  487. /// The active module macros for this identifier.
  488. llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
  489. /// The generation number at which we last updated ActiveModuleMacros.
  490. /// \see Preprocessor::VisibleModules.
  491. unsigned ActiveModuleMacrosGeneration = 0;
  492. /// Whether this macro name is ambiguous.
  493. bool IsAmbiguous = false;
  494. /// The module macros that are overridden by this macro.
  495. llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
  496. ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
  497. };
  498. /// The state of a macro for an identifier.
  499. class MacroState {
  500. mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
  501. ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
  502. const IdentifierInfo *II) const {
  503. if (II->isOutOfDate())
  504. PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
  505. // FIXME: Find a spare bit on IdentifierInfo and store a
  506. // HasModuleMacros flag.
  507. if (!II->hasMacroDefinition() ||
  508. (!PP.getLangOpts().Modules &&
  509. !PP.getLangOpts().ModulesLocalVisibility) ||
  510. !PP.CurSubmoduleState->VisibleModules.getGeneration())
  511. return nullptr;
  512. auto *Info = State.dyn_cast<ModuleMacroInfo*>();
  513. if (!Info) {
  514. Info = new (PP.getPreprocessorAllocator())
  515. ModuleMacroInfo(State.get<MacroDirective *>());
  516. State = Info;
  517. }
  518. if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
  519. Info->ActiveModuleMacrosGeneration)
  520. PP.updateModuleMacroInfo(II, *Info);
  521. return Info;
  522. }
  523. public:
  524. MacroState() : MacroState(nullptr) {}
  525. MacroState(MacroDirective *MD) : State(MD) {}
  526. MacroState(MacroState &&O) noexcept : State(O.State) {
  527. O.State = (MacroDirective *)nullptr;
  528. }
  529. MacroState &operator=(MacroState &&O) noexcept {
  530. auto S = O.State;
  531. O.State = (MacroDirective *)nullptr;
  532. State = S;
  533. return *this;
  534. }
  535. ~MacroState() {
  536. if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
  537. Info->~ModuleMacroInfo();
  538. }
  539. MacroDirective *getLatest() const {
  540. if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
  541. return Info->MD;
  542. return State.get<MacroDirective*>();
  543. }
  544. void setLatest(MacroDirective *MD) {
  545. if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
  546. Info->MD = MD;
  547. else
  548. State = MD;
  549. }
  550. bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
  551. auto *Info = getModuleInfo(PP, II);
  552. return Info ? Info->IsAmbiguous : false;
  553. }
  554. ArrayRef<ModuleMacro *>
  555. getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
  556. if (auto *Info = getModuleInfo(PP, II))
  557. return Info->ActiveModuleMacros;
  558. return None;
  559. }
  560. MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
  561. SourceManager &SourceMgr) const {
  562. // FIXME: Incorporate module macros into the result of this.
  563. if (auto *Latest = getLatest())
  564. return Latest->findDirectiveAtLoc(Loc, SourceMgr);
  565. return {};
  566. }
  567. void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
  568. if (auto *Info = getModuleInfo(PP, II)) {
  569. Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
  570. Info->ActiveModuleMacros.begin(),
  571. Info->ActiveModuleMacros.end());
  572. Info->ActiveModuleMacros.clear();
  573. Info->IsAmbiguous = false;
  574. }
  575. }
  576. ArrayRef<ModuleMacro*> getOverriddenMacros() const {
  577. if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
  578. return Info->OverriddenMacros;
  579. return None;
  580. }
  581. void setOverriddenMacros(Preprocessor &PP,
  582. ArrayRef<ModuleMacro *> Overrides) {
  583. auto *Info = State.dyn_cast<ModuleMacroInfo*>();
  584. if (!Info) {
  585. if (Overrides.empty())
  586. return;
  587. Info = new (PP.getPreprocessorAllocator())
  588. ModuleMacroInfo(State.get<MacroDirective *>());
  589. State = Info;
  590. }
  591. Info->OverriddenMacros.clear();
  592. Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
  593. Overrides.begin(), Overrides.end());
  594. Info->ActiveModuleMacrosGeneration = 0;
  595. }
  596. };
  /// For each IdentifierInfo that was associated with a macro, we
  /// keep a mapping to the history of all macro definitions and #undefs in
  /// reverse order (the latest one is at the head of the list).
  ///
  /// This mapping lives within the \p CurSubmoduleState.
  using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;

  // Forward declaration: BuildingSubmoduleInfo stores a SubmoduleState pointer
  // before the struct itself is defined further down.
  struct SubmoduleState;
  604. /// Information about a submodule that we're currently building.
  605. struct BuildingSubmoduleInfo {
  606. /// The module that we are building.
  607. Module *M;
  608. /// The location at which the module was included.
  609. SourceLocation ImportLoc;
  610. /// Whether we entered this submodule via a pragma.
  611. bool IsPragma;
  612. /// The previous SubmoduleState.
  613. SubmoduleState *OuterSubmoduleState;
  614. /// The number of pending module macro names when we started building this.
  615. unsigned OuterPendingModuleMacroNames;
  616. BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
  617. SubmoduleState *OuterSubmoduleState,
  618. unsigned OuterPendingModuleMacroNames)
  619. : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
  620. OuterSubmoduleState(OuterSubmoduleState),
  621. OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
  622. };
  /// The submodules that are currently being built; exposed read-only through
  /// getBuildingSubmodules().
  SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
  /// Information about a submodule's preprocessor state: its macro table and
  /// the set of modules visible inside it.
  struct SubmoduleState {
    /// The macros for the submodule.
    MacroMap Macros;
    /// The set of modules that are visible within the submodule.
    VisibleModuleSet VisibleModules;
    // Candidates for additional per-submodule state that is not tracked here:
    // FIXME: CounterValue?
    // FIXME: PragmaPushMacroInfo?
  };
  /// Preprocessor state for each submodule, keyed by module.
  std::map<Module *, SubmoduleState> Submodules;

  /// The preprocessor state for preprocessing outside of any submodule.
  SubmoduleState NullSubmoduleState;

  /// The current submodule state. Will be \p NullSubmoduleState if we're not
  /// in a submodule.
  SubmoduleState *CurSubmoduleState;

  /// The files that have been included.
  IncludedFilesSet IncludedFiles;

  /// The set of known macros exported from modules.
  llvm::FoldingSet<ModuleMacro> ModuleMacros;

  /// The names of potential module macros that we've not yet processed.
  llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;

  /// The list of module macros, for each identifier, that are not overridden by
  /// any other module macro.
  llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
      LeafModuleMacros;

  /// Macros that we want to warn about because they are not used at the end
  /// of the translation unit.
  ///
  /// We store just their SourceLocations instead of something like
  /// MacroInfo*. The benefit of this is that when we are deserializing from
  /// PCH, we don't need to deserialize identifiers and macros just so that we
  /// can report that they are unused; we just warn using the SourceLocations
  /// of this set (which will be filled by the ASTReader).
  using WarnUnusedMacroLocsTy = llvm::SmallDenseSet<SourceLocation, 32>;
  WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
  /// This is a pair of an optional message and source location used for pragmas
  /// that annotate macros like pragma clang restrict_expansion and pragma clang
  /// deprecated. This pair stores the optional message and the location of the
  /// annotation pragma for use producing diagnostics and notes.
  using MsgLocationPair = std::pair<std::string, SourceLocation>;

  /// A source location together with a message string; used below as the
  /// payload of the deprecation and restrict-expansion annotations.
  struct MacroAnnotationInfo {
    SourceLocation Location;
    std::string Message;
  };
  668. struct MacroAnnotations {
  669. llvm::Optional<MacroAnnotationInfo> DeprecationInfo;
  670. llvm::Optional<MacroAnnotationInfo> RestrictExpansionInfo;
  671. llvm::Optional<SourceLocation> FinalAnnotationLoc;
  672. static MacroAnnotations makeDeprecation(SourceLocation Loc,
  673. std::string Msg) {
  674. return MacroAnnotations{MacroAnnotationInfo{Loc, std::move(Msg)},
  675. llvm::None, llvm::None};
  676. }
  677. static MacroAnnotations makeRestrictExpansion(SourceLocation Loc,
  678. std::string Msg) {
  679. return MacroAnnotations{
  680. llvm::None, MacroAnnotationInfo{Loc, std::move(Msg)}, llvm::None};
  681. }
  682. static MacroAnnotations makeFinal(SourceLocation Loc) {
  683. return MacroAnnotations{llvm::None, llvm::None, Loc};
  684. }
  685. };
  /// Warning information for macro annotations, keyed by macro identifier.
  llvm::DenseMap<const IdentifierInfo *, MacroAnnotations> AnnotationInfos;

  /// A "freelist" of MacroArgs objects that can be
  /// reused for quick allocation.
  MacroArgs *MacroArgCache = nullptr;

  /// For each IdentifierInfo used in a \#pragma push_macro directive,
  /// we keep a MacroInfo stack used to restore the previous macro value.
  llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
      PragmaPushMacroInfo;

  // Various statistics we track for performance analysis. All counters start
  // at zero; they are updated outside this part of the header.
  unsigned NumDirectives = 0;
  unsigned NumDefined = 0;
  unsigned NumUndefined = 0;
  unsigned NumPragma = 0;
  unsigned NumIf = 0;
  unsigned NumElse = 0;
  unsigned NumEndif = 0;
  unsigned NumEnteredSourceFiles = 0;
  unsigned MaxIncludeStackDepth = 0;
  unsigned NumMacroExpanded = 0;
  unsigned NumFnMacroExpanded = 0;
  unsigned NumBuiltinMacroExpanded = 0;
  unsigned NumFastMacroExpanded = 0;
  unsigned NumTokenPaste = 0;
  unsigned NumFastTokenPaste = 0;
  unsigned NumSkipped = 0;

  /// The predefined macros that the preprocessor should use from the
  /// command line etc.
  std::string Predefines;

  /// The file ID for the preprocessor predefines.
  FileID PredefinesFileID;

  /// The file ID for the PCH through header.
  FileID PCHThroughHeaderFileID;

  /// Whether tokens are being skipped until a \#pragma hdrstop is seen.
  bool SkippingUntilPragmaHdrStop = false;

  /// Whether tokens are being skipped until the through header is seen.
  bool SkippingUntilPCHThroughHeader = false;
  /// \{
  /// Cache of macro expanders to reduce malloc traffic.
  enum { TokenLexerCacheSize = 8 };
  // NOTE(review): no in-class initializer here, unlike the neighbouring
  // members; presumably set by the constructor — confirm.
  unsigned NumCachedTokenLexers;
  std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
  /// \}

  /// Keeps macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  SmallVector<Token, 16> MacroExpandedTokens;
  // Pairs a TokenLexer with a size_t.
  // NOTE(review): presumably the lexer's start offset into
  // MacroExpandedTokens — confirm against the implementation.
  std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;

  /// A record of the macro definitions and expansions that
  /// occurred during preprocessing.
  ///
  /// This is an optional side structure that can be enabled with
  /// \c createPreprocessingRecord() prior to preprocessing.
  PreprocessingRecord *Record = nullptr;

  /// Cached tokens state.
  using CachedTokensTy = SmallVector<Token, 1>;

  /// Cached tokens are stored here when we do backtracking or
  /// lookahead. They are "lexed" by the CachingLex() method.
  CachedTokensTy CachedTokens;

  /// The position of the cached token that CachingLex() should
  /// "lex" next.
  ///
  /// If it points beyond the CachedTokens vector, it means that a normal
  /// Lex() should be invoked.
  CachedTokensTy::size_type CachedLexPos = 0;

  /// Stack of backtrack positions, allowing nested backtracks.
  ///
  /// The EnableBacktrackAtThisPos() method pushes a position to
  /// indicate where CachedLexPos should be set when the BackTrack() method is
  /// invoked (at which point the last position is popped).
  std::vector<CachedTokensTy::size_type> BacktrackPositions;
  /// Node of a singly linked list that stores a MacroInfo by value together
  /// with a pointer to the next node.
  struct MacroInfoChain {
    MacroInfo MI;
    MacroInfoChain *Next;
  };

  /// MacroInfos are managed as a chain for easy disposal. This is the head
  /// of that list.
  MacroInfoChain *MIChainHead = nullptr;
  /// Bring an out-of-date identifier up to date. Called in this header
  /// whenever \c II.isOutOfDate() is true before macro information for the
  /// identifier is consulted. Defined out of line.
  void updateOutOfDateIdentifier(IdentifierInfo &II) const;
  767. public:
  /// Construct a preprocessor. The last three parameters are optional and
  /// default to no identifier lookup, not owning the header search, and a
  /// complete translation unit. Defined out of line.
  Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
               DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
               HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
               IdentifierInfoLookup *IILookup = nullptr,
               bool OwnsHeaderSearch = false,
               TranslationUnitKind TUKind = TU_Complete);

  ~Preprocessor();

  /// Initialize the preprocessor using information about the target.
  ///
  /// \param Target is owned by the caller and must remain valid for the
  /// lifetime of the preprocessor.
  /// \param AuxTarget is owned by the caller and must remain valid for
  /// the lifetime of the preprocessor.
  void Initialize(const TargetInfo &Target,
                  const TargetInfo *AuxTarget = nullptr);

  /// Initialize the preprocessor to parse a model file.
  ///
  /// To parse model files the preprocessor of the original source is reused to
  /// preserve the identifier table. However, to avoid some duplicate
  /// information in the preprocessor, some cleanup is needed before it is used
  /// to parse model files. This method does that cleanup.
  void InitializeForModelFile();

  /// Clean up after model file parsing.
  void FinalizeForModelFile();
  /// Retrieve the preprocessor options used to initialize this
  /// preprocessor.
  PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }

  // Simple accessors for the objects this preprocessor works with. The
  // pointer-backed ones (Diags, Target, BuiltinInfo) are dereferenced
  // unconditionally.
  DiagnosticsEngine &getDiagnostics() const { return *Diags; }
  void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }

  const LangOptions &getLangOpts() const { return LangOpts; }
  const TargetInfo &getTargetInfo() const { return *Target; }
  /// May return null, since the auxiliary target is held as a plain pointer.
  const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
  FileManager &getFileManager() const { return FileMgr; }
  SourceManager &getSourceManager() const { return SourceMgr; }
  HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }

  IdentifierTable &getIdentifierTable() { return Identifiers; }
  const IdentifierTable &getIdentifierTable() const { return Identifiers; }
  SelectorTable &getSelectorTable() { return Selectors; }
  Builtin::Context &getBuiltinInfo() { return *BuiltinInfo; }
  /// The allocator used, among other things, for ModuleMacroInfo objects.
  llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }

  /// Set the external preprocessor source; the pointer is stored as given.
  void setExternalSource(ExternalPreprocessorSource *Source) {
    ExternalSource = Source;
  }

  /// Retrieve the external preprocessor source, as last set.
  ExternalPreprocessorSource *getExternalSource() const {
    return ExternalSource;
  }

  /// Retrieve the module loader associated with this preprocessor.
  ModuleLoader &getModuleLoader() const { return TheModuleLoader; }

  /// Report the module loader's HadFatalFailure flag.
  bool hadModuleLoaderFatalFailure() const {
    return TheModuleLoader.HadFatalFailure;
  }

  /// Retrieve the number of directives that have been processed by the
  /// preprocessor.
  unsigned getNumDirectives() const {
    return NumDirectives;
  }

  /// True if we are currently preprocessing a \#if or \#elif directive.
  bool isParsingIfOrElifDirective() const {
    return ParsingIfOrElifDirective;
  }
  828. /// Control whether the preprocessor retains comments in output.
  829. void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
  830. this->KeepComments = KeepComments | KeepMacroComments;
  831. this->KeepMacroComments = KeepMacroComments;
  832. }
  /// Whether comments are retained; also true when only macro comments were
  /// requested (see SetCommentRetentionState).
  bool getCommentRetentionState() const { return KeepComments; }

  /// Enable or disable pragmas.
  void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
  bool getPragmasEnabled() const { return PragmasEnabled; }

  /// Set the flag that suppresses the include-not-found error.
  void SetSuppressIncludeNotFoundError(bool Suppress) {
    SuppressIncludeNotFoundError = Suppress;
  }
  839. bool GetSuppressIncludeNotFoundError() {
  840. return SuppressIncludeNotFoundError;
  841. }
  /// Sets whether the preprocessor is responsible for producing output or if
  /// it is producing tokens to be consumed by Parse and Sema.
  void setPreprocessedOutput(bool IsPreprocessedOutput) {
    PreprocessedOutput = IsPreprocessedOutput;
  }

  /// Returns true if the preprocessor is responsible for generating output,
  /// false if it is producing tokens to be consumed by Parse and Sema.
  bool isPreprocessedOutput() const { return PreprocessedOutput; }

  /// Return true if we are lexing directly from the specified lexer.
  bool isCurrentLexer(const PreprocessorLexer *L) const {
    return CurPPLexer == L;
  }

  /// Return the current lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }

  /// Return the current file lexer being lexed from.
  ///
  /// Note that this ignores any potentially active macro expansions and _Pragma
  /// expansions going on at the time.
  PreprocessorLexer *getCurrentFileLexer() const;

  /// Return the submodule owning the file being lexed. This may not be
  /// the current module if we have changed modules since entering the file.
  Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }

  /// Returns the FileID for the preprocessor predefines.
  FileID getPredefinesFileID() const { return PredefinesFileID; }

  /// \{
  /// Accessors for preprocessor callbacks.
  ///
  /// Note that this class takes ownership of any PPCallbacks object given to
  /// it. The pointer returned here is non-owning and may be null when no
  /// callbacks are installed.
  PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
  875. void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
  876. if (Callbacks)
  877. C = std::make_unique<PPChainedCallbacks>(std::move(C),
  878. std::move(Callbacks));
  879. Callbacks = std::move(C);
  880. }
  881. /// \}
  /// Get the number of tokens processed so far.
  unsigned getTokenCount() const { return TokenCount; }

  /// Get the max number of tokens before issuing a -Wmax-tokens warning.
  /// The value can be replaced with overrideMaxTokens().
  unsigned getMaxTokens() const { return MaxTokens; }
  886. void overrideMaxTokens(unsigned Value, SourceLocation Loc) {
  887. MaxTokens = Value;
  888. MaxTokensOverrideLoc = Loc;
  889. };
  /// Location recorded by the most recent overrideMaxTokens() call.
  SourceLocation getMaxTokensOverrideLoc() const { return MaxTokensOverrideLoc; }

  /// Register a function that would be called on each token in the final
  /// expanded token stream.
  /// This also reports annotation tokens produced by the parser.
  ///
  /// The callable is moved into the preprocessor, replacing any watcher that
  /// was set before.
  void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
    OnToken = std::move(F);
  }

  /// Set the PreprocessToken flag.
  void setPreprocessToken(bool Preprocess) { PreprocessToken = Preprocess; }
  898. bool isMacroDefined(StringRef Id) {
  899. return isMacroDefined(&Identifiers.get(Id));
  900. }
  901. bool isMacroDefined(const IdentifierInfo *II) {
  902. return II->hasMacroDefinition() &&
  903. (!getLangOpts().Modules || (bool)getMacroDefinition(II));
  904. }
  905. /// Determine whether II is defined as a macro within the module M,
  906. /// if that is a module that we've already preprocessed. Does not check for
  907. /// macros imported into M.
  908. bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
  909. if (!II->hasMacroDefinition())
  910. return false;
  911. auto I = Submodules.find(M);
  912. if (I == Submodules.end())
  913. return false;
  914. auto J = I->second.Macros.find(II);
  915. if (J == I->second.Macros.end())
  916. return false;
  917. auto *MD = J->second.getLatest();
  918. return MD && MD->isDefined();
  919. }
  920. MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
  921. if (!II->hasMacroDefinition())
  922. return {};
  923. MacroState &S = CurSubmoduleState->Macros[II];
  924. auto *MD = S.getLatest();
  925. while (MD && isa<VisibilityMacroDirective>(MD))
  926. MD = MD->getPrevious();
  927. return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
  928. S.getActiveModuleMacros(*this, II),
  929. S.isAmbiguous(*this, II));
  930. }
  931. MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
  932. SourceLocation Loc) {
  933. if (!II->hadMacroDefinition())
  934. return {};
  935. MacroState &S = CurSubmoduleState->Macros[II];
  936. MacroDirective::DefInfo DI;
  937. if (auto *MD = S.getLatest())
  938. DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
  939. // FIXME: Compute the set of active module macros at the specified location.
  940. return MacroDefinition(DI.getDirective(),
  941. S.getActiveModuleMacros(*this, II),
  942. S.isAmbiguous(*this, II));
  943. }
  944. /// Given an identifier, return its latest non-imported MacroDirective
  945. /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
  946. MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
  947. if (!II->hasMacroDefinition())
  948. return nullptr;
  949. auto *MD = getLocalMacroDirectiveHistory(II);
  950. if (!MD || MD->getDefinition().isUndefined())
  951. return nullptr;
  952. return MD;
  953. }
  954. const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
  955. return const_cast<Preprocessor*>(this)->getMacroInfo(II);
  956. }
  957. MacroInfo *getMacroInfo(const IdentifierInfo *II) {
  958. if (!II->hasMacroDefinition())
  959. return nullptr;
  960. if (auto MD = getMacroDefinition(II))
  961. return MD.getMacroInfo();
  962. return nullptr;
  963. }
  /// Given an identifier, return the latest non-imported macro
  /// directive for that identifier.
  ///
  /// One can iterate over all previous macro directives, starting from the
  /// most recent one.
  MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;

  /// Add a directive to the macro directive history for this identifier.
  void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
  972. DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
  973. SourceLocation Loc) {
  974. DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
  975. appendMacroDirective(II, MD);
  976. return MD;
  977. }
  978. DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
  979. MacroInfo *MI) {
  980. return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
  981. }
  /// Set a MacroDirective that was loaded from a PCH file.
  void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
                               MacroDirective *MD);

  /// Register an exported macro for a module and identifier.
  ///
  /// \param Overrides the module macros that the new macro overrides.
  /// \param IsNew output flag.
  /// NOTE(review): presumably set to true when a new ModuleMacro was created
  /// rather than an existing one returned — confirm against the
  /// implementation.
  ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
                              ArrayRef<ModuleMacro *> Overrides, bool &IsNew);

  /// Look up the module macro for \p II in module \p Mod.
  ModuleMacro *getModuleMacro(Module *Mod, const IdentifierInfo *II);
  989. /// Get the list of leaf (non-overridden) module macros for a name.
  990. ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
  991. if (II->isOutOfDate())
  992. updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
  993. auto I = LeafModuleMacros.find(II);
  994. if (I != LeafModuleMacros.end())
  995. return I->second;
  996. return None;
  997. }
  /// Get the list of submodules that we're currently building.
  ///
  /// The returned ArrayRef is a view of BuildingSubmoduleStack, not a copy.
  ArrayRef<BuildingSubmoduleInfo> getBuildingSubmodules() const {
    return BuildingSubmoduleStack;
  }
  /// \{
  /// Iterators for the macro history table. Currently defined macros have
  /// IdentifierInfo::hasMacroDefinition() set and an empty
  /// MacroInfo::getUndefLoc() at the head of the list.
  using macro_iterator = MacroMap::const_iterator;

  macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
  macro_iterator macro_end(bool IncludeExternalMacros = true) const;

  /// Range over the macro history table, built from macro_begin() and
  /// macro_end() with the same \p IncludeExternalMacros value.
  llvm::iterator_range<macro_iterator>
  macros(bool IncludeExternalMacros = true) const {
    // The two iterators are obtained in separate statements, begin first.
    // NOTE(review): presumably to keep the call order fixed — confirm before
    // folding them into a single expression.
    macro_iterator begin = macro_begin(IncludeExternalMacros);
    macro_iterator end = macro_end(IncludeExternalMacros);
    return llvm::make_range(begin, end);
  }
  /// \}
  1016. /// Mark the file as included.
  1017. /// Returns true if this is the first time the file was included.
  1018. bool markIncluded(const FileEntry *File) {
  1019. HeaderInfo.getFileInfo(File);
  1020. return IncludedFiles.insert(File).second;
  1021. }
  1022. /// Return true if this header has already been included.
  1023. bool alreadyIncluded(const FileEntry *File) const {
  1024. return IncludedFiles.count(File);
  1025. }
  /// Get the set of included files.
  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }

  /// Return the name of the macro defined before \p Loc that has
  /// spelling \p Tokens. If there are multiple macros with the same spelling,
  /// return the last one defined.
  StringRef getLastMacroWithSpelling(SourceLocation Loc,
                                     ArrayRef<TokenValue> Tokens) const;

  /// Get the predefines text for this Preprocessor.
  const std::string &getPredefines() const { return Predefines; }

  /// Set the predefines for this Preprocessor.
  ///
  /// These predefines are automatically injected when parsing the main file.
  /// The text is copied. The \c const \c char* overload assigns the pointer
  /// directly to a std::string, so \p P must not be null.
  void setPredefines(const char *P) { Predefines = P; }
  void setPredefines(StringRef P) { Predefines = std::string(P); }

  /// Return information about the specified preprocessor
  /// identifier token.
  IdentifierInfo *getIdentifierInfo(StringRef Name) const {
    return &Identifiers.get(Name);
  }
  /// Add the specified pragma handler to this preprocessor.
  ///
  /// If \p Namespace is non-empty, then it is a token required to exist on the
  /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
  void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);

  /// Add a pragma handler with an empty namespace.
  void AddPragmaHandler(PragmaHandler *Handler) {
    AddPragmaHandler(StringRef(), Handler);
  }

  /// Remove the specific pragma handler from this preprocessor.
  ///
  /// If \p Namespace is non-empty, then it should be the namespace that
  /// \p Handler was added to. It is an error to remove a handler that
  /// has not been registered.
  void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);

  /// Remove a pragma handler that was added with an empty namespace.
  void RemovePragmaHandler(PragmaHandler *Handler) {
    RemovePragmaHandler(StringRef(), Handler);
  }

  /// Install empty handlers for all pragmas (making them ignored).
  void IgnorePragmas();

  /// Set empty line handler. The pointer is stored as given.
  void setEmptylineHandler(EmptylineHandler *Handler) { Emptyline = Handler; }

  /// Get the empty line handler, as last set.
  EmptylineHandler *getEmptylineHandler() const { return Emptyline; }

  /// Add the specified comment handler to the preprocessor.
  void addCommentHandler(CommentHandler *Handler);

  /// Remove the specified comment handler.
  ///
  /// It is an error to remove a handler that has not been registered.
  void removeCommentHandler(CommentHandler *Handler);
  /// Set the code completion handler to the given object. Only the address
  /// of \p Handler is stored.
  void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
    CodeComplete = &Handler;
  }

  /// Retrieve the current code-completion handler, or null if none is set.
  CodeCompletionHandler *getCodeCompletionHandler() const {
    return CodeComplete;
  }

  /// Clear out the code completion handler.
  void clearCodeCompletionHandler() {
    CodeComplete = nullptr;
  }

  /// Hook used by the lexer to invoke the "included file" code
  /// completion point.
  void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);

  /// Hook used by the lexer to invoke the "natural language" code
  /// completion point.
  void CodeCompleteNaturalLanguage();

  /// Set the code completion token for filtering purposes.
  void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
    CodeCompletionII = Filter;
  }

  /// Set the code completion token range for detecting replacement range later
  /// on.
  void setCodeCompletionTokenRange(const SourceLocation Start,
                                   const SourceLocation End) {
    CodeCompletionTokenRange = {Start, End};
  }

  /// Get the range last stored by setCodeCompletionTokenRange().
  SourceRange getCodeCompletionTokenRange() const {
    return CodeCompletionTokenRange;
  }
  1104. /// Get the code completion token for filtering purposes.
  1105. StringRef getCodeCompletionFilter() {
  1106. if (CodeCompletionII)
  1107. return CodeCompletionII->getName();
  1108. return {};
  1109. }
  /// Retrieve the preprocessing record, or NULL if there is no
  /// preprocessing record.
  PreprocessingRecord *getPreprocessingRecord() const { return Record; }

  /// Create a new preprocessing record, which will keep track of
  /// all macro expansions, macro definitions, etc.
  void createPreprocessingRecord();

  /// Returns true if the FileEntry is the PCH through header.
  bool isPCHThroughHeader(const FileEntry *FE);

  /// True if creating a PCH with a through header.
  bool creatingPCHWithThroughHeader();

  /// True if using a PCH with a through header.
  bool usingPCHWithThroughHeader();

  /// True if creating a PCH with a \#pragma hdrstop.
  bool creatingPCHWithPragmaHdrStop();

  /// True if using a PCH with a \#pragma hdrstop.
  bool usingPCHWithPragmaHdrStop();

  /// Skip tokens until after the \#include of the through header or
  /// until after a \#pragma hdrstop.
  void SkipTokensWhileUsingPCH();

  /// Process directives while skipping until the through header or
  /// \#pragma hdrstop is found.
  void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
                                           SourceLocation HashLoc);

  /// Enter the main source file, which implicitly adds the builtin defines
  /// etc. The function takes no FileID argument.
  /// NOTE(review): presumably the main file is taken from the SourceManager —
  /// confirm.
  void EnterMainSourceFile();

  /// Inform the preprocessor callbacks that processing is complete.
  void EndSourceFile();

  /// Add a source file to the top of the include stack and
  /// start lexing tokens from it instead of the current buffer.
  ///
  /// Emits a diagnostic, doesn't enter the file, and returns true on error.
  bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
                       SourceLocation Loc, bool IsFirstIncludeOfFile = true);

  /// Add a Macro to the top of the include stack and start lexing
  /// tokens from it instead of the current buffer.
  ///
  /// \param Args specifies the tokens input to a function-like macro.
  /// \param ILEnd specifies the location of the ')' for a function-like macro
  /// or the identifier for an object-like macro.
  void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
                  MacroArgs *Args);
  1152. private:
  /// Add a "macro" context to the top of the include stack,
  /// which will cause the lexer to start returning the specified tokens.
  ///
  /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
  /// will not be subject to further macro expansion. Otherwise, these tokens
  /// will be re-macro-expanded when/if expansion is enabled.
  ///
  /// If \p OwnsTokens is false, this method assumes that the specified stream
  /// of tokens has a permanent owner somewhere, so they do not need to be
  /// copied. If it is true, it assumes the array of tokens is allocated with
  /// \c new[] and the Preprocessor will delete[] it.
  ///
  /// If \p IsReinject is true, the resulting tokens will have the
  /// Token::IsReinjected flag set; see the flag documentation for details.
  ///
  /// This overload is private; the public overloads choose \p OwnsTokens
  /// according to the type of their token argument.
  void EnterTokenStream(const Token *Toks, unsigned NumToks,
                        bool DisableMacroExpansion, bool OwnsTokens,
                        bool IsReinject);
  1170. public:
  1171. void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
  1172. bool DisableMacroExpansion, bool IsReinject) {
  1173. EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true,
  1174. IsReinject);
  1175. }
  1176. void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion,
  1177. bool IsReinject) {
  1178. EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false,
  1179. IsReinject);
  1180. }
  1181. /// Pop the current lexer/macro expansion off the top of the lexer stack.
  1182. ///
  1183. /// This should only be used in situations where the current state of the
  1184. /// top-of-stack lexer is known.
  1185. void RemoveTopOfLexerStack();
  1186. /// From the point that this method is called, and until
  1187. /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
  1188. /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
  1189. /// make the Preprocessor re-lex the same tokens.
  1190. ///
  1191. /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
  1192. /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  1193. /// be matched with the EnableBacktrackAtThisPos calls in reverse order.
  1194. ///
  1195. /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
  1196. /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
  1197. /// tokens will continue indefinitely.
  1198. /// isBacktrackEnabled() reports whether such caching is currently on.
  1199. void EnableBacktrackAtThisPos();
  1200. /// Disable the most recent EnableBacktrackAtThisPos call.
  1201. void CommitBacktrackedTokens();
  1202. /// Make the Preprocessor re-lex the tokens that were lexed since
  1203. /// EnableBacktrackAtThisPos() was previously called.
  1204. void Backtrack();
  1205. /// True if EnableBacktrackAtThisPos() was called and
  1206. /// caching of tokens is on.
  1207. bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
  1208. /// Lex the next token for this preprocessor.
  1209. void Lex(Token &Result);
  1210. /// Lex a token, forming a header-name token if possible.
  /// NOTE(review): \p AllowMacroExpansion presumably selects between expanded
  /// and unexpanded lexing, as it does in LexStringLiteral — confirm.
  1211. bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true);
  /// NOTE(review): undocumented; the meaning of the returned bool is not
  /// visible from this header — check the definition before relying on it.
  1212. bool LexAfterModuleImport(Token &Result);
  /// NOTE(review): undocumented; presumably appends tokens to \p Toks —
  /// confirm against the definition.
  1213. void CollectPpImportSuffix(SmallVectorImpl<Token> &Toks);
  /// NOTE(review): undocumented; presumably records \p M as visible with
  /// import location \p Loc (see getModuleImportLoc) — confirm.
  1214. void makeModuleVisible(Module *M, SourceLocation Loc);
  1215. SourceLocation getModuleImportLoc(Module *M) const {
  1216. return CurSubmoduleState->VisibleModules.getImportLoc(M);
  1217. }
  1218. /// Lex a string literal, which may be the concatenation of multiple
  1219. /// string literals and may even come from macro expansion.
  1220. /// \returns true on success, false if a error diagnostic has been generated.
  1221. bool LexStringLiteral(Token &Result, std::string &String,
  1222. const char *DiagnosticTag, bool AllowMacroExpansion) {
  1223. if (AllowMacroExpansion)
  1224. Lex(Result);
  1225. else
  1226. LexUnexpandedToken(Result);
  1227. return FinishLexStringLiteral(Result, String, DiagnosticTag,
  1228. AllowMacroExpansion);
  1229. }
  1230. /// Complete the lexing of a string literal where the first token has
  1231. /// already been lexed into \p Result (see LexStringLiteral, which forwards
  /// its arguments and return value here).
  1232. bool FinishLexStringLiteral(Token &Result, std::string &String,
  1233. const char *DiagnosticTag,
  1234. bool AllowMacroExpansion);
  1235. /// Lex a token. If it's a comment, keep lexing until we get
  1236. /// something not a comment.
  1237. ///
  1238. /// This is useful in -E -C mode where comments would foul up preprocessor
  1239. /// directive handling.
  1240. void LexNonComment(Token &Result) {
  1241. do
  1242. Lex(Result);
  1243. while (Result.getKind() == tok::comment);
  1244. }
  1245. /// Just like Lex, but disables macro expansion of identifier tokens.
  1246. void LexUnexpandedToken(Token &Result) {
  1247. // Disable macro expansion.
  1248. bool OldVal = DisableMacroExpansion;
  1249. DisableMacroExpansion = true;
  1250. // Lex the token.
  1251. Lex(Result);
  1252. // Reenable it.
  1253. DisableMacroExpansion = OldVal;
  1254. }
  1255. /// Like LexNonComment, but this disables macro expansion of
  1256. /// identifier tokens.
  1257. void LexUnexpandedNonComment(Token &Result) {
  1258. do
  1259. LexUnexpandedToken(Result);
  1260. while (Result.getKind() == tok::comment);
  1261. }
  1262. /// Parses a simple integer literal to get its numeric value. Floating
  1263. /// point literals and user defined literals are rejected. Used primarily to
  1264. /// handle pragmas that accept integer arguments.
  /// NOTE(review): presumably the value is stored in \p Value; the meaning of
  /// the bool result is not visible here — confirm against the definition.
  1265. bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
  1266. /// Disables macro expansion everywhere except for preprocessor directives.
  1267. void SetMacroExpansionOnlyInDirectives() {
  1268. DisableMacroExpansion = true;
  1269. MacroExpansionInDirectivesOverride = true;
  1270. }
  1271. /// Peeks ahead N tokens and returns that token without consuming any
  1272. /// tokens.
  1273. ///
  1274. /// LookAhead(0) returns the next token that would be returned by Lex(),
  1275. /// LookAhead(1) returns the token after it, etc. This returns normal
  1276. /// tokens after phase 5. As such, it is equivalent to using
  1277. /// 'Lex', not 'LexUnexpandedToken'.
  1278. const Token &LookAhead(unsigned N) {
  1279. assert(LexLevel == 0 && "cannot use lookahead while lexing");
  1280. if (CachedLexPos + N < CachedTokens.size())
  1281. return CachedTokens[CachedLexPos+N];
  1282. else
  1283. return PeekAhead(N+1);
  1284. }
  1285. /// When backtracking is enabled and tokens are cached,
  1286. /// this allows to revert a specific number of tokens.
  1287. ///
  1288. /// Note that the number of tokens being reverted should be up to the last
  1289. /// backtrack position, not more.
  1290. void RevertCachedTokens(unsigned N) {
  1291. assert(isBacktrackEnabled() &&
  1292. "Should only be called when tokens are cached for backtracking");
  1293. assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
  1294. && "Should revert tokens up to the last backtrack position, not more");
  1295. assert(signed(CachedLexPos) - signed(N) >= 0 &&
  1296. "Corrupted backtrack positions ?");
  1297. CachedLexPos -= N;
  1298. }
  1299. /// Enters a token in the token stream to be lexed next.
  1300. ///
  1301. /// If BackTrack() is called afterwards, the token will remain at the
  1302. /// insertion point.
  1303. /// If \p IsReinject is true, resulting token will have Token::IsReinjected
  1304. /// flag set. See the flag documentation for details.
  1305. void EnterToken(const Token &Tok, bool IsReinject) {
  1306. if (LexLevel) {
  1307. // It's not correct in general to enter caching lex mode while in the
  1308. // middle of a nested lexing action.
  1309. auto TokCopy = std::make_unique<Token[]>(1);
  1310. TokCopy[0] = Tok;
  1311. EnterTokenStream(std::move(TokCopy), 1, true, IsReinject);
  1312. } else {
  1313. EnterCachingLexMode();
  1314. assert(IsReinject && "new tokens in the middle of cached stream");
  1315. CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
  1316. }
  1317. }
  1318. /// We notify the Preprocessor that if it is caching tokens (because
  1319. /// backtrack is enabled) it should replace the most recent cached tokens
  1320. /// with the given annotation token. This function has no effect if
  1321. /// backtracking is not enabled.
  1322. ///
  1323. /// Note that the use of this function is just for optimization, so that the
  1324. /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
  1325. /// invoked.
  1326. void AnnotateCachedTokens(const Token &Tok) {
  1327. assert(Tok.isAnnotation() && "Expected annotation token");
  1328. if (CachedLexPos != 0 && isBacktrackEnabled())
  1329. AnnotatePreviousCachedTokens(Tok);
  1330. }
  1331. /// Get the location of the last cached token, suitable for setting the end
  1332. /// location of an annotation token.
  1333. SourceLocation getLastCachedTokenLocation() const {
  1334. assert(CachedLexPos != 0);
  1335. return CachedTokens[CachedLexPos-1].getLastLoc();
  1336. }
  1337. /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
  1338. /// CachedTokens.
  1339. bool IsPreviousCachedToken(const Token &Tok) const;
  1340. /// Replace the token at `CachedLexPos - 1` in CachedTokens by the tokens
  1341. /// in \p NewToks.
  1342. ///
  1343. /// Useful when a token needs to be split into smaller ones and the most
  1344. /// recent token in CachedTokens must be updated to reflect that.
  1345. void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
  1346. /// Replace the last token with an annotation token.
  1347. ///
  1348. /// Like AnnotateCachedTokens(), this routine replaces an
  1349. /// already-parsed (and resolved) token with an annotation
  1350. /// token. However, this routine only replaces the last token with
  1351. /// the annotation token; it does not affect any other cached
  1352. /// tokens. This function has no effect if backtracking is not
  1353. /// enabled.
  1354. void ReplaceLastTokenWithAnnotation(const Token &Tok) {
  1355. assert(Tok.isAnnotation() && "Expected annotation token");
  1356. if (CachedLexPos != 0 && isBacktrackEnabled())
  1357. CachedTokens[CachedLexPos-1] = Tok;
  1358. }
  1359. /// Enter an annotation token into the token stream.
  /// NOTE(review): presumably \p Range, \p Kind and \p AnnotationVal become
  /// the token's source range, kind and annotation payload — confirm against
  /// the definition.
  1360. void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
  1361. void *AnnotationVal);
  1362. /// Determine whether it's possible for a future call to Lex to produce an
  1363. /// annotation token created by a previous call to EnterAnnotationToken.
  1364. bool mightHavePendingAnnotationTokens() {
  1365. return CurLexerKind != CLK_Lexer;
  1366. }
  1367. /// Update the current token to represent the provided
  1368. /// identifier, in order to cache an action performed by typo correction.
  1369. void TypoCorrectToken(const Token &Tok) {
  1370. assert(Tok.getIdentifierInfo() && "Expected identifier token");
  1371. if (CachedLexPos != 0 && isBacktrackEnabled())
  1372. CachedTokens[CachedLexPos-1] = Tok;
  1373. }
  1374. /// Recompute the current lexer kind (CurLexerKind) based on the CurLexer/
  1375. /// CurTokenLexer pointers.
  1376. void recomputeCurLexerKind();
  1377. /// Returns true if incremental processing is enabled
  1378. bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
  1379. /// Enables the incremental processing
  1380. void enableIncrementalProcessing(bool value = true) {
  1381. IncrementalProcessing = value;
  1382. }
  1383. /// Specify the point at which code-completion will be performed.
  1384. ///
  1385. /// \param File the file in which code completion should occur. If
  1386. /// this file is included multiple times, code-completion will
  1387. /// perform completion the first time it is included. If null, this
  1388. /// function clears out the code-completion point.
  1389. ///
  1390. /// \param Line the line at which code completion should occur
  1391. /// (1-based).
  1392. ///
  1393. /// \param Column the column at which code completion should occur
  1394. /// (1-based).
  1395. ///
  1396. /// \returns true if an error occurred, false otherwise.
  1397. bool SetCodeCompletionPoint(const FileEntry *File,
  1398. unsigned Line, unsigned Column);
  1399. /// Determine if we are performing code completion.
  1400. bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
  1401. /// Returns the location of the code-completion point.
  1402. ///
  1403. /// Returns an invalid location if code-completion is not enabled or the file
  1404. /// containing the code-completion point has not been lexed yet.
  1405. SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
  1406. /// Returns the start location of the file of code-completion point.
  1407. ///
  1408. /// Returns an invalid location if code-completion is not enabled or the file
  1409. /// containing the code-completion point has not been lexed yet.
  1410. SourceLocation getCodeCompletionFileLoc() const {
  1411. return CodeCompletionFileLoc;
  1412. }
  1413. /// Returns true if code-completion is enabled and we have hit the
  1414. /// code-completion point.
  1415. bool isCodeCompletionReached() const { return CodeCompletionReached; }
  1416. /// Note that we hit the code-completion point.
  1417. void setCodeCompletionReached() {
  1418. assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
  1419. CodeCompletionReached = true;
  1420. // Silence any diagnostics that occur after we hit the code-completion.
  1421. getDiagnostics().setSuppressAllDiagnostics(true);
  1422. }
  1423. /// The location of the currently-active \#pragma clang
  1424. /// arc_cf_code_audited begin.
  1425. ///
  1426. /// Returns an invalid location if there is no such pragma active.
  1427. std::pair<IdentifierInfo *, SourceLocation>
  1428. getPragmaARCCFCodeAuditedInfo() const {
  1429. return PragmaARCCFCodeAuditedInfo;
  1430. }
  1431. /// Set the location of the currently-active \#pragma clang
  1432. /// arc_cf_code_audited begin. An invalid location ends the pragma.
  1433. void setPragmaARCCFCodeAuditedInfo(IdentifierInfo *Ident,
  1434. SourceLocation Loc) {
  1435. PragmaARCCFCodeAuditedInfo = {Ident, Loc};
  1436. }
  1437. /// The location of the currently-active \#pragma clang
  1438. /// assume_nonnull begin.
  1439. ///
  1440. /// Returns an invalid location if there is no such pragma active.
  1441. SourceLocation getPragmaAssumeNonNullLoc() const {
  1442. return PragmaAssumeNonNullLoc;
  1443. }
  1444. /// Set the location of the currently-active \#pragma clang
  1445. /// assume_nonnull begin. An invalid location ends the pragma.
  1446. void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
  1447. PragmaAssumeNonNullLoc = Loc;
  1448. }
  1449. /// Set the directory in which the main file should be considered
  1450. /// to have been found, if it is not a real file.
  1451. void setMainFileDir(const DirectoryEntry *Dir) {
  1452. MainFileDir = Dir;
  1453. }
  1454. /// Instruct the preprocessor to skip part of the main source file.
  1455. ///
  1456. /// \param Bytes The number of bytes in the preamble to skip.
  1457. ///
  1458. /// \param StartOfLine Whether skipping these bytes puts the lexer at the
  1459. /// start of a line.
  1460. void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
  1461. SkipMainFilePreamble.first = Bytes;
  1462. SkipMainFilePreamble.second = StartOfLine;
  1463. }
  1464. /// Forwarding function for diagnostics. This emits a diagnostic at
  1465. /// the specified Token's location, translating the token's start
  1466. /// position in the current buffer into a SourcePosition object for rendering.
  1467. DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
  1468. return Diags->Report(Loc, DiagID);
  1469. }
  1470. DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
  1471. return Diags->Report(Tok.getLocation(), DiagID);
  1472. }
  1473. /// Return the 'spelling' of the token at the given
  1474. /// location; does not go up to the spelling location or down to the
  1475. /// expansion location.
  1476. ///
  1477. /// \param buffer A buffer which will be used only if the token requires
  1478. /// "cleaning", e.g. if it contains trigraphs or escaped newlines
  1479. /// \param invalid If non-null, will be set \c true if an error occurs.
  1480. StringRef getSpelling(SourceLocation loc,
  1481. SmallVectorImpl<char> &buffer,
  1482. bool *invalid = nullptr) const {
  1483. return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
  1484. }
  1485. /// Return the 'spelling' of the Tok token.
  1486. ///
  1487. /// The spelling of a token is the characters used to represent the token in
  1488. /// the source file after trigraph expansion and escaped-newline folding. In
  1489. /// particular, this wants to get the true, uncanonicalized, spelling of
  1490. /// things like digraphs, UCNs, etc.
  1491. ///
  1492. /// \param Invalid If non-null, will be set \c true if an error occurs.
  1493. std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
  1494. return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
  1495. }
  1496. /// Get the spelling of a token into a preallocated buffer, instead
  1497. /// of as an std::string.
  1498. ///
  1499. /// The caller is required to allocate enough space for the token, which is
  1500. /// guaranteed to be at least Tok.getLength() bytes long. The length of the
  1501. /// actual result is returned.
  1502. ///
  1503. /// Note that this method may do two possible things: it may either fill in
  1504. /// the buffer specified with characters, or it may *change the input pointer*
  1505. /// to point to a constant buffer with the data already in it (avoiding a
  1506. /// copy). The caller is not allowed to modify the returned buffer pointer
  1507. /// if an internal buffer is returned.
  1508. unsigned getSpelling(const Token &Tok, const char *&Buffer,
  1509. bool *Invalid = nullptr) const {
  1510. return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
  1511. }
  1512. /// Get the spelling of a token into a SmallVector.
  1513. ///
  1514. /// Note that the returned StringRef may not point into the
  1515. /// supplied \p Buffer if a copy can be avoided.
  1516. StringRef getSpelling(const Token &Tok,
  1517. SmallVectorImpl<char> &Buffer,
  1518. bool *Invalid = nullptr) const;
  1519. /// Relex the token at the specified location.
  1520. /// \returns true if there was a failure, false on success.
  1521. bool getRawToken(SourceLocation Loc, Token &Result,
  1522. bool IgnoreWhiteSpace = false) {
  1523. return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
  1524. }
  1525. /// Given a Token \p Tok that is a numeric constant with length 1,
  1526. /// return the character.
  1527. char
  1528. getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
  1529. bool *Invalid = nullptr) const {
  1530. assert(Tok.is(tok::numeric_constant) &&
  1531. Tok.getLength() == 1 && "Called on unsupported token");
  1532. assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
  1533. // If the token is carrying a literal data pointer, just use it.
  1534. if (const char *D = Tok.getLiteralData())
  1535. return *D;
  1536. // Otherwise, fall back on getCharacterData, which is slower, but always
  1537. // works.
  1538. return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
  1539. }
  1540. /// Retrieve the name of the immediate macro expansion.
  1541. ///
  1542. /// This routine starts from a source location, and finds the name of the
  1543. /// macro responsible for its immediate expansion. It looks through any
  1544. /// intervening macro argument expansions to compute this. It returns a
  1545. /// StringRef that refers to the SourceManager-owned buffer of the source
  1546. /// where that macro name is spelled. Thus, the result shouldn't out-live
  1547. /// the SourceManager.
  1548. StringRef getImmediateMacroName(SourceLocation Loc) {
  1549. return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
  1550. }
  1551. /// Copy the specified string into a scratch buffer and set the
  1552. /// specified token's location and length to it.
  1553. ///
  1554. /// If specified, \p ExpansionLocStart and \p ExpansionLocEnd provide the
  1555. /// location of the expansion point of the token.
  1556. void CreateString(StringRef Str, Token &Tok,
  1557. SourceLocation ExpansionLocStart = SourceLocation(),
  1558. SourceLocation ExpansionLocEnd = SourceLocation());
  1559. /// Split the first \p Length characters out of the token starting at
  1560. /// \p TokLoc and return a location pointing to the split token. Re-lexing
  1561. /// from the split token will return the split token rather than the original.
  1562. SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
  1563. /// Computes the source location just past the end of the
  1564. /// token at this source location.
  1565. ///
  1566. /// This routine can be used to produce a source location that
  1567. /// points just past the end of the token referenced by \p Loc, and
  1568. /// is generally used when a diagnostic needs to point just after a
  1569. /// token where it expected something different that it received. If
  1570. /// the returned source location would not be meaningful (e.g., if
  1571. /// it points into a macro), this routine returns an invalid
  1572. /// source location.
  1573. ///
  1574. /// \param Offset an offset from the end of the token, where the source
  1575. /// location should refer to. The default offset (0) produces a source
  1576. /// location pointing just past the end of the token; an offset of 1 produces
  1577. /// a source location pointing to the last character in the token, etc.
  1578. SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
  1579. return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
  1580. }
  1581. /// Returns true if the given MacroID location points at the first
  1582. /// token of the macro expansion.
  1583. ///
  1584. /// \param MacroBegin If non-null and function returns true, it is set to
  1585. /// begin location of the macro.
  1586. bool isAtStartOfMacroExpansion(SourceLocation loc,
  1587. SourceLocation *MacroBegin = nullptr) const {
  1588. return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
  1589. MacroBegin);
  1590. }
  1591. /// Returns true if the given MacroID location points at the last
  1592. /// token of the macro expansion.
  1593. ///
  1594. /// \param MacroEnd If non-null and function returns true, it is set to
  1595. /// end location of the macro.
  1596. bool isAtEndOfMacroExpansion(SourceLocation loc,
  1597. SourceLocation *MacroEnd = nullptr) const {
  1598. return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
  1599. }
  1600. /// Print the token to stderr, used for debugging.
  1601. void DumpToken(const Token &Tok, bool DumpFlags = false) const;
  /// NOTE(review): the three helpers below are undocumented; presumably they
  /// are debugging dumps like DumpToken — confirm against the definitions.
  1602. void DumpLocation(SourceLocation Loc) const;
  1603. void DumpMacro(const MacroInfo &MI) const;
  1604. void dumpMacroInfo(const IdentifierInfo *II);
  1605. /// Given a location that specifies the start of a
  1606. /// token, return a new location that specifies a character within the token.
  1607. SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
  1608. unsigned Char) const {
  1609. return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
  1610. }
  1611. /// Increment the counters for the number of token paste operations
  1612. /// performed.
  1613. ///
  1614. /// If fast was specified, this is a 'fast paste' case we handled.
  1615. void IncrementPasteCounter(bool isFast) {
  1616. if (isFast)
  1617. ++NumFastTokenPaste;
  1618. else
  1619. ++NumTokenPaste;
  1620. }
  /// NOTE(review): undocumented; presumably prints the preprocessor's
  /// statistics counters — confirm against the definition.
  1621. void PrintStats();
  /// NOTE(review): undocumented; presumably the result is a byte count —
  /// confirm what is included against the definition.
  1622. size_t getTotalMemory() const;
  1623. /// When the macro expander pastes together a comment (/##/) in Microsoft
  1624. /// mode, this method handles updating the current state, returning the
  1625. /// token on the next source line in \p Tok.
  1626. void HandleMicrosoftCommentPaste(Token &Tok);
  1627. //===--------------------------------------------------------------------===//
  1628. // Preprocessor callback methods. These are invoked by a lexer as various
  1629. // directives and events are found.
  1630. /// Given a tok::raw_identifier token, look up the
  1631. /// identifier information for the token and install it into the token,
  1632. /// updating the token kind accordingly.
  1633. IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
  1634. private:
  /// Per-identifier value of type unsigned.
  /// NOTE(review): presumably the DiagID registered via SetPoisonReason —
  /// confirm against the definition.
  1635. llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
  1636. public:
  1637. /// Specifies the reason for poisoning an identifier.
  1638. ///
  1639. /// If that identifier is accessed while poisoned, then this reason will be
  1640. /// used instead of the default "poisoned" diagnostic.
  1641. void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
  1642. /// Display the reason for a poisoned identifier.
  1643. void HandlePoisonedIdentifier(Token & Identifier);
  1644. void MaybeHandlePoisonedIdentifier(Token & Identifier) {
  1645. if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
  1646. if(II->isPoisoned()) {
  1647. HandlePoisonedIdentifier(Identifier);
  1648. }
  1649. }
  1650. }
  1651. private:
  1652. /// Identifiers used for SEH handling in Borland. These are only
  1653. /// allowed in particular circumstances.
  1654. // __except block
  1655. IdentifierInfo *Ident__exception_code,
  1656. *Ident___exception_code,
  1657. *Ident_GetExceptionCode;
  1658. // __except filter expression
  1659. IdentifierInfo *Ident__exception_info,
  1660. *Ident___exception_info,
  1661. *Ident_GetExceptionInfo;
  1662. // __finally
  1663. IdentifierInfo *Ident__abnormal_termination,
  1664. *Ident___abnormal_termination,
  1665. *Ident_AbnormalTermination;
  /// NOTE(review): undocumented; presumably a pointer to the end of the
  /// current lexer's buffer — confirm against the definition.
  1666. const char *getCurLexerEndPos();
  /// NOTE(review): undocumented; presumably emits a diagnostic about headers
  /// in \p Mod's umbrella directory — confirm against the definition.
  1667. void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
  1668. public:
  /// NOTE(review): presumably poisons (or, with \p Poison false, unpoisons)
  /// the SEH identifiers declared above — confirm against the definition.
  1669. void PoisonSEHIdentifiers(bool Poison = true); // Borland
  1670. /// Callback invoked when the lexer reads an identifier and has
  1671. /// filled in the token's IdentifierInfo member.
  1672. ///
  1673. /// This callback potentially macro expands it or turns it into a named
  1674. /// token (like 'for').
  1675. ///
  1676. /// \returns true if we actually computed a token, false if we need to
  1677. /// lex again.
  1678. bool HandleIdentifier(Token &Identifier);
  1679. /// Callback invoked when the lexer hits the end of the current file.
  1680. ///
  1681. /// This either returns the EOF token and returns true, or
  1682. /// pops a level off the include stack and returns false, at which point the
  1683. /// client should call lex again.
  1684. bool HandleEndOfFile(Token &Result, SourceLocation Loc,
  1685. bool isEndOfMacro = false);
  1686. /// Callback invoked when the current TokenLexer hits the end of its
  1687. /// token stream.
  /// NOTE(review): the meaning of the bool result is not documented here;
  /// presumably it mirrors HandleEndOfFile — confirm against the definition.
  1688. bool HandleEndOfTokenLexer(Token &Result);
  1689. /// Callback invoked when the lexer sees a # token at the start of a
  1690. /// line.
  1691. ///
  1692. /// This consumes the directive, modifies the lexer/preprocessor state, and
  1693. /// advances the lexer(s) so that the next token read is the correct one.
  1694. void HandleDirective(Token &Result);
  1695. /// Ensure that the next token is a tok::eod token.
  1696. ///
  1697. /// If not, emit a diagnostic and consume up until the eod.
  1698. /// If \p EnableMacros is true, then we consider macros that expand to zero
  1699. /// tokens as being ok.
  1700. ///
  1701. /// \return The location of the end of the directive (the terminating
  1702. /// newline).
  1703. SourceLocation CheckEndOfDirective(const char *DirType,
  1704. bool EnableMacros = false);
  1705. /// Read and discard all tokens remaining on the current line until
  1706. /// the tok::eod token is found.
  /// \returns the range of the skipped tokens.
  1707. SourceRange DiscardUntilEndOfDirective();
  1708. /// Returns true if the preprocessor has seen a use of
  1709. /// __DATE__ or __TIME__ in the file so far.
  1710. bool SawDateOrTime() const {
  1711. return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
  1712. }
  1713. unsigned getCounterValue() const { return CounterValue; }
  1714. void setCounterValue(unsigned V) { CounterValue = V; }
  1715. /// Retrieves the module that we're currently building, if any.
  1716. Module *getCurrentModule();
  1717. /// Allocate a new MacroInfo object with the provided SourceLocation.
  1718. MacroInfo *AllocateMacroInfo(SourceLocation L);
  1719. /// Turn the specified lexer token into a fully checked and spelled
  1720. /// filename, e.g. as an operand of \#include.
  1721. ///
  1722. /// The caller is expected to provide a buffer that is large enough to hold
  1723. /// the spelling of the filename, but is also expected to handle the case
  1724. /// when this method decides to use a different buffer.
  1725. ///
  1726. /// \returns true if the input filename was in <>'s or false if it was
  1727. /// in ""'s.
  1728. bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
  1729. /// Given a "foo" or \<foo> reference, look up the indicated file.
  1730. ///
  1731. /// \returns None on failure. \p isAngled indicates whether the file
  1732. /// reference is for system \#include's or not (i.e. using <> instead of "").
  1733. Optional<FileEntryRef>
  1734. LookupFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled,
  1735. const DirectoryLookup *FromDir, const FileEntry *FromFile,
  1736. const DirectoryLookup **CurDir, SmallVectorImpl<char> *SearchPath,
  1737. SmallVectorImpl<char> *RelativePath,
  1738. ModuleMap::KnownHeader *SuggestedModule, bool *IsMapped,
  1739. bool *IsFrameworkFound, bool SkipCache = false);
  1740. /// Get the DirectoryLookup structure used to find the current
  1741. /// FileEntry, if CurLexer is non-null and if applicable.
  1742. ///
  1743. /// This allows us to implement \#include_next and find directory-specific
  1744. /// properties.
  1745. const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
  1746. /// Return true if we're in the top-level file, not in a \#include.
  1747. bool isInPrimaryFile() const;
  1748. /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
  1749. /// followed by EOD. Return true if the token is not a valid on-off-switch.
  1750. bool LexOnOffSwitch(tok::OnOffSwitch &Result);
  /// NOTE(review): undocumented; presumably validates \p MacroNameTok as a
  /// macro name (see ReadMacroName for \p isDefineUndef and \p ShadowFlag) —
  /// confirm the meaning of the bool result against the definition.
  1751. bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
  1752. bool *ShadowFlag = nullptr);
  /// NOTE(review): EnterSubmodule/LeaveSubmodule are undocumented; presumably
  /// they bracket the processing of submodule \p M — confirm.
  1753. void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
  1754. Module *LeaveSubmodule(bool ForPragma);
  1755. private:
  /// TokenLexer::ExpandFunctionArguments is granted access to the private
  /// members of this class.
  1756. friend void TokenLexer::ExpandFunctionArguments();
  /// Push the current lexer state (lexer kind, submodule, lexer, PP lexer,
  /// token lexer and directory lookup) onto IncludeMacroStack.
  ///
  /// CurLexer and CurTokenLexer are moved into the new stack entry, and
  /// CurPPLexer is reset to null afterwards. Must not be called while the
  /// current lexer kind is CLK_CachingLexer.
  1757. void PushIncludeMacroStack() {
  1758. assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
  1759. IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
  1760. std::move(CurLexer), CurPPLexer,
  1761. std::move(CurTokenLexer), CurDirLookup);
  // The saved entry now holds the PP lexer pointer; clear the current one.
  1762. CurPPLexer = nullptr;
  1763. }
  1764. void PopIncludeMacroStack() {
  1765. CurLexer = std::move(IncludeMacroStack.back().TheLexer);
  1766. CurPPLexer = IncludeMacroStack.back().ThePPLexer;
  1767. CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
  1768. CurDirLookup = IncludeMacroStack.back().TheDirLookup;
  1769. CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
  1770. CurLexerKind = IncludeMacroStack.back().CurLexerKind;
  1771. IncludeMacroStack.pop_back();
  1772. }
  1773. void PropagateLineStartLeadingSpaceInfo(Token &Result);
  1774. /// Determine whether we need to create module macros for #defines in the
  1775. /// current context.
  1776. bool needModuleMacros() const;
  1777. /// Update the set of active module macros and ambiguity flag for a module
  1778. /// macro name.
  1779. void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
  1780. DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
  1781. SourceLocation Loc);
  1782. UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
  1783. VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
  1784. bool isPublic);
  1785. /// Lex and validate a macro name, which occurs after a
  1786. /// \#define or \#undef.
  1787. ///
  1788. /// \param MacroNameTok Token that represents the name defined or undefined.
  1789. /// \param IsDefineUndef Kind if preprocessor directive.
  1790. /// \param ShadowFlag Points to flag that is set if macro name shadows
  1791. /// a keyword.
  1792. ///
  1793. /// This emits a diagnostic, sets the token kind to eod,
  1794. /// and discards the rest of the macro line if the macro name is invalid.
  1795. void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
  1796. bool *ShadowFlag = nullptr);
  1797. /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
  1798. /// entire line) of the macro's tokens and adds them to MacroInfo, and while
  1799. /// doing so performs certain validity checks including (but not limited to):
  1800. /// - # (stringization) is followed by a macro parameter
  1801. /// \param MacroNameTok - Token that represents the macro name
  /// \param ImmediatelyAfterHeaderGuard - Macro follows an \#ifndef header guard
  1803. ///
  1804. /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
  1805. /// returns a nullptr if an invalid sequence of tokens is encountered.
  1806. MacroInfo *ReadOptionalMacroParameterListAndBody(
  1807. const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
  1808. /// The ( starting an argument list of a macro definition has just been read.
  1809. /// Lex the rest of the parameters and the closing ), updating \p MI with
  1810. /// what we learn and saving in \p LastTok the last token read.
  1811. /// Return true if an error occurs parsing the arg list.
  1812. bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
  1813. /// We just read a \#if or related directive and decided that the
  1814. /// subsequent tokens are in the \#if'd out portion of the
  1815. /// file. Lex the rest of the file, until we see an \#endif. If \p
  1816. /// FoundNonSkipPortion is true, then we have already emitted code for part of
  1817. /// this \#if directive, so \#else/\#elif blocks should never be entered. If
  1818. /// \p FoundElse is false, then \#else directives are ok, if not, then we have
  1819. /// already seen one so a \#else directive is a duplicate. When this returns,
  1820. /// the caller can lex the first valid token.
  1821. void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
  1822. SourceLocation IfTokenLoc,
  1823. bool FoundNonSkipPortion, bool FoundElse,
  1824. SourceLocation ElseLoc = SourceLocation());
  1825. /// Information about the result for evaluating an expression for a
  1826. /// preprocessor directive.
  1827. struct DirectiveEvalResult {
  1828. /// Whether the expression was evaluated as true or not.
  1829. bool Conditional;
  1830. /// True if the expression contained identifiers that were undefined.
  1831. bool IncludedUndefinedIds;
  1832. /// The source range for the expression.
  1833. SourceRange ExprRange;
  1834. };
  1835. /// Evaluate an integer constant expression that may occur after a
  1836. /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
  1837. ///
  1838. /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
  1839. DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
  1840. /// Install the standard preprocessor pragmas:
  1841. /// \#pragma GCC poison/system_header/dependency and \#pragma once.
  1842. void RegisterBuiltinPragmas();
  1843. /// Register builtin macros such as __LINE__ with the identifier table.
  1844. void RegisterBuiltinMacros();
  1845. /// If an identifier token is read that is to be expanded as a macro, handle
  1846. /// it and return the next token as 'Tok'. If we lexed a token, return true;
  1847. /// otherwise the caller should lex again.
  1848. bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
  /// Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it
  /// is going to lex to the cache, and when it finishes the tokens are removed
  /// from the end of the cache.
  1854. Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
  1855. ArrayRef<Token> tokens);
  1856. void removeCachedMacroExpandedTokensOfLastLexer();
  1857. /// Determine whether the next preprocessor token to be
  1858. /// lexed is a '('. If so, consume the token and return true, if not, this
  1859. /// method should have no observable side-effect on the lexed tokens.
  1860. bool isNextPPTokenLParen();
  1861. /// After reading "MACRO(", this method is invoked to read all of the formal
  1862. /// arguments specified for the macro invocation. Returns null on error.
  1863. MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
  1864. SourceLocation &MacroEnd);
  1865. /// If an identifier token is read that is to be expanded
  1866. /// as a builtin macro, handle it and return the next token as 'Tok'.
  1867. void ExpandBuiltinMacro(Token &Tok);
  1868. /// Read a \c _Pragma directive, slice it up, process it, then
  1869. /// return the first token after the directive.
  1870. /// This assumes that the \c _Pragma token has just been read into \p Tok.
  1871. void Handle_Pragma(Token &Tok);
  1872. /// Like Handle_Pragma except the pragma text is not enclosed within
  1873. /// a string literal.
  1874. void HandleMicrosoft__pragma(Token &Tok);
  1875. /// Add a lexer to the top of the include stack and
  1876. /// start lexing tokens from it instead of the current buffer.
  1877. void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
  1878. /// Set the FileID for the preprocessor predefines.
  1879. void setPredefinesFileID(FileID FID) {
  1880. assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
  1881. PredefinesFileID = FID;
  1882. }
  1883. /// Set the FileID for the PCH through header.
  1884. void setPCHThroughHeaderFileID(FileID FID);
  1885. /// Returns true if we are lexing from a file and not a
  1886. /// pragma or a macro.
  1887. static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
  1888. return L ? !L->isPragmaLexer() : P != nullptr;
  1889. }
  1890. static bool IsFileLexer(const IncludeStackInfo& I) {
  1891. return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
  1892. }
  1893. bool IsFileLexer() const {
  1894. return IsFileLexer(CurLexer.get(), CurPPLexer);
  1895. }
  1896. //===--------------------------------------------------------------------===//
  1897. // Caching stuff.
  1898. void CachingLex(Token &Result);
  1899. bool InCachingLexMode() const {
  1900. // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
  1901. // that we are past EOF, not that we are in CachingLex mode.
  1902. return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty();
  1903. }
  1904. void EnterCachingLexMode();
  1905. void EnterCachingLexModeUnchecked();
  1906. void ExitCachingLexMode() {
  1907. if (InCachingLexMode())
  1908. RemoveTopOfLexerStack();
  1909. }
  1910. const Token &PeekAhead(unsigned N);
  1911. void AnnotatePreviousCachedTokens(const Token &Tok);
  1912. //===--------------------------------------------------------------------===//
  1913. /// Handle*Directive - implement the various preprocessor directives. These
  1914. /// should side-effect the current preprocessor object so that the next call
  1915. /// to Lex() will return the appropriate token next.
  1916. void HandleLineDirective();
  1917. void HandleDigitDirective(Token &Tok);
  1918. void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
  1919. void HandleIdentSCCSDirective(Token &Tok);
  1920. void HandleMacroPublicDirective(Token &Tok);
  1921. void HandleMacroPrivateDirective();
  1922. /// An additional notification that can be produced by a header inclusion or
  1923. /// import to tell the parser what happened.
  1924. struct ImportAction {
  1925. enum ActionKind {
  1926. None,
  1927. ModuleBegin,
  1928. ModuleImport,
  1929. SkippedModuleImport,
  1930. Failure,
  1931. } Kind;
  1932. Module *ModuleForHeader = nullptr;
  1933. ImportAction(ActionKind AK, Module *Mod = nullptr)
  1934. : Kind(AK), ModuleForHeader(Mod) {
  1935. assert((AK == None || Mod || AK == Failure) &&
  1936. "no module for module action");
  1937. }
  1938. };
  1939. Optional<FileEntryRef> LookupHeaderIncludeOrImport(
  1940. const DirectoryLookup **CurDir, StringRef &Filename,
  1941. SourceLocation FilenameLoc, CharSourceRange FilenameRange,
  1942. const Token &FilenameTok, bool &IsFrameworkFound, bool IsImportDecl,
  1943. bool &IsMapped, const DirectoryLookup *LookupFrom,
  1944. const FileEntry *LookupFromFile, StringRef &LookupFilename,
  1945. SmallVectorImpl<char> &RelativePath, SmallVectorImpl<char> &SearchPath,
  1946. ModuleMap::KnownHeader &SuggestedModule, bool isAngled);
  1947. // File inclusion.
  1948. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok,
  1949. const DirectoryLookup *LookupFrom = nullptr,
  1950. const FileEntry *LookupFromFile = nullptr);
  1951. ImportAction
  1952. HandleHeaderIncludeOrImport(SourceLocation HashLoc, Token &IncludeTok,
  1953. Token &FilenameTok, SourceLocation EndLoc,
  1954. const DirectoryLookup *LookupFrom = nullptr,
  1955. const FileEntry *LookupFromFile = nullptr);
  1956. void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
  1957. void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
  1958. void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
  1959. void HandleMicrosoftImportDirective(Token &Tok);
  1960. public:
  1961. /// Check that the given module is available, producing a diagnostic if not.
  1962. /// \return \c true if the check failed (because the module is not available).
  1963. /// \c false if the module appears to be usable.
  1964. static bool checkModuleIsAvailable(const LangOptions &LangOpts,
  1965. const TargetInfo &TargetInfo,
  1966. DiagnosticsEngine &Diags, Module *M);
  1967. // Module inclusion testing.
  1968. /// Find the module that owns the source or header file that
  1969. /// \p Loc points to. If the location is in a file that was included
  1970. /// into a module, or is outside any module, returns nullptr.
  1971. Module *getModuleForLocation(SourceLocation Loc);
  1972. /// We want to produce a diagnostic at location IncLoc concerning an
  /// unreachable effect at location MLoc (e.g., where a desired entity was
  1974. /// declared or defined). Determine whether the right way to make MLoc
  1975. /// reachable is by #include, and if so, what header should be included.
  1976. ///
  1977. /// This is not necessarily fast, and might load unexpected module maps, so
  1978. /// should only be called by code that intends to produce an error.
  1979. ///
  1980. /// \param IncLoc The location at which the missing effect was detected.
  1981. /// \param MLoc A location within an unimported module at which the desired
  1982. /// effect occurred.
  1983. /// \return A file that can be #included to provide the desired effect. Null
  1984. /// if no such file could be determined or if a #include is not
  /// appropriate (e.g., if a module should be imported instead).
  1986. const FileEntry *getHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
  1987. SourceLocation MLoc);
  1988. bool isRecordingPreamble() const {
  1989. return PreambleConditionalStack.isRecording();
  1990. }
  1991. bool hasRecordedPreamble() const {
  1992. return PreambleConditionalStack.hasRecordedPreamble();
  1993. }
  1994. ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
  1995. return PreambleConditionalStack.getStack();
  1996. }
  1997. void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  1998. PreambleConditionalStack.setStack(s);
  1999. }
  2000. void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
  2001. llvm::Optional<PreambleSkipInfo> SkipInfo) {
  2002. PreambleConditionalStack.startReplaying();
  2003. PreambleConditionalStack.setStack(s);
  2004. PreambleConditionalStack.SkipInfo = SkipInfo;
  2005. }
  2006. llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
  2007. return PreambleConditionalStack.SkipInfo;
  2008. }
  2009. private:
  2010. /// After processing predefined file, initialize the conditional stack from
  2011. /// the preamble.
  2012. void replayPreambleConditionalStack();
  2013. // Macro handling.
  2014. void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
  2015. void HandleUndefDirective();
  2016. // Conditional Inclusion.
  2017. void HandleIfdefDirective(Token &Result, const Token &HashToken,
  2018. bool isIfndef, bool ReadAnyTokensBeforeDirective);
  2019. void HandleIfDirective(Token &IfToken, const Token &HashToken,
  2020. bool ReadAnyTokensBeforeDirective);
  2021. void HandleEndifDirective(Token &EndifToken);
  2022. void HandleElseDirective(Token &Result, const Token &HashToken);
  2023. void HandleElifFamilyDirective(Token &ElifToken, const Token &HashToken,
  2024. tok::PPKeywordKind Kind);
  2025. // Pragmas.
  2026. void HandlePragmaDirective(PragmaIntroducer Introducer);
  2027. void ResolvePragmaIncludeInstead(SourceLocation Location) const;
  2028. public:
  2029. void HandlePragmaOnce(Token &OnceTok);
  2030. void HandlePragmaMark(Token &MarkTok);
  2031. void HandlePragmaPoison();
  2032. void HandlePragmaSystemHeader(Token &SysHeaderTok);
  2033. void HandlePragmaIncludeInstead(Token &Tok);
  2034. void HandlePragmaDependency(Token &DependencyTok);
  2035. void HandlePragmaPushMacro(Token &Tok);
  2036. void HandlePragmaPopMacro(Token &Tok);
  2037. void HandlePragmaIncludeAlias(Token &Tok);
  2038. void HandlePragmaModuleBuild(Token &Tok);
  2039. void HandlePragmaHdrstop(Token &Tok);
  2040. IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
  2041. // Return true and store the first token only if any CommentHandler
  2042. // has inserted some tokens and getCommentRetentionState() is false.
  2043. bool HandleComment(Token &result, SourceRange Comment);
  2044. /// A macro is used, update information about macros that need unused
  2045. /// warnings.
  2046. void markMacroAsUsed(MacroInfo *MI);
  2047. void addMacroDeprecationMsg(const IdentifierInfo *II, std::string Msg,
  2048. SourceLocation AnnotationLoc) {
  2049. auto Annotations = AnnotationInfos.find(II);
  2050. if (Annotations == AnnotationInfos.end())
  2051. AnnotationInfos.insert(std::make_pair(
  2052. II,
  2053. MacroAnnotations::makeDeprecation(AnnotationLoc, std::move(Msg))));
  2054. else
  2055. Annotations->second.DeprecationInfo =
  2056. MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
  2057. }
  2058. void addRestrictExpansionMsg(const IdentifierInfo *II, std::string Msg,
  2059. SourceLocation AnnotationLoc) {
  2060. auto Annotations = AnnotationInfos.find(II);
  2061. if (Annotations == AnnotationInfos.end())
  2062. AnnotationInfos.insert(
  2063. std::make_pair(II, MacroAnnotations::makeRestrictExpansion(
  2064. AnnotationLoc, std::move(Msg))));
  2065. else
  2066. Annotations->second.RestrictExpansionInfo =
  2067. MacroAnnotationInfo{AnnotationLoc, std::move(Msg)};
  2068. }
  2069. void addFinalLoc(const IdentifierInfo *II, SourceLocation AnnotationLoc) {
  2070. auto Annotations = AnnotationInfos.find(II);
  2071. if (Annotations == AnnotationInfos.end())
  2072. AnnotationInfos.insert(
  2073. std::make_pair(II, MacroAnnotations::makeFinal(AnnotationLoc)));
  2074. else
  2075. Annotations->second.FinalAnnotationLoc = AnnotationLoc;
  2076. }
  2077. const MacroAnnotations &getMacroAnnotations(const IdentifierInfo *II) const {
  2078. return AnnotationInfos.find(II)->second;
  2079. }
  2080. void emitMacroExpansionWarnings(const Token &Identifier) const {
  2081. if (Identifier.getIdentifierInfo()->isDeprecatedMacro())
  2082. emitMacroDeprecationWarning(Identifier);
  2083. if (Identifier.getIdentifierInfo()->isRestrictExpansion() &&
  2084. !SourceMgr.isInMainFile(Identifier.getLocation()))
  2085. emitRestrictExpansionWarning(Identifier);
  2086. }
  2087. private:
  2088. void emitMacroDeprecationWarning(const Token &Identifier) const;
  2089. void emitRestrictExpansionWarning(const Token &Identifier) const;
  2090. void emitFinalMacroWarning(const Token &Identifier, bool IsUndef) const;
  2091. Optional<unsigned>
  2092. getSkippedRangeForExcludedConditionalBlock(SourceLocation HashLoc);
  2093. /// Contains the currently active skipped range mappings for skipping excluded
  2094. /// conditional directives.
  2095. ExcludedPreprocessorDirectiveSkipMapping
  2096. *ExcludedConditionalDirectiveSkipMappings;
  2097. };
  2098. /// Abstract base class that describes a handler that will receive
  2099. /// source ranges for each of the comments encountered in the source file.
  2100. class CommentHandler {
  2101. public:
  2102. virtual ~CommentHandler();
  2103. // The handler shall return true if it has pushed any tokens
  2104. // to be read using e.g. EnterToken or EnterTokenStream.
  2105. virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
  2106. };
  2107. /// Abstract base class that describes a handler that will receive
  2108. /// source ranges for empty lines encountered in the source file.
  2109. class EmptylineHandler {
  2110. public:
  2111. virtual ~EmptylineHandler();
  2112. // The handler handles empty lines.
  2113. virtual void HandleEmptyline(SourceRange Range) = 0;
  2114. };
  2115. /// Registry of pragma handlers added by plugins
  2116. using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
  2117. } // namespace clang
  2118. #endif // LLVM_CLANG_LEX_PREPROCESSOR_H
  2119. #ifdef __GNUC__
  2120. #pragma GCC diagnostic pop
  2121. #endif