PrintPreprocessedOutput.cpp 38 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036
  1. //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This code simply runs the preprocessor on the input file and prints out the
  10. // result. This is the traditional behavior of the -E option.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/Frontend/Utils.h"
  14. #include "clang/Basic/CharInfo.h"
  15. #include "clang/Basic/Diagnostic.h"
  16. #include "clang/Basic/SourceManager.h"
  17. #include "clang/Frontend/PreprocessorOutputOptions.h"
  18. #include "clang/Lex/MacroInfo.h"
  19. #include "clang/Lex/PPCallbacks.h"
  20. #include "clang/Lex/Pragma.h"
  21. #include "clang/Lex/Preprocessor.h"
  22. #include "clang/Lex/TokenConcatenation.h"
  23. #include "llvm/ADT/STLExtras.h"
  24. #include "llvm/ADT/SmallString.h"
  25. #include "llvm/ADT/StringRef.h"
  26. #include "llvm/Support/ErrorHandling.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. #include <cstdio>
  29. using namespace clang;
  30. /// PrintMacroDefinition - Print a macro definition in a form that will be
  31. /// properly accepted back as a definition.
  32. static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
  33. Preprocessor &PP, raw_ostream &OS) {
  34. OS << "#define " << II.getName();
  35. if (MI.isFunctionLike()) {
  36. OS << '(';
  37. if (!MI.param_empty()) {
  38. MacroInfo::param_iterator AI = MI.param_begin(), E = MI.param_end();
  39. for (; AI+1 != E; ++AI) {
  40. OS << (*AI)->getName();
  41. OS << ',';
  42. }
  43. // Last argument.
  44. if ((*AI)->getName() == "__VA_ARGS__")
  45. OS << "...";
  46. else
  47. OS << (*AI)->getName();
  48. }
  49. if (MI.isGNUVarargs())
  50. OS << "..."; // #define foo(x...)
  51. OS << ')';
  52. }
  53. // GCC always emits a space, even if the macro body is empty. However, do not
  54. // want to emit two spaces if the first token has a leading space.
  55. if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
  56. OS << ' ';
  57. SmallString<128> SpellingBuffer;
  58. for (const auto &T : MI.tokens()) {
  59. if (T.hasLeadingSpace())
  60. OS << ' ';
  61. OS << PP.getSpelling(T, SpellingBuffer);
  62. }
  63. }
  64. //===----------------------------------------------------------------------===//
  65. // Preprocessed token printer
  66. //===----------------------------------------------------------------------===//
  67. namespace {
  68. class PrintPPOutputPPCallbacks : public PPCallbacks {
  69. Preprocessor &PP;
  70. SourceManager &SM;
  71. TokenConcatenation ConcatInfo;
  72. public:
  73. raw_ostream &OS;
  74. private:
  75. unsigned CurLine;
  76. bool EmittedTokensOnThisLine;
  77. bool EmittedDirectiveOnThisLine;
  78. SrcMgr::CharacteristicKind FileType;
  79. SmallString<512> CurFilename;
  80. bool Initialized;
  81. bool DisableLineMarkers;
  82. bool DumpDefines;
  83. bool DumpIncludeDirectives;
  84. bool UseLineDirectives;
  85. bool IsFirstFileEntered;
  86. bool MinimizeWhitespace;
  87. bool DirectivesOnly;
  88. Token PrevTok;
  89. Token PrevPrevTok;
  90. public:
  91. PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream &os, bool lineMarkers,
  92. bool defines, bool DumpIncludeDirectives,
  93. bool UseLineDirectives, bool MinimizeWhitespace,
  94. bool DirectivesOnly)
  95. : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os),
  96. DisableLineMarkers(lineMarkers), DumpDefines(defines),
  97. DumpIncludeDirectives(DumpIncludeDirectives),
  98. UseLineDirectives(UseLineDirectives),
  99. MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly) {
  100. CurLine = 0;
  101. CurFilename += "<uninit>";
  102. EmittedTokensOnThisLine = false;
  103. EmittedDirectiveOnThisLine = false;
  104. FileType = SrcMgr::C_User;
  105. Initialized = false;
  106. IsFirstFileEntered = false;
  107. PrevTok.startToken();
  108. PrevPrevTok.startToken();
  109. }
  110. bool isMinimizeWhitespace() const { return MinimizeWhitespace; }
  111. void setEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
  112. bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
  113. void setEmittedDirectiveOnThisLine() { EmittedDirectiveOnThisLine = true; }
  114. bool hasEmittedDirectiveOnThisLine() const {
  115. return EmittedDirectiveOnThisLine;
  116. }
  117. /// Ensure that the output stream position is at the beginning of a new line
  118. /// and inserts one if it does not. It is intended to ensure that directives
  119. /// inserted by the directives not from the input source (such as #line) are
  120. /// in the first column. To insert newlines that represent the input, use
  121. /// MoveToLine(/*...*/, /*RequireStartOfLine=*/true).
  122. void startNewLineIfNeeded();
  123. void FileChanged(SourceLocation Loc, FileChangeReason Reason,
  124. SrcMgr::CharacteristicKind FileType,
  125. FileID PrevFID) override;
  126. void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
  127. StringRef FileName, bool IsAngled,
  128. CharSourceRange FilenameRange,
  129. OptionalFileEntryRef File, StringRef SearchPath,
  130. StringRef RelativePath, const Module *Imported,
  131. SrcMgr::CharacteristicKind FileType) override;
  132. void Ident(SourceLocation Loc, StringRef str) override;
  133. void PragmaMessage(SourceLocation Loc, StringRef Namespace,
  134. PragmaMessageKind Kind, StringRef Str) override;
  135. void PragmaDebug(SourceLocation Loc, StringRef DebugType) override;
  136. void PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) override;
  137. void PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) override;
  138. void PragmaDiagnostic(SourceLocation Loc, StringRef Namespace,
  139. diag::Severity Map, StringRef Str) override;
  140. void PragmaWarning(SourceLocation Loc, PragmaWarningSpecifier WarningSpec,
  141. ArrayRef<int> Ids) override;
  142. void PragmaWarningPush(SourceLocation Loc, int Level) override;
  143. void PragmaWarningPop(SourceLocation Loc) override;
  144. void PragmaExecCharsetPush(SourceLocation Loc, StringRef Str) override;
  145. void PragmaExecCharsetPop(SourceLocation Loc) override;
  146. void PragmaAssumeNonNullBegin(SourceLocation Loc) override;
  147. void PragmaAssumeNonNullEnd(SourceLocation Loc) override;
  148. /// Insert whitespace before emitting the next token.
  149. ///
  150. /// @param Tok Next token to be emitted.
  151. /// @param RequireSpace Ensure at least one whitespace is emitted. Useful
  152. /// if non-tokens have been emitted to the stream.
  153. /// @param RequireSameLine Never emit newlines. Useful when semantics depend
  154. /// on being on the same line, such as directives.
  155. void HandleWhitespaceBeforeTok(const Token &Tok, bool RequireSpace,
  156. bool RequireSameLine);
  157. /// Move to the line of the provided source location. This will
  158. /// return true if a newline was inserted or if
  159. /// the requested location is the first token on the first line.
  160. /// In these cases the next output will be the first column on the line and
  161. /// make it possible to insert indention. The newline was inserted
  162. /// implicitly when at the beginning of the file.
  163. ///
  164. /// @param Tok Token where to move to.
  165. /// @param RequireStartOfLine Whether the next line depends on being in the
  166. /// first column, such as a directive.
  167. ///
  168. /// @return Whether column adjustments are necessary.
  169. bool MoveToLine(const Token &Tok, bool RequireStartOfLine) {
  170. PresumedLoc PLoc = SM.getPresumedLoc(Tok.getLocation());
  171. unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
  172. bool IsFirstInFile =
  173. Tok.isAtStartOfLine() && PLoc.isValid() && PLoc.getLine() == 1;
  174. return MoveToLine(TargetLine, RequireStartOfLine) || IsFirstInFile;
  175. }
  176. /// Move to the line of the provided source location. Returns true if a new
  177. /// line was inserted.
  178. bool MoveToLine(SourceLocation Loc, bool RequireStartOfLine) {
  179. PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  180. unsigned TargetLine = PLoc.isValid() ? PLoc.getLine() : CurLine;
  181. return MoveToLine(TargetLine, RequireStartOfLine);
  182. }
  183. bool MoveToLine(unsigned LineNo, bool RequireStartOfLine);
  184. bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
  185. const Token &Tok) {
  186. return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
  187. }
  188. void WriteLineInfo(unsigned LineNo, const char *Extra=nullptr,
  189. unsigned ExtraLen=0);
  190. bool LineMarkersAreDisabled() const { return DisableLineMarkers; }
  191. void HandleNewlinesInToken(const char *TokStr, unsigned Len);
  192. /// MacroDefined - This hook is called whenever a macro definition is seen.
  193. void MacroDefined(const Token &MacroNameTok,
  194. const MacroDirective *MD) override;
  195. /// MacroUndefined - This hook is called whenever a macro #undef is seen.
  196. void MacroUndefined(const Token &MacroNameTok,
  197. const MacroDefinition &MD,
  198. const MacroDirective *Undef) override;
  199. void BeginModule(const Module *M);
  200. void EndModule(const Module *M);
  201. };
  202. } // end anonymous namespace
  203. void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
  204. const char *Extra,
  205. unsigned ExtraLen) {
  206. startNewLineIfNeeded();
  207. // Emit #line directives or GNU line markers depending on what mode we're in.
  208. if (UseLineDirectives) {
  209. OS << "#line" << ' ' << LineNo << ' ' << '"';
  210. OS.write_escaped(CurFilename);
  211. OS << '"';
  212. } else {
  213. OS << '#' << ' ' << LineNo << ' ' << '"';
  214. OS.write_escaped(CurFilename);
  215. OS << '"';
  216. if (ExtraLen)
  217. OS.write(Extra, ExtraLen);
  218. if (FileType == SrcMgr::C_System)
  219. OS.write(" 3", 2);
  220. else if (FileType == SrcMgr::C_ExternCSystem)
  221. OS.write(" 3 4", 4);
  222. }
  223. OS << '\n';
  224. }
  225. /// MoveToLine - Move the output to the source line specified by the location
  226. /// object. We can do this by emitting some number of \n's, or be emitting a
  227. /// #line directive. This returns false if already at the specified line, true
  228. /// if some newlines were emitted.
  229. bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo,
  230. bool RequireStartOfLine) {
  231. // If it is required to start a new line or finish the current, insert
  232. // vertical whitespace now and take it into account when moving to the
  233. // expected line.
  234. bool StartedNewLine = false;
  235. if ((RequireStartOfLine && EmittedTokensOnThisLine) ||
  236. EmittedDirectiveOnThisLine) {
  237. OS << '\n';
  238. StartedNewLine = true;
  239. CurLine += 1;
  240. EmittedTokensOnThisLine = false;
  241. EmittedDirectiveOnThisLine = false;
  242. }
  243. // If this line is "close enough" to the original line, just print newlines,
  244. // otherwise print a #line directive.
  245. if (CurLine == LineNo) {
  246. // Nothing to do if we are already on the correct line.
  247. } else if (MinimizeWhitespace && DisableLineMarkers) {
  248. // With -E -P -fminimize-whitespace, don't emit anything if not necessary.
  249. } else if (!StartedNewLine && LineNo - CurLine == 1) {
  250. // Printing a single line has priority over printing a #line directive, even
  251. // when minimizing whitespace which otherwise would print #line directives
  252. // for every single line.
  253. OS << '\n';
  254. StartedNewLine = true;
  255. } else if (!DisableLineMarkers) {
  256. if (LineNo - CurLine <= 8) {
  257. const char *NewLines = "\n\n\n\n\n\n\n\n";
  258. OS.write(NewLines, LineNo - CurLine);
  259. } else {
  260. // Emit a #line or line marker.
  261. WriteLineInfo(LineNo, nullptr, 0);
  262. }
  263. StartedNewLine = true;
  264. } else if (EmittedTokensOnThisLine) {
  265. // If we are not on the correct line and don't need to be line-correct,
  266. // at least ensure we start on a new line.
  267. OS << '\n';
  268. StartedNewLine = true;
  269. }
  270. if (StartedNewLine) {
  271. EmittedTokensOnThisLine = false;
  272. EmittedDirectiveOnThisLine = false;
  273. }
  274. CurLine = LineNo;
  275. return StartedNewLine;
  276. }
  277. void PrintPPOutputPPCallbacks::startNewLineIfNeeded() {
  278. if (EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) {
  279. OS << '\n';
  280. EmittedTokensOnThisLine = false;
  281. EmittedDirectiveOnThisLine = false;
  282. }
  283. }
  284. /// FileChanged - Whenever the preprocessor enters or exits a #include file
  285. /// it invokes this handler. Update our conception of the current source
  286. /// position.
  287. void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
  288. FileChangeReason Reason,
  289. SrcMgr::CharacteristicKind NewFileType,
  290. FileID PrevFID) {
  291. // Unless we are exiting a #include, make sure to skip ahead to the line the
  292. // #include directive was at.
  293. SourceManager &SourceMgr = SM;
  294. PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
  295. if (UserLoc.isInvalid())
  296. return;
  297. unsigned NewLine = UserLoc.getLine();
  298. if (Reason == PPCallbacks::EnterFile) {
  299. SourceLocation IncludeLoc = UserLoc.getIncludeLoc();
  300. if (IncludeLoc.isValid())
  301. MoveToLine(IncludeLoc, /*RequireStartOfLine=*/false);
  302. } else if (Reason == PPCallbacks::SystemHeaderPragma) {
  303. // GCC emits the # directive for this directive on the line AFTER the
  304. // directive and emits a bunch of spaces that aren't needed. This is because
  305. // otherwise we will emit a line marker for THIS line, which requires an
  306. // extra blank line after the directive to avoid making all following lines
  307. // off by one. We can do better by simply incrementing NewLine here.
  308. NewLine += 1;
  309. }
  310. CurLine = NewLine;
  311. CurFilename.clear();
  312. CurFilename += UserLoc.getFilename();
  313. FileType = NewFileType;
  314. if (DisableLineMarkers) {
  315. if (!MinimizeWhitespace)
  316. startNewLineIfNeeded();
  317. return;
  318. }
  319. if (!Initialized) {
  320. WriteLineInfo(CurLine);
  321. Initialized = true;
  322. }
  323. // Do not emit an enter marker for the main file (which we expect is the first
  324. // entered file). This matches gcc, and improves compatibility with some tools
  325. // which track the # line markers as a way to determine when the preprocessed
  326. // output is in the context of the main file.
  327. if (Reason == PPCallbacks::EnterFile && !IsFirstFileEntered) {
  328. IsFirstFileEntered = true;
  329. return;
  330. }
  331. switch (Reason) {
  332. case PPCallbacks::EnterFile:
  333. WriteLineInfo(CurLine, " 1", 2);
  334. break;
  335. case PPCallbacks::ExitFile:
  336. WriteLineInfo(CurLine, " 2", 2);
  337. break;
  338. case PPCallbacks::SystemHeaderPragma:
  339. case PPCallbacks::RenameFile:
  340. WriteLineInfo(CurLine);
  341. break;
  342. }
  343. }
  344. void PrintPPOutputPPCallbacks::InclusionDirective(
  345. SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
  346. bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
  347. StringRef SearchPath, StringRef RelativePath, const Module *Imported,
  348. SrcMgr::CharacteristicKind FileType) {
  349. // In -dI mode, dump #include directives prior to dumping their content or
  350. // interpretation.
  351. if (DumpIncludeDirectives) {
  352. MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
  353. const std::string TokenText = PP.getSpelling(IncludeTok);
  354. assert(!TokenText.empty());
  355. OS << "#" << TokenText << " "
  356. << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
  357. << " /* clang -E -dI */";
  358. setEmittedDirectiveOnThisLine();
  359. }
  360. // When preprocessing, turn implicit imports into module import pragmas.
  361. if (Imported) {
  362. switch (IncludeTok.getIdentifierInfo()->getPPKeywordID()) {
  363. case tok::pp_include:
  364. case tok::pp_import:
  365. case tok::pp_include_next:
  366. MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
  367. OS << "#pragma clang module import " << Imported->getFullModuleName(true)
  368. << " /* clang -E: implicit import for "
  369. << "#" << PP.getSpelling(IncludeTok) << " "
  370. << (IsAngled ? '<' : '"') << FileName << (IsAngled ? '>' : '"')
  371. << " */";
  372. setEmittedDirectiveOnThisLine();
  373. break;
  374. case tok::pp___include_macros:
  375. // #__include_macros has no effect on a user of a preprocessed source
  376. // file; the only effect is on preprocessing.
  377. //
  378. // FIXME: That's not *quite* true: it causes the module in question to
  379. // be loaded, which can affect downstream diagnostics.
  380. break;
  381. default:
  382. llvm_unreachable("unknown include directive kind");
  383. break;
  384. }
  385. }
  386. }
  387. /// Handle entering the scope of a module during a module compilation.
  388. void PrintPPOutputPPCallbacks::BeginModule(const Module *M) {
  389. startNewLineIfNeeded();
  390. OS << "#pragma clang module begin " << M->getFullModuleName(true);
  391. setEmittedDirectiveOnThisLine();
  392. }
  393. /// Handle leaving the scope of a module during a module compilation.
  394. void PrintPPOutputPPCallbacks::EndModule(const Module *M) {
  395. startNewLineIfNeeded();
  396. OS << "#pragma clang module end /*" << M->getFullModuleName(true) << "*/";
  397. setEmittedDirectiveOnThisLine();
  398. }
  399. /// Ident - Handle #ident directives when read by the preprocessor.
  400. ///
  401. void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, StringRef S) {
  402. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  403. OS.write("#ident ", strlen("#ident "));
  404. OS.write(S.begin(), S.size());
  405. setEmittedTokensOnThisLine();
  406. }
  407. /// MacroDefined - This hook is called whenever a macro definition is seen.
  408. void PrintPPOutputPPCallbacks::MacroDefined(const Token &MacroNameTok,
  409. const MacroDirective *MD) {
  410. const MacroInfo *MI = MD->getMacroInfo();
  411. // Print out macro definitions in -dD mode and when we have -fdirectives-only
  412. // for C++20 header units.
  413. if ((!DumpDefines && !DirectivesOnly) ||
  414. // Ignore __FILE__ etc.
  415. MI->isBuiltinMacro())
  416. return;
  417. SourceLocation DefLoc = MI->getDefinitionLoc();
  418. if (DirectivesOnly && !MI->isUsed()) {
  419. SourceManager &SM = PP.getSourceManager();
  420. if (SM.isWrittenInBuiltinFile(DefLoc) ||
  421. SM.isWrittenInCommandLineFile(DefLoc))
  422. return;
  423. }
  424. MoveToLine(DefLoc, /*RequireStartOfLine=*/true);
  425. PrintMacroDefinition(*MacroNameTok.getIdentifierInfo(), *MI, PP, OS);
  426. setEmittedDirectiveOnThisLine();
  427. }
  428. void PrintPPOutputPPCallbacks::MacroUndefined(const Token &MacroNameTok,
  429. const MacroDefinition &MD,
  430. const MacroDirective *Undef) {
  431. // Print out macro definitions in -dD mode and when we have -fdirectives-only
  432. // for C++20 header units.
  433. if (!DumpDefines && !DirectivesOnly)
  434. return;
  435. MoveToLine(MacroNameTok.getLocation(), /*RequireStartOfLine=*/true);
  436. OS << "#undef " << MacroNameTok.getIdentifierInfo()->getName();
  437. setEmittedDirectiveOnThisLine();
  438. }
  439. static void outputPrintable(raw_ostream &OS, StringRef Str) {
  440. for (unsigned char Char : Str) {
  441. if (isPrintable(Char) && Char != '\\' && Char != '"')
  442. OS << (char)Char;
  443. else // Output anything hard as an octal escape.
  444. OS << '\\'
  445. << (char)('0' + ((Char >> 6) & 7))
  446. << (char)('0' + ((Char >> 3) & 7))
  447. << (char)('0' + ((Char >> 0) & 7));
  448. }
  449. }
  450. void PrintPPOutputPPCallbacks::PragmaMessage(SourceLocation Loc,
  451. StringRef Namespace,
  452. PragmaMessageKind Kind,
  453. StringRef Str) {
  454. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  455. OS << "#pragma ";
  456. if (!Namespace.empty())
  457. OS << Namespace << ' ';
  458. switch (Kind) {
  459. case PMK_Message:
  460. OS << "message(\"";
  461. break;
  462. case PMK_Warning:
  463. OS << "warning \"";
  464. break;
  465. case PMK_Error:
  466. OS << "error \"";
  467. break;
  468. }
  469. outputPrintable(OS, Str);
  470. OS << '"';
  471. if (Kind == PMK_Message)
  472. OS << ')';
  473. setEmittedDirectiveOnThisLine();
  474. }
  475. void PrintPPOutputPPCallbacks::PragmaDebug(SourceLocation Loc,
  476. StringRef DebugType) {
  477. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  478. OS << "#pragma clang __debug ";
  479. OS << DebugType;
  480. setEmittedDirectiveOnThisLine();
  481. }
  482. void PrintPPOutputPPCallbacks::
  483. PragmaDiagnosticPush(SourceLocation Loc, StringRef Namespace) {
  484. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  485. OS << "#pragma " << Namespace << " diagnostic push";
  486. setEmittedDirectiveOnThisLine();
  487. }
  488. void PrintPPOutputPPCallbacks::
  489. PragmaDiagnosticPop(SourceLocation Loc, StringRef Namespace) {
  490. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  491. OS << "#pragma " << Namespace << " diagnostic pop";
  492. setEmittedDirectiveOnThisLine();
  493. }
  494. void PrintPPOutputPPCallbacks::PragmaDiagnostic(SourceLocation Loc,
  495. StringRef Namespace,
  496. diag::Severity Map,
  497. StringRef Str) {
  498. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  499. OS << "#pragma " << Namespace << " diagnostic ";
  500. switch (Map) {
  501. case diag::Severity::Remark:
  502. OS << "remark";
  503. break;
  504. case diag::Severity::Warning:
  505. OS << "warning";
  506. break;
  507. case diag::Severity::Error:
  508. OS << "error";
  509. break;
  510. case diag::Severity::Ignored:
  511. OS << "ignored";
  512. break;
  513. case diag::Severity::Fatal:
  514. OS << "fatal";
  515. break;
  516. }
  517. OS << " \"" << Str << '"';
  518. setEmittedDirectiveOnThisLine();
  519. }
  520. void PrintPPOutputPPCallbacks::PragmaWarning(SourceLocation Loc,
  521. PragmaWarningSpecifier WarningSpec,
  522. ArrayRef<int> Ids) {
  523. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  524. OS << "#pragma warning(";
  525. switch(WarningSpec) {
  526. case PWS_Default: OS << "default"; break;
  527. case PWS_Disable: OS << "disable"; break;
  528. case PWS_Error: OS << "error"; break;
  529. case PWS_Once: OS << "once"; break;
  530. case PWS_Suppress: OS << "suppress"; break;
  531. case PWS_Level1: OS << '1'; break;
  532. case PWS_Level2: OS << '2'; break;
  533. case PWS_Level3: OS << '3'; break;
  534. case PWS_Level4: OS << '4'; break;
  535. }
  536. OS << ':';
  537. for (ArrayRef<int>::iterator I = Ids.begin(), E = Ids.end(); I != E; ++I)
  538. OS << ' ' << *I;
  539. OS << ')';
  540. setEmittedDirectiveOnThisLine();
  541. }
  542. void PrintPPOutputPPCallbacks::PragmaWarningPush(SourceLocation Loc,
  543. int Level) {
  544. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  545. OS << "#pragma warning(push";
  546. if (Level >= 0)
  547. OS << ", " << Level;
  548. OS << ')';
  549. setEmittedDirectiveOnThisLine();
  550. }
  551. void PrintPPOutputPPCallbacks::PragmaWarningPop(SourceLocation Loc) {
  552. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  553. OS << "#pragma warning(pop)";
  554. setEmittedDirectiveOnThisLine();
  555. }
  556. void PrintPPOutputPPCallbacks::PragmaExecCharsetPush(SourceLocation Loc,
  557. StringRef Str) {
  558. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  559. OS << "#pragma character_execution_set(push";
  560. if (!Str.empty())
  561. OS << ", " << Str;
  562. OS << ')';
  563. setEmittedDirectiveOnThisLine();
  564. }
  565. void PrintPPOutputPPCallbacks::PragmaExecCharsetPop(SourceLocation Loc) {
  566. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  567. OS << "#pragma character_execution_set(pop)";
  568. setEmittedDirectiveOnThisLine();
  569. }
  570. void PrintPPOutputPPCallbacks::
  571. PragmaAssumeNonNullBegin(SourceLocation Loc) {
  572. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  573. OS << "#pragma clang assume_nonnull begin";
  574. setEmittedDirectiveOnThisLine();
  575. }
  576. void PrintPPOutputPPCallbacks::
  577. PragmaAssumeNonNullEnd(SourceLocation Loc) {
  578. MoveToLine(Loc, /*RequireStartOfLine=*/true);
  579. OS << "#pragma clang assume_nonnull end";
  580. setEmittedDirectiveOnThisLine();
  581. }
  582. void PrintPPOutputPPCallbacks::HandleWhitespaceBeforeTok(const Token &Tok,
  583. bool RequireSpace,
  584. bool RequireSameLine) {
  585. // These tokens are not expanded to anything and don't need whitespace before
  586. // them.
  587. if (Tok.is(tok::eof) ||
  588. (Tok.isAnnotation() && !Tok.is(tok::annot_header_unit) &&
  589. !Tok.is(tok::annot_module_begin) && !Tok.is(tok::annot_module_end)))
  590. return;
  591. // EmittedDirectiveOnThisLine takes priority over RequireSameLine.
  592. if ((!RequireSameLine || EmittedDirectiveOnThisLine) &&
  593. MoveToLine(Tok, /*RequireStartOfLine=*/EmittedDirectiveOnThisLine)) {
  594. if (MinimizeWhitespace) {
  595. // Avoid interpreting hash as a directive under -fpreprocessed.
  596. if (Tok.is(tok::hash))
  597. OS << ' ';
  598. } else {
  599. // Print out space characters so that the first token on a line is
  600. // indented for easy reading.
  601. unsigned ColNo = SM.getExpansionColumnNumber(Tok.getLocation());
  602. // The first token on a line can have a column number of 1, yet still
  603. // expect leading white space, if a macro expansion in column 1 starts
  604. // with an empty macro argument, or an empty nested macro expansion. In
  605. // this case, move the token to column 2.
  606. if (ColNo == 1 && Tok.hasLeadingSpace())
  607. ColNo = 2;
  608. // This hack prevents stuff like:
  609. // #define HASH #
  610. // HASH define foo bar
  611. // From having the # character end up at column 1, which makes it so it
  612. // is not handled as a #define next time through the preprocessor if in
  613. // -fpreprocessed mode.
  614. if (ColNo <= 1 && Tok.is(tok::hash))
  615. OS << ' ';
  616. // Otherwise, indent the appropriate number of spaces.
  617. for (; ColNo > 1; --ColNo)
  618. OS << ' ';
  619. }
  620. } else {
  621. // Insert whitespace between the previous and next token if either
  622. // - The caller requires it
  623. // - The input had whitespace between them and we are not in
  624. // whitespace-minimization mode
  625. // - The whitespace is necessary to keep the tokens apart and there is not
  626. // already a newline between them
  627. if (RequireSpace || (!MinimizeWhitespace && Tok.hasLeadingSpace()) ||
  628. ((EmittedTokensOnThisLine || EmittedDirectiveOnThisLine) &&
  629. AvoidConcat(PrevPrevTok, PrevTok, Tok)))
  630. OS << ' ';
  631. }
  632. PrevPrevTok = PrevTok;
  633. PrevTok = Tok;
  634. }
  635. void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
  636. unsigned Len) {
  637. unsigned NumNewlines = 0;
  638. for (; Len; --Len, ++TokStr) {
  639. if (*TokStr != '\n' &&
  640. *TokStr != '\r')
  641. continue;
  642. ++NumNewlines;
  643. // If we have \n\r or \r\n, skip both and count as one line.
  644. if (Len != 1 &&
  645. (TokStr[1] == '\n' || TokStr[1] == '\r') &&
  646. TokStr[0] != TokStr[1]) {
  647. ++TokStr;
  648. --Len;
  649. }
  650. }
  651. if (NumNewlines == 0) return;
  652. CurLine += NumNewlines;
  653. }
  654. namespace {
  655. struct UnknownPragmaHandler : public PragmaHandler {
  656. const char *Prefix;
  657. PrintPPOutputPPCallbacks *Callbacks;
  658. // Set to true if tokens should be expanded
  659. bool ShouldExpandTokens;
  660. UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks,
  661. bool RequireTokenExpansion)
  662. : Prefix(prefix), Callbacks(callbacks),
  663. ShouldExpandTokens(RequireTokenExpansion) {}
  664. void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer,
  665. Token &PragmaTok) override {
  666. // Figure out what line we went to and insert the appropriate number of
  667. // newline characters.
  668. Callbacks->MoveToLine(PragmaTok.getLocation(), /*RequireStartOfLine=*/true);
  669. Callbacks->OS.write(Prefix, strlen(Prefix));
  670. Callbacks->setEmittedTokensOnThisLine();
  671. if (ShouldExpandTokens) {
  672. // The first token does not have expanded macros. Expand them, if
  673. // required.
  674. auto Toks = std::make_unique<Token[]>(1);
  675. Toks[0] = PragmaTok;
  676. PP.EnterTokenStream(std::move(Toks), /*NumToks=*/1,
  677. /*DisableMacroExpansion=*/false,
  678. /*IsReinject=*/false);
  679. PP.Lex(PragmaTok);
  680. }
  681. // Read and print all of the pragma tokens.
  682. bool IsFirst = true;
  683. while (PragmaTok.isNot(tok::eod)) {
  684. Callbacks->HandleWhitespaceBeforeTok(PragmaTok, /*RequireSpace=*/IsFirst,
  685. /*RequireSameLine=*/true);
  686. IsFirst = false;
  687. std::string TokSpell = PP.getSpelling(PragmaTok);
  688. Callbacks->OS.write(&TokSpell[0], TokSpell.size());
  689. Callbacks->setEmittedTokensOnThisLine();
  690. if (ShouldExpandTokens)
  691. PP.Lex(PragmaTok);
  692. else
  693. PP.LexUnexpandedToken(PragmaTok);
  694. }
  695. Callbacks->setEmittedDirectiveOnThisLine();
  696. }
  697. };
  698. } // end anonymous namespace
  699. static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
  700. PrintPPOutputPPCallbacks *Callbacks,
  701. raw_ostream &OS) {
  702. bool DropComments = PP.getLangOpts().TraditionalCPP &&
  703. !PP.getCommentRetentionState();
  704. bool IsStartOfLine = false;
  705. char Buffer[256];
  706. while (true) {
  707. // Two lines joined with line continuation ('\' as last character on the
  708. // line) must be emitted as one line even though Tok.getLine() returns two
  709. // different values. In this situation Tok.isAtStartOfLine() is false even
  710. // though it may be the first token on the lexical line. When
  711. // dropping/skipping a token that is at the start of a line, propagate the
  712. // start-of-line-ness to the next token to not append it to the previous
  713. // line.
  714. IsStartOfLine = IsStartOfLine || Tok.isAtStartOfLine();
  715. Callbacks->HandleWhitespaceBeforeTok(Tok, /*RequireSpace=*/false,
  716. /*RequireSameLine=*/!IsStartOfLine);
  717. if (DropComments && Tok.is(tok::comment)) {
  718. // Skip comments. Normally the preprocessor does not generate
  719. // tok::comment nodes at all when not keeping comments, but under
  720. // -traditional-cpp the lexer keeps /all/ whitespace, including comments.
  721. PP.Lex(Tok);
  722. continue;
  723. } else if (Tok.is(tok::eod)) {
  724. // Don't print end of directive tokens, since they are typically newlines
  725. // that mess up our line tracking. These come from unknown pre-processor
  726. // directives or hash-prefixed comments in standalone assembly files.
  727. PP.Lex(Tok);
  728. // FIXME: The token on the next line after #include should have
  729. // Tok.isAtStartOfLine() set.
  730. IsStartOfLine = true;
  731. continue;
  732. } else if (Tok.is(tok::annot_module_include)) {
  733. // PrintPPOutputPPCallbacks::InclusionDirective handles producing
  734. // appropriate output here. Ignore this token entirely.
  735. PP.Lex(Tok);
  736. IsStartOfLine = true;
  737. continue;
  738. } else if (Tok.is(tok::annot_module_begin)) {
  739. // FIXME: We retrieve this token after the FileChanged callback, and
  740. // retrieve the module_end token before the FileChanged callback, so
  741. // we render this within the file and render the module end outside the
  742. // file, but this is backwards from the token locations: the module_begin
  743. // token is at the include location (outside the file) and the module_end
  744. // token is at the EOF location (within the file).
  745. Callbacks->BeginModule(
  746. reinterpret_cast<Module *>(Tok.getAnnotationValue()));
  747. PP.Lex(Tok);
  748. IsStartOfLine = true;
  749. continue;
  750. } else if (Tok.is(tok::annot_module_end)) {
  751. Callbacks->EndModule(
  752. reinterpret_cast<Module *>(Tok.getAnnotationValue()));
  753. PP.Lex(Tok);
  754. IsStartOfLine = true;
  755. continue;
  756. } else if (Tok.is(tok::annot_header_unit)) {
  757. // This is a header-name that has been (effectively) converted into a
  758. // module-name.
  759. // FIXME: The module name could contain non-identifier module name
  760. // components. We don't have a good way to round-trip those.
  761. Module *M = reinterpret_cast<Module *>(Tok.getAnnotationValue());
  762. std::string Name = M->getFullModuleName();
  763. OS.write(Name.data(), Name.size());
  764. Callbacks->HandleNewlinesInToken(Name.data(), Name.size());
  765. } else if (Tok.isAnnotation()) {
  766. // Ignore annotation tokens created by pragmas - the pragmas themselves
  767. // will be reproduced in the preprocessed output.
  768. PP.Lex(Tok);
  769. continue;
  770. } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
  771. OS << II->getName();
  772. } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
  773. Tok.getLiteralData()) {
  774. OS.write(Tok.getLiteralData(), Tok.getLength());
  775. } else if (Tok.getLength() < std::size(Buffer)) {
  776. const char *TokPtr = Buffer;
  777. unsigned Len = PP.getSpelling(Tok, TokPtr);
  778. OS.write(TokPtr, Len);
  779. // Tokens that can contain embedded newlines need to adjust our current
  780. // line number.
  781. // FIXME: The token may end with a newline in which case
  782. // setEmittedDirectiveOnThisLine/setEmittedTokensOnThisLine afterwards is
  783. // wrong.
  784. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
  785. Callbacks->HandleNewlinesInToken(TokPtr, Len);
  786. if (Tok.is(tok::comment) && Len >= 2 && TokPtr[0] == '/' &&
  787. TokPtr[1] == '/') {
  788. // It's a line comment;
  789. // Ensure that we don't concatenate anything behind it.
  790. Callbacks->setEmittedDirectiveOnThisLine();
  791. }
  792. } else {
  793. std::string S = PP.getSpelling(Tok);
  794. OS.write(S.data(), S.size());
  795. // Tokens that can contain embedded newlines need to adjust our current
  796. // line number.
  797. if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
  798. Callbacks->HandleNewlinesInToken(S.data(), S.size());
  799. if (Tok.is(tok::comment) && S.size() >= 2 && S[0] == '/' && S[1] == '/') {
  800. // It's a line comment;
  801. // Ensure that we don't concatenate anything behind it.
  802. Callbacks->setEmittedDirectiveOnThisLine();
  803. }
  804. }
  805. Callbacks->setEmittedTokensOnThisLine();
  806. IsStartOfLine = false;
  807. if (Tok.is(tok::eof)) break;
  808. PP.Lex(Tok);
  809. }
  810. }
  811. typedef std::pair<const IdentifierInfo *, MacroInfo *> id_macro_pair;
  812. static int MacroIDCompare(const id_macro_pair *LHS, const id_macro_pair *RHS) {
  813. return LHS->first->getName().compare(RHS->first->getName());
  814. }
  815. static void DoPrintMacros(Preprocessor &PP, raw_ostream *OS) {
  816. // Ignore unknown pragmas.
  817. PP.IgnorePragmas();
  818. // -dM mode just scans and ignores all tokens in the files, then dumps out
  819. // the macro table at the end.
  820. PP.EnterMainSourceFile();
  821. Token Tok;
  822. do PP.Lex(Tok);
  823. while (Tok.isNot(tok::eof));
  824. SmallVector<id_macro_pair, 128> MacrosByID;
  825. for (Preprocessor::macro_iterator I = PP.macro_begin(), E = PP.macro_end();
  826. I != E; ++I) {
  827. auto *MD = I->second.getLatest();
  828. if (MD && MD->isDefined())
  829. MacrosByID.push_back(id_macro_pair(I->first, MD->getMacroInfo()));
  830. }
  831. llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
  832. for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
  833. MacroInfo &MI = *MacrosByID[i].second;
  834. // Ignore computed macros like __LINE__ and friends.
  835. if (MI.isBuiltinMacro()) continue;
  836. PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
  837. *OS << '\n';
  838. }
  839. }
  840. /// DoPrintPreprocessedInput - This implements -E mode.
  841. ///
  842. void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
  843. const PreprocessorOutputOptions &Opts) {
  844. // Show macros with no output is handled specially.
  845. if (!Opts.ShowCPP) {
  846. assert(Opts.ShowMacros && "Not yet implemented!");
  847. DoPrintMacros(PP, OS);
  848. return;
  849. }
  850. // Inform the preprocessor whether we want it to retain comments or not, due
  851. // to -C or -CC.
  852. PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
  853. PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
  854. PP, *OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
  855. Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
  856. Opts.MinimizeWhitespace, Opts.DirectivesOnly);
  857. // Expand macros in pragmas with -fms-extensions. The assumption is that
  858. // the majority of pragmas in such a file will be Microsoft pragmas.
  859. // Remember the handlers we will add so that we can remove them later.
  860. std::unique_ptr<UnknownPragmaHandler> MicrosoftExtHandler(
  861. new UnknownPragmaHandler(
  862. "#pragma", Callbacks,
  863. /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
  864. std::unique_ptr<UnknownPragmaHandler> GCCHandler(new UnknownPragmaHandler(
  865. "#pragma GCC", Callbacks,
  866. /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
  867. std::unique_ptr<UnknownPragmaHandler> ClangHandler(new UnknownPragmaHandler(
  868. "#pragma clang", Callbacks,
  869. /*RequireTokenExpansion=*/PP.getLangOpts().MicrosoftExt));
  870. PP.AddPragmaHandler(MicrosoftExtHandler.get());
  871. PP.AddPragmaHandler("GCC", GCCHandler.get());
  872. PP.AddPragmaHandler("clang", ClangHandler.get());
  873. // The tokens after pragma omp need to be expanded.
  874. //
  875. // OpenMP [2.1, Directive format]
  876. // Preprocessing tokens following the #pragma omp are subject to macro
  877. // replacement.
  878. std::unique_ptr<UnknownPragmaHandler> OpenMPHandler(
  879. new UnknownPragmaHandler("#pragma omp", Callbacks,
  880. /*RequireTokenExpansion=*/true));
  881. PP.AddPragmaHandler("omp", OpenMPHandler.get());
  882. PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(Callbacks));
  883. // After we have configured the preprocessor, enter the main file.
  884. PP.EnterMainSourceFile();
  885. if (Opts.DirectivesOnly)
  886. PP.SetMacroExpansionOnlyInDirectives();
  887. // Consume all of the tokens that come from the predefines buffer. Those
  888. // should not be emitted into the output and are guaranteed to be at the
  889. // start.
  890. const SourceManager &SourceMgr = PP.getSourceManager();
  891. Token Tok;
  892. do {
  893. PP.Lex(Tok);
  894. if (Tok.is(tok::eof) || !Tok.getLocation().isFileID())
  895. break;
  896. PresumedLoc PLoc = SourceMgr.getPresumedLoc(Tok.getLocation());
  897. if (PLoc.isInvalid())
  898. break;
  899. if (strcmp(PLoc.getFilename(), "<built-in>"))
  900. break;
  901. } while (true);
  902. // Read all the preprocessed tokens, printing them out to the stream.
  903. PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
  904. *OS << '\n';
  905. // Remove the handlers we just added to leave the preprocessor in a sane state
  906. // so that it can be reused (for example by a clang::Parser instance).
  907. PP.RemovePragmaHandler(MicrosoftExtHandler.get());
  908. PP.RemovePragmaHandler("GCC", GCCHandler.get());
  909. PP.RemovePragmaHandler("clang", ClangHandler.get());
  910. PP.RemovePragmaHandler("omp", OpenMPHandler.get());
  911. }