DependencyDirectivesSourceMinimizer.cpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982
  1. //===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This is the implementation for minimizing header and source files to the
  11. /// minimum necessary preprocessor directives for evaluating includes. It
  12. /// reduces the source down to #define, #include, #import, @import, and any
  13. /// conditional preprocessor logic that contains one of those.
  14. ///
  15. //===----------------------------------------------------------------------===//
  16. #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
  17. #include "clang/Basic/CharInfo.h"
  18. #include "clang/Basic/Diagnostic.h"
  19. #include "clang/Lex/LexDiagnostic.h"
  20. #include "llvm/ADT/StringMap.h"
  21. #include "llvm/ADT/StringSwitch.h"
  22. #include "llvm/Support/MemoryBuffer.h"
  23. using namespace llvm;
  24. using namespace clang;
  25. using namespace clang::minimize_source_to_dependency_directives;
  26. namespace {
  27. struct Minimizer {
  28. /// Minimized output.
  29. SmallVectorImpl<char> &Out;
  30. /// The known tokens encountered during the minimization.
  31. SmallVectorImpl<Token> &Tokens;
  32. Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
  33. StringRef Input, DiagnosticsEngine *Diags,
  34. SourceLocation InputSourceLoc)
  35. : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
  36. InputSourceLoc(InputSourceLoc) {}
  37. /// Lex the provided source and emit the minimized output.
  38. ///
  39. /// \returns True on error.
  40. bool minimize();
  41. private:
  42. struct IdInfo {
  43. const char *Last;
  44. StringRef Name;
  45. };
  46. /// Lex an identifier.
  47. ///
  48. /// \pre First points at a valid identifier head.
  49. LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
  50. LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
  51. const char *const End);
  52. LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
  53. LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
  54. LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
  55. LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
  56. LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
  57. LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
  58. LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
  59. LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
  60. const char *&First, const char *const End);
  61. Token &makeToken(TokenKind K) {
  62. Tokens.emplace_back(K, Out.size());
  63. return Tokens.back();
  64. }
  65. void popToken() {
  66. Out.resize(Tokens.back().Offset);
  67. Tokens.pop_back();
  68. }
  69. TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
  70. Minimizer &put(char Byte) {
  71. Out.push_back(Byte);
  72. return *this;
  73. }
  74. Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
  75. Minimizer &append(const char *First, const char *Last) {
  76. Out.append(First, Last);
  77. return *this;
  78. }
  79. void printToNewline(const char *&First, const char *const End);
  80. void printAdjacentModuleNameParts(const char *&First, const char *const End);
  81. LLVM_NODISCARD bool printAtImportBody(const char *&First,
  82. const char *const End);
  83. void printDirectiveBody(const char *&First, const char *const End);
  84. void printAdjacentMacroArgs(const char *&First, const char *const End);
  85. LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
  86. /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  87. /// true at the end.
  88. bool reportError(const char *CurPtr, unsigned Err);
  89. StringMap<char> SplitIds;
  90. StringRef Input;
  91. DiagnosticsEngine *Diags;
  92. SourceLocation InputSourceLoc;
  93. };
  94. } // end anonymous namespace
  95. bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
  96. if (!Diags)
  97. return true;
  98. assert(CurPtr >= Input.data() && "invalid buffer ptr");
  99. Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
  100. return true;
  101. }
  102. static void skipOverSpaces(const char *&First, const char *const End) {
  103. while (First != End && isHorizontalWhitespace(*First))
  104. ++First;
  105. }
  106. LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
  107. const char *Current) {
  108. assert(First <= Current);
  109. // Check if we can even back up.
  110. if (*Current != '"' || First == Current)
  111. return false;
  112. // Check for an "R".
  113. --Current;
  114. if (*Current != 'R')
  115. return false;
  116. if (First == Current || !isAsciiIdentifierContinue(*--Current))
  117. return true;
  118. // Check for a prefix of "u", "U", or "L".
  119. if (*Current == 'u' || *Current == 'U' || *Current == 'L')
  120. return First == Current || !isAsciiIdentifierContinue(*--Current);
  121. // Check for a prefix of "u8".
  122. if (*Current != '8' || First == Current || *Current-- != 'u')
  123. return false;
  124. return First == Current || !isAsciiIdentifierContinue(*--Current);
  125. }
  126. static void skipRawString(const char *&First, const char *const End) {
  127. assert(First[0] == '"');
  128. assert(First[-1] == 'R');
  129. const char *Last = ++First;
  130. while (Last != End && *Last != '(')
  131. ++Last;
  132. if (Last == End) {
  133. First = Last; // Hit the end... just give up.
  134. return;
  135. }
  136. StringRef Terminator(First, Last - First);
  137. for (;;) {
  138. // Move First to just past the next ")".
  139. First = Last;
  140. while (First != End && *First != ')')
  141. ++First;
  142. if (First == End)
  143. return;
  144. ++First;
  145. // Look ahead for the terminator sequence.
  146. Last = First;
  147. while (Last != End && size_t(Last - First) < Terminator.size() &&
  148. Terminator[Last - First] == *Last)
  149. ++Last;
  150. // Check if we hit it (or the end of the file).
  151. if (Last == End) {
  152. First = Last;
  153. return;
  154. }
  155. if (size_t(Last - First) < Terminator.size())
  156. continue;
  157. if (*Last != '"')
  158. continue;
  159. First = Last + 1;
  160. return;
  161. }
  162. }
  163. // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
  164. static unsigned isEOL(const char *First, const char *const End) {
  165. if (First == End)
  166. return 0;
  167. if (End - First > 1 && isVerticalWhitespace(First[0]) &&
  168. isVerticalWhitespace(First[1]) && First[0] != First[1])
  169. return 2;
  170. return !!isVerticalWhitespace(First[0]);
  171. }
  172. static void skipString(const char *&First, const char *const End) {
  173. assert(*First == '\'' || *First == '"' || *First == '<');
  174. const char Terminator = *First == '<' ? '>' : *First;
  175. for (++First; First != End && *First != Terminator; ++First) {
  176. // String and character literals don't extend past the end of the line.
  177. if (isVerticalWhitespace(*First))
  178. return;
  179. if (*First != '\\')
  180. continue;
  181. // Skip past backslash to the next character. This ensures that the
  182. // character right after it is skipped as well, which matters if it's
  183. // the terminator.
  184. if (++First == End)
  185. return;
  186. if (!isWhitespace(*First))
  187. continue;
  188. // Whitespace after the backslash might indicate a line continuation.
  189. const char *FirstAfterBackslashPastSpace = First;
  190. skipOverSpaces(FirstAfterBackslashPastSpace, End);
  191. if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
  192. // Advance the character pointer to the next line for the next
  193. // iteration.
  194. First = FirstAfterBackslashPastSpace + NLSize - 1;
  195. }
  196. }
  197. if (First != End)
  198. ++First; // Finish off the string.
  199. }
  200. // Returns the length of the skipped newline
  201. static unsigned skipNewline(const char *&First, const char *End) {
  202. if (First == End)
  203. return 0;
  204. assert(isVerticalWhitespace(*First));
  205. unsigned Len = isEOL(First, End);
  206. assert(Len && "expected newline");
  207. First += Len;
  208. return Len;
  209. }
  /// Tells whether the newline of length \p EOLLen that ends right before
  /// \p First was preceded by a backslash.
  static bool wasLineContinuation(const char *First, unsigned EOLLen) {
    const char *const BeforeNewline = First - static_cast<int>(EOLLen) - 1;
    return *BeforeNewline == '\\';
  }
  213. static void skipToNewlineRaw(const char *&First, const char *const End) {
  214. for (;;) {
  215. if (First == End)
  216. return;
  217. unsigned Len = isEOL(First, End);
  218. if (Len)
  219. return;
  220. do {
  221. if (++First == End)
  222. return;
  223. Len = isEOL(First, End);
  224. } while (!Len);
  225. if (First[-1] != '\\')
  226. return;
  227. First += Len;
  228. // Keep skipping lines...
  229. }
  230. }
  231. static const char *findLastNonSpace(const char *First, const char *Last) {
  232. assert(First <= Last);
  233. while (First != Last && isHorizontalWhitespace(Last[-1]))
  234. --Last;
  235. return Last;
  236. }
  237. static const char *findFirstTrailingSpace(const char *First,
  238. const char *Last) {
  239. const char *LastNonSpace = findLastNonSpace(First, Last);
  240. if (Last == LastNonSpace)
  241. return Last;
  242. assert(isHorizontalWhitespace(LastNonSpace[0]));
  243. return LastNonSpace + 1;
  244. }
  245. static void skipLineComment(const char *&First, const char *const End) {
  246. assert(First[0] == '/' && First[1] == '/');
  247. First += 2;
  248. skipToNewlineRaw(First, End);
  249. }
  /// Skips a `/* ... */` comment. On entry \p First points at the opening
  /// slash; on exit it is just past the closing `*/`, or at \p End when the
  /// comment is not terminated.
  static void skipBlockComment(const char *&First, const char *const End) {
    assert(First[0] == '/' && First[1] == '*');

    // Fewer than four characters cannot hold "/**/".
    if (End - First < 4) {
      First = End;
      return;
    }

    // Start at the earliest position where the closing '/' could be.
    const char *Cursor = First + 3;
    while (Cursor != End) {
      if (Cursor[0] == '/' && Cursor[-1] == '*') {
        First = Cursor + 1;
        return;
      }
      ++Cursor;
    }
    First = End;
  }
  262. /// \returns True if the current single quotation mark character is a C++ 14
  263. /// digit separator.
  264. static bool isQuoteCppDigitSeparator(const char *const Start,
  265. const char *const Cur,
  266. const char *const End) {
  267. assert(*Cur == '\'' && "expected quotation character");
  268. // skipLine called in places where we don't expect a valid number
  269. // body before `start` on the same line, so always return false at the start.
  270. if (Start == Cur)
  271. return false;
  272. // The previous character must be a valid PP number character.
  273. // Make sure that the L, u, U, u8 prefixes don't get marked as a
  274. // separator though.
  275. char Prev = *(Cur - 1);
  276. if (Prev == 'L' || Prev == 'U' || Prev == 'u')
  277. return false;
  278. if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
  279. return false;
  280. if (!isPreprocessingNumberBody(Prev))
  281. return false;
  282. // The next character should be a valid identifier body character.
  283. return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
  284. }
  285. static void skipLine(const char *&First, const char *const End) {
  286. for (;;) {
  287. assert(First <= End);
  288. if (First == End)
  289. return;
  290. if (isVerticalWhitespace(*First)) {
  291. skipNewline(First, End);
  292. return;
  293. }
  294. const char *Start = First;
  295. while (First != End && !isVerticalWhitespace(*First)) {
  296. // Iterate over strings correctly to avoid comments and newlines.
  297. if (*First == '"' ||
  298. (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
  299. if (isRawStringLiteral(Start, First))
  300. skipRawString(First, End);
  301. else
  302. skipString(First, End);
  303. continue;
  304. }
  305. // Iterate over comments correctly.
  306. if (*First != '/' || End - First < 2) {
  307. ++First;
  308. continue;
  309. }
  310. if (First[1] == '/') {
  311. // "//...".
  312. skipLineComment(First, End);
  313. continue;
  314. }
  315. if (First[1] != '*') {
  316. ++First;
  317. continue;
  318. }
  319. // "/*...*/".
  320. skipBlockComment(First, End);
  321. }
  322. if (First == End)
  323. return;
  324. // Skip over the newline.
  325. unsigned Len = skipNewline(First, End);
  326. if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
  327. break;
  328. }
  329. }
  330. static void skipDirective(StringRef Name, const char *&First,
  331. const char *const End) {
  332. if (llvm::StringSwitch<bool>(Name)
  333. .Case("warning", true)
  334. .Case("error", true)
  335. .Default(false))
  336. // Do not process quotes or comments.
  337. skipToNewlineRaw(First, End);
  338. else
  339. skipLine(First, End);
  340. }
  341. void Minimizer::printToNewline(const char *&First, const char *const End) {
  342. while (First != End && !isVerticalWhitespace(*First)) {
  343. const char *Last = First;
  344. do {
  345. // Iterate over strings correctly to avoid comments and newlines.
  346. if (*Last == '"' || *Last == '\'' ||
  347. (*Last == '<' && top() == pp_include)) {
  348. if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
  349. skipRawString(Last, End);
  350. else
  351. skipString(Last, End);
  352. continue;
  353. }
  354. if (*Last != '/' || End - Last < 2) {
  355. ++Last;
  356. continue; // Gather the rest up to print verbatim.
  357. }
  358. if (Last[1] != '/' && Last[1] != '*') {
  359. ++Last;
  360. continue;
  361. }
  362. // Deal with "//..." and "/*...*/".
  363. append(First, findFirstTrailingSpace(First, Last));
  364. First = Last;
  365. if (Last[1] == '/') {
  366. skipLineComment(First, End);
  367. return;
  368. }
  369. put(' ');
  370. skipBlockComment(First, End);
  371. skipOverSpaces(First, End);
  372. Last = First;
  373. } while (Last != End && !isVerticalWhitespace(*Last));
  374. // Print out the string.
  375. const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
  376. if (Last == End || LastBeforeTrailingSpace == First ||
  377. LastBeforeTrailingSpace[-1] != '\\') {
  378. append(First, LastBeforeTrailingSpace);
  379. First = Last;
  380. skipNewline(First, End);
  381. return;
  382. }
  383. // Print up to the backslash, backing up over spaces. Preserve at least one
  384. // space, as the space matters when tokens are separated by a line
  385. // continuation.
  386. append(First, findFirstTrailingSpace(
  387. First, LastBeforeTrailingSpace - 1));
  388. First = Last;
  389. skipNewline(First, End);
  390. skipOverSpaces(First, End);
  391. }
  392. }
  393. static void skipWhitespace(const char *&First, const char *const End) {
  394. for (;;) {
  395. assert(First <= End);
  396. skipOverSpaces(First, End);
  397. if (End - First < 2)
  398. return;
  399. if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
  400. skipNewline(++First, End);
  401. continue;
  402. }
  403. // Check for a non-comment character.
  404. if (First[0] != '/')
  405. return;
  406. // "// ...".
  407. if (First[1] == '/') {
  408. skipLineComment(First, End);
  409. return;
  410. }
  411. // Cannot be a comment.
  412. if (First[1] != '*')
  413. return;
  414. // "/*...*/".
  415. skipBlockComment(First, End);
  416. }
  417. }
  418. void Minimizer::printAdjacentModuleNameParts(const char *&First,
  419. const char *const End) {
  420. // Skip over parts of the body.
  421. const char *Last = First;
  422. do
  423. ++Last;
  424. while (Last != End && (isAsciiIdentifierContinue(*Last) || *Last == '.'));
  425. append(First, Last);
  426. First = Last;
  427. }
  428. bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
  429. for (;;) {
  430. skipWhitespace(First, End);
  431. if (First == End)
  432. return true;
  433. if (isVerticalWhitespace(*First)) {
  434. skipNewline(First, End);
  435. continue;
  436. }
  437. // Found a semicolon.
  438. if (*First == ';') {
  439. put(*First++).put('\n');
  440. return false;
  441. }
  442. // Don't handle macro expansions inside @import for now.
  443. if (!isAsciiIdentifierContinue(*First) && *First != '.')
  444. return true;
  445. printAdjacentModuleNameParts(First, End);
  446. }
  447. }
  448. void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
  449. skipWhitespace(First, End); // Skip initial whitespace.
  450. printToNewline(First, End);
  451. while (Out.back() == ' ')
  452. Out.pop_back();
  453. put('\n');
  454. }
  455. LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
  456. const char *const End) {
  457. assert(isAsciiIdentifierContinue(*First) && "invalid identifer");
  458. const char *Last = First + 1;
  459. while (Last != End && isAsciiIdentifierContinue(*Last))
  460. ++Last;
  461. return Last;
  462. }
  463. LLVM_NODISCARD static const char *
  464. getIdentifierContinuation(const char *First, const char *const End) {
  465. if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
  466. return nullptr;
  467. ++First;
  468. skipNewline(First, End);
  469. if (First == End)
  470. return nullptr;
  471. return isAsciiIdentifierContinue(First[0]) ? First : nullptr;
  472. }
  473. Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
  474. const char *const End) {
  475. const char *Last = lexRawIdentifier(First, End);
  476. const char *Next = getIdentifierContinuation(Last, End);
  477. if (LLVM_LIKELY(!Next))
  478. return IdInfo{Last, StringRef(First, Last - First)};
  479. // Slow path, where identifiers are split over lines.
  480. SmallVector<char, 64> Id(First, Last);
  481. while (Next) {
  482. Last = lexRawIdentifier(Next, End);
  483. Id.append(Next, Last);
  484. Next = getIdentifierContinuation(Last, End);
  485. }
  486. return IdInfo{
  487. Last,
  488. SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
  489. }
  490. void Minimizer::printAdjacentMacroArgs(const char *&First,
  491. const char *const End) {
  492. // Skip over parts of the body.
  493. const char *Last = First;
  494. do
  495. ++Last;
  496. while (Last != End &&
  497. (isAsciiIdentifierContinue(*Last) || *Last == '.' || *Last == ','));
  498. append(First, Last);
  499. First = Last;
  500. }
  501. bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
  502. assert(*First == '(');
  503. put(*First++);
  504. for (;;) {
  505. skipWhitespace(First, End);
  506. if (First == End)
  507. return true;
  508. if (*First == ')') {
  509. put(*First++);
  510. return false;
  511. }
  512. // This is intentionally fairly liberal.
  513. if (!(isAsciiIdentifierContinue(*First) || *First == '.' || *First == ','))
  514. return true;
  515. printAdjacentMacroArgs(First, End);
  516. }
  517. }
  518. /// Looks for an identifier starting from Last.
  519. ///
  520. /// Updates "First" to just past the next identifier, if any. Returns true iff
  521. /// the identifier matches "Id".
  522. bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
  523. const char *const End) {
  524. skipWhitespace(First, End);
  525. if (First == End || !isAsciiIdentifierStart(*First))
  526. return false;
  527. IdInfo FoundId = lexIdentifier(First, End);
  528. First = FoundId.Last;
  529. return FoundId.Name == Id;
  530. }
  531. bool Minimizer::lexAt(const char *&First, const char *const End) {
  532. // Handle "@import".
  533. const char *ImportLoc = First++;
  534. if (!isNextIdentifier("import", First, End)) {
  535. skipLine(First, End);
  536. return false;
  537. }
  538. makeToken(decl_at_import);
  539. append("@import ");
  540. if (printAtImportBody(First, End))
  541. return reportError(
  542. ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
  543. skipWhitespace(First, End);
  544. if (First == End)
  545. return false;
  546. if (!isVerticalWhitespace(*First))
  547. return reportError(
  548. ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
  549. skipNewline(First, End);
  550. return false;
  551. }
  552. bool Minimizer::lexModule(const char *&First, const char *const End) {
  553. IdInfo Id = lexIdentifier(First, End);
  554. First = Id.Last;
  555. bool Export = false;
  556. if (Id.Name == "export") {
  557. Export = true;
  558. skipWhitespace(First, End);
  559. if (!isAsciiIdentifierContinue(*First)) {
  560. skipLine(First, End);
  561. return false;
  562. }
  563. Id = lexIdentifier(First, End);
  564. First = Id.Last;
  565. }
  566. if (Id.Name != "module" && Id.Name != "import") {
  567. skipLine(First, End);
  568. return false;
  569. }
  570. skipWhitespace(First, End);
  571. // Ignore this as a module directive if the next character can't be part of
  572. // an import.
  573. switch (*First) {
  574. case ':':
  575. case '<':
  576. case '"':
  577. break;
  578. default:
  579. if (!isAsciiIdentifierContinue(*First)) {
  580. skipLine(First, End);
  581. return false;
  582. }
  583. }
  584. if (Export) {
  585. makeToken(cxx_export_decl);
  586. append("export ");
  587. }
  588. if (Id.Name == "module")
  589. makeToken(cxx_module_decl);
  590. else
  591. makeToken(cxx_import_decl);
  592. append(Id.Name);
  593. append(" ");
  594. printToNewline(First, End);
  595. append("\n");
  596. return false;
  597. }
  598. bool Minimizer::lexDefine(const char *&First, const char *const End) {
  599. makeToken(pp_define);
  600. append("#define ");
  601. skipWhitespace(First, End);
  602. if (!isAsciiIdentifierStart(*First))
  603. return reportError(First, diag::err_pp_macro_not_identifier);
  604. IdInfo Id = lexIdentifier(First, End);
  605. const char *Last = Id.Last;
  606. append(Id.Name);
  607. if (Last == End)
  608. return false;
  609. if (*Last == '(') {
  610. size_t Size = Out.size();
  611. if (printMacroArgs(Last, End)) {
  612. // Be robust to bad macro arguments, since they can show up in disabled
  613. // code.
  614. Out.resize(Size);
  615. append("(/* invalid */\n");
  616. skipLine(Last, End);
  617. return false;
  618. }
  619. }
  620. skipWhitespace(Last, End);
  621. if (Last == End)
  622. return false;
  623. if (!isVerticalWhitespace(*Last))
  624. put(' ');
  625. printDirectiveBody(Last, End);
  626. First = Last;
  627. return false;
  628. }
  629. bool Minimizer::lexPragma(const char *&First, const char *const End) {
  630. // #pragma.
  631. skipWhitespace(First, End);
  632. if (First == End || !isAsciiIdentifierStart(*First))
  633. return false;
  634. IdInfo FoundId = lexIdentifier(First, End);
  635. First = FoundId.Last;
  636. if (FoundId.Name == "once") {
  637. // #pragma once
  638. skipLine(First, End);
  639. makeToken(pp_pragma_once);
  640. append("#pragma once\n");
  641. return false;
  642. }
  643. if (FoundId.Name == "push_macro") {
  644. // #pragma push_macro
  645. makeToken(pp_pragma_push_macro);
  646. append("#pragma push_macro");
  647. printDirectiveBody(First, End);
  648. return false;
  649. }
  650. if (FoundId.Name == "pop_macro") {
  651. // #pragma pop_macro
  652. makeToken(pp_pragma_pop_macro);
  653. append("#pragma pop_macro");
  654. printDirectiveBody(First, End);
  655. return false;
  656. }
  657. if (FoundId.Name == "include_alias") {
  658. // #pragma include_alias
  659. makeToken(pp_pragma_include_alias);
  660. append("#pragma include_alias");
  661. printDirectiveBody(First, End);
  662. return false;
  663. }
  664. if (FoundId.Name != "clang") {
  665. skipLine(First, End);
  666. return false;
  667. }
  668. // #pragma clang.
  669. if (!isNextIdentifier("module", First, End)) {
  670. skipLine(First, End);
  671. return false;
  672. }
  673. // #pragma clang module.
  674. if (!isNextIdentifier("import", First, End)) {
  675. skipLine(First, End);
  676. return false;
  677. }
  678. // #pragma clang module import.
  679. makeToken(pp_pragma_import);
  680. append("#pragma clang module import ");
  681. printDirectiveBody(First, End);
  682. return false;
  683. }
  684. bool Minimizer::lexEndif(const char *&First, const char *const End) {
  685. // Strip out "#else" if it's empty.
  686. if (top() == pp_else)
  687. popToken();
  688. // If "#ifdef" is empty, strip it and skip the "#endif".
  689. //
  690. // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
  691. // we can skip empty `#if` and `#elif` blocks as well after scanning for a
  692. // literal __has_include in the condition. Even without that rule we could
  693. // drop the tokens if we scan for identifiers in the condition and find none.
  694. if (top() == pp_ifdef || top() == pp_ifndef) {
  695. popToken();
  696. skipLine(First, End);
  697. return false;
  698. }
  699. return lexDefault(pp_endif, "endif", First, End);
  700. }
  701. bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
  702. const char *&First, const char *const End) {
  703. makeToken(Kind);
  704. put('#').append(Directive).put(' ');
  705. printDirectiveBody(First, End);
  706. return false;
  707. }
  /// Quick filter on the first significant character of a line: only lines
  /// starting with '#', '@', 'i', 'e' or 'm' are examined further by
  /// lexPPLine.
  static bool isStartOfRelevantLine(char First) {
    return First == '#' || First == '@' || First == 'i' || First == 'e' ||
           First == 'm';
  }
  719. bool Minimizer::lexPPLine(const char *&First, const char *const End) {
  720. assert(First != End);
  721. skipWhitespace(First, End);
  722. assert(First <= End);
  723. if (First == End)
  724. return false;
  725. if (!isStartOfRelevantLine(*First)) {
  726. skipLine(First, End);
  727. assert(First <= End);
  728. return false;
  729. }
  730. // Handle "@import".
  731. if (*First == '@')
  732. return lexAt(First, End);
  733. if (*First == 'i' || *First == 'e' || *First == 'm')
  734. return lexModule(First, End);
  735. // Handle preprocessing directives.
  736. ++First; // Skip over '#'.
  737. skipWhitespace(First, End);
  738. if (First == End)
  739. return reportError(First, diag::err_pp_expected_eol);
  740. if (!isAsciiIdentifierStart(*First)) {
  741. skipLine(First, End);
  742. return false;
  743. }
  744. // Figure out the token.
  745. IdInfo Id = lexIdentifier(First, End);
  746. First = Id.Last;
  747. if (Id.Name == "pragma")
  748. return lexPragma(First, End);
  749. auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
  750. .Case("include", pp_include)
  751. .Case("__include_macros", pp___include_macros)
  752. .Case("define", pp_define)
  753. .Case("undef", pp_undef)
  754. .Case("import", pp_import)
  755. .Case("include_next", pp_include_next)
  756. .Case("if", pp_if)
  757. .Case("ifdef", pp_ifdef)
  758. .Case("ifndef", pp_ifndef)
  759. .Case("elif", pp_elif)
  760. .Case("elifdef", pp_elifdef)
  761. .Case("elifndef", pp_elifndef)
  762. .Case("else", pp_else)
  763. .Case("endif", pp_endif)
  764. .Default(pp_none);
  765. if (Kind == pp_none) {
  766. skipDirective(Id.Name, First, End);
  767. return false;
  768. }
  769. if (Kind == pp_endif)
  770. return lexEndif(First, End);
  771. if (Kind == pp_define)
  772. return lexDefine(First, End);
  773. // Everything else.
  774. return lexDefault(Kind, Id.Name, First, End);
  775. }
  /// Advances \p First past a UTF-8 byte order mark (EF BB BF) if the buffer
  /// starts with one; otherwise leaves it unchanged.
  static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
    if (End - First < 3)
      return;
    const bool StartsWithBOM =
        First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
    if (StartsWithBOM)
      First += 3;
  }
  781. bool Minimizer::minimizeImpl(const char *First, const char *const End) {
  782. skipUTF8ByteOrderMark(First, End);
  783. while (First != End)
  784. if (lexPPLine(First, End))
  785. return true;
  786. return false;
  787. }
  788. bool Minimizer::minimize() {
  789. bool Error = minimizeImpl(Input.begin(), Input.end());
  790. if (!Error) {
  791. // Add a trailing newline and an EOF on success.
  792. if (!Out.empty() && Out.back() != '\n')
  793. Out.push_back('\n');
  794. makeToken(pp_eof);
  795. }
  796. // Null-terminate the output. This way the memory buffer that's passed to
  797. // Clang will not have to worry about the terminating '\0'.
  798. Out.push_back(0);
  799. Out.pop_back();
  800. return Error;
  801. }
  802. bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
  803. ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
  804. struct Directive {
  805. enum DirectiveKind {
  806. If, // if/ifdef/ifndef
  807. Else // elif/elifdef/elifndef, else
  808. };
  809. int Offset;
  810. DirectiveKind Kind;
  811. };
  812. llvm::SmallVector<Directive, 32> Offsets;
  813. for (const Token &T : Input) {
  814. switch (T.K) {
  815. case pp_if:
  816. case pp_ifdef:
  817. case pp_ifndef:
  818. Offsets.push_back({T.Offset, Directive::If});
  819. break;
  820. case pp_elif:
  821. case pp_elifdef:
  822. case pp_elifndef:
  823. case pp_else: {
  824. if (Offsets.empty())
  825. return true;
  826. int PreviousOffset = Offsets.back().Offset;
  827. Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
  828. Offsets.push_back({T.Offset, Directive::Else});
  829. break;
  830. }
  831. case pp_endif: {
  832. if (Offsets.empty())
  833. return true;
  834. int PreviousOffset = Offsets.back().Offset;
  835. Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
  836. do {
  837. Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
  838. if (Kind == Directive::If)
  839. break;
  840. } while (!Offsets.empty());
  841. break;
  842. }
  843. default:
  844. break;
  845. }
  846. }
  847. return false;
  848. }
  849. bool clang::minimizeSourceToDependencyDirectives(
  850. StringRef Input, SmallVectorImpl<char> &Output,
  851. SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
  852. SourceLocation InputSourceLoc) {
  853. Output.clear();
  854. Tokens.clear();
  855. return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
  856. }