MacroArgs.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. //===--- MacroArgs.cpp - Formal argument info for Macros ------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the MacroArgs interface.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "clang/Lex/MacroArgs.h"
  13. #include "clang/Lex/LexDiagnostic.h"
  14. #include "clang/Lex/MacroInfo.h"
  15. #include "clang/Lex/Preprocessor.h"
  16. #include "llvm/ADT/SmallString.h"
  17. #include "llvm/Support/SaveAndRestore.h"
  18. #include <algorithm>
  19. using namespace clang;
  20. /// MacroArgs ctor function - This destroys the vector passed in.
  21. MacroArgs *MacroArgs::create(const MacroInfo *MI,
  22. ArrayRef<Token> UnexpArgTokens,
  23. bool VarargsElided, Preprocessor &PP) {
  24. assert(MI->isFunctionLike() &&
  25. "Can't have args for an object-like macro!");
  26. MacroArgs **ResultEnt = nullptr;
  27. unsigned ClosestMatch = ~0U;
  28. // See if we have an entry with a big enough argument list to reuse on the
  29. // free list. If so, reuse it.
  30. for (MacroArgs **Entry = &PP.MacroArgCache; *Entry;
  31. Entry = &(*Entry)->ArgCache) {
  32. if ((*Entry)->NumUnexpArgTokens >= UnexpArgTokens.size() &&
  33. (*Entry)->NumUnexpArgTokens < ClosestMatch) {
  34. ResultEnt = Entry;
  35. // If we have an exact match, use it.
  36. if ((*Entry)->NumUnexpArgTokens == UnexpArgTokens.size())
  37. break;
  38. // Otherwise, use the best fit.
  39. ClosestMatch = (*Entry)->NumUnexpArgTokens;
  40. }
  41. }
  42. MacroArgs *Result;
  43. if (!ResultEnt) {
  44. // Allocate memory for a MacroArgs object with the lexer tokens at the end,
  45. // and construct the MacroArgs object.
  46. Result = new (
  47. llvm::safe_malloc(totalSizeToAlloc<Token>(UnexpArgTokens.size())))
  48. MacroArgs(UnexpArgTokens.size(), VarargsElided, MI->getNumParams());
  49. } else {
  50. Result = *ResultEnt;
  51. // Unlink this node from the preprocessors singly linked list.
  52. *ResultEnt = Result->ArgCache;
  53. Result->NumUnexpArgTokens = UnexpArgTokens.size();
  54. Result->VarargsElided = VarargsElided;
  55. Result->NumMacroArgs = MI->getNumParams();
  56. }
  57. // Copy the actual unexpanded tokens to immediately after the result ptr.
  58. if (!UnexpArgTokens.empty()) {
  59. static_assert(std::is_trivial<Token>::value,
  60. "assume trivial copyability if copying into the "
  61. "uninitialized array (as opposed to reusing a cached "
  62. "MacroArgs)");
  63. std::copy(UnexpArgTokens.begin(), UnexpArgTokens.end(),
  64. Result->getTrailingObjects<Token>());
  65. }
  66. return Result;
  67. }
  68. /// destroy - Destroy and deallocate the memory for this object.
  69. ///
  70. void MacroArgs::destroy(Preprocessor &PP) {
  71. // Don't clear PreExpArgTokens, just clear the entries. Clearing the entries
  72. // would deallocate the element vectors.
  73. for (unsigned i = 0, e = PreExpArgTokens.size(); i != e; ++i)
  74. PreExpArgTokens[i].clear();
  75. // Add this to the preprocessor's free list.
  76. ArgCache = PP.MacroArgCache;
  77. PP.MacroArgCache = this;
  78. }
  79. /// deallocate - This should only be called by the Preprocessor when managing
  80. /// its freelist.
  81. MacroArgs *MacroArgs::deallocate() {
  82. MacroArgs *Next = ArgCache;
  83. // Run the dtor to deallocate the vectors.
  84. this->~MacroArgs();
  85. // Release the memory for the object.
  86. static_assert(std::is_trivially_destructible<Token>::value,
  87. "assume trivially destructible and forego destructors");
  88. free(this);
  89. return Next;
  90. }
  91. /// getArgLength - Given a pointer to an expanded or unexpanded argument,
  92. /// return the number of tokens, not counting the EOF, that make up the
  93. /// argument.
  94. unsigned MacroArgs::getArgLength(const Token *ArgPtr) {
  95. unsigned NumArgTokens = 0;
  96. for (; ArgPtr->isNot(tok::eof); ++ArgPtr)
  97. ++NumArgTokens;
  98. return NumArgTokens;
  99. }
  100. /// getUnexpArgument - Return the unexpanded tokens for the specified formal.
  101. ///
  102. const Token *MacroArgs::getUnexpArgument(unsigned Arg) const {
  103. assert(Arg < getNumMacroArguments() && "Invalid arg #");
  104. // The unexpanded argument tokens start immediately after the MacroArgs object
  105. // in memory.
  106. const Token *Start = getTrailingObjects<Token>();
  107. const Token *Result = Start;
  108. // Scan to find Arg.
  109. for (; Arg; ++Result) {
  110. assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
  111. if (Result->is(tok::eof))
  112. --Arg;
  113. }
  114. assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
  115. return Result;
  116. }
  117. bool MacroArgs::invokedWithVariadicArgument(const MacroInfo *const MI,
  118. Preprocessor &PP) {
  119. if (!MI->isVariadic())
  120. return false;
  121. const int VariadicArgIndex = getNumMacroArguments() - 1;
  122. return getPreExpArgument(VariadicArgIndex, PP).front().isNot(tok::eof);
  123. }
  124. /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
  125. /// by pre-expansion, return false. Otherwise, conservatively return true.
  126. bool MacroArgs::ArgNeedsPreexpansion(const Token *ArgTok,
  127. Preprocessor &PP) const {
  128. // If there are no identifiers in the argument list, or if the identifiers are
  129. // known to not be macros, pre-expansion won't modify it.
  130. for (; ArgTok->isNot(tok::eof); ++ArgTok)
  131. if (IdentifierInfo *II = ArgTok->getIdentifierInfo())
  132. if (II->hasMacroDefinition())
  133. // Return true even though the macro could be a function-like macro
  134. // without a following '(' token, or could be disabled, or not visible.
  135. return true;
  136. return false;
  137. }
  138. /// getPreExpArgument - Return the pre-expanded form of the specified
  139. /// argument.
  140. const std::vector<Token> &MacroArgs::getPreExpArgument(unsigned Arg,
  141. Preprocessor &PP) {
  142. assert(Arg < getNumMacroArguments() && "Invalid argument number!");
  143. // If we have already computed this, return it.
  144. if (PreExpArgTokens.size() < getNumMacroArguments())
  145. PreExpArgTokens.resize(getNumMacroArguments());
  146. std::vector<Token> &Result = PreExpArgTokens[Arg];
  147. if (!Result.empty()) return Result;
  148. SaveAndRestore<bool> PreExpandingMacroArgs(PP.InMacroArgPreExpansion, true);
  149. const Token *AT = getUnexpArgument(Arg);
  150. unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
  151. // Otherwise, we have to pre-expand this argument, populating Result. To do
  152. // this, we set up a fake TokenLexer to lex from the unexpanded argument
  153. // list. With this installed, we lex expanded tokens until we hit the EOF
  154. // token at the end of the unexp list.
  155. PP.EnterTokenStream(AT, NumToks, false /*disable expand*/,
  156. false /*owns tokens*/, false /*is reinject*/);
  157. // Lex all of the macro-expanded tokens into Result.
  158. do {
  159. Result.push_back(Token());
  160. Token &Tok = Result.back();
  161. PP.Lex(Tok);
  162. } while (Result.back().isNot(tok::eof));
  163. // Pop the token stream off the top of the stack. We know that the internal
  164. // pointer inside of it is to the "end" of the token stream, but the stack
  165. // will not otherwise be popped until the next token is lexed. The problem is
  166. // that the token may be lexed sometime after the vector of tokens itself is
  167. // destroyed, which would be badness.
  168. if (PP.InCachingLexMode())
  169. PP.ExitCachingLexMode();
  170. PP.RemoveTopOfLexerStack();
  171. return Result;
  172. }
  173. /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
  174. /// tokens into the literal string token that should be produced by the C #
  175. /// preprocessor operator. If Charify is true, then it should be turned into
  176. /// a character literal for the Microsoft charize (#@) extension.
  177. ///
  178. Token MacroArgs::StringifyArgument(const Token *ArgToks,
  179. Preprocessor &PP, bool Charify,
  180. SourceLocation ExpansionLocStart,
  181. SourceLocation ExpansionLocEnd) {
  182. Token Tok;
  183. Tok.startToken();
  184. Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
  185. const Token *ArgTokStart = ArgToks;
  186. // Stringify all the tokens.
  187. SmallString<128> Result;
  188. Result += "\"";
  189. bool isFirst = true;
  190. for (; ArgToks->isNot(tok::eof); ++ArgToks) {
  191. const Token &Tok = *ArgToks;
  192. if (!isFirst && (Tok.hasLeadingSpace() || Tok.isAtStartOfLine()))
  193. Result += ' ';
  194. isFirst = false;
  195. // If this is a string or character constant, escape the token as specified
  196. // by 6.10.3.2p2.
  197. if (tok::isStringLiteral(Tok.getKind()) || // "foo", u8R"x(foo)x"_bar, etc.
  198. Tok.is(tok::char_constant) || // 'x'
  199. Tok.is(tok::wide_char_constant) || // L'x'.
  200. Tok.is(tok::utf8_char_constant) || // u8'x'.
  201. Tok.is(tok::utf16_char_constant) || // u'x'.
  202. Tok.is(tok::utf32_char_constant)) { // U'x'.
  203. bool Invalid = false;
  204. std::string TokStr = PP.getSpelling(Tok, &Invalid);
  205. if (!Invalid) {
  206. std::string Str = Lexer::Stringify(TokStr);
  207. Result.append(Str.begin(), Str.end());
  208. }
  209. } else if (Tok.is(tok::code_completion)) {
  210. PP.CodeCompleteNaturalLanguage();
  211. } else {
  212. // Otherwise, just append the token. Do some gymnastics to get the token
  213. // in place and avoid copies where possible.
  214. unsigned CurStrLen = Result.size();
  215. Result.resize(CurStrLen+Tok.getLength());
  216. const char *BufPtr = Result.data() + CurStrLen;
  217. bool Invalid = false;
  218. unsigned ActualTokLen = PP.getSpelling(Tok, BufPtr, &Invalid);
  219. if (!Invalid) {
  220. // If getSpelling returned a pointer to an already uniqued version of
  221. // the string instead of filling in BufPtr, memcpy it onto our string.
  222. if (ActualTokLen && BufPtr != &Result[CurStrLen])
  223. memcpy(&Result[CurStrLen], BufPtr, ActualTokLen);
  224. // If the token was dirty, the spelling may be shorter than the token.
  225. if (ActualTokLen != Tok.getLength())
  226. Result.resize(CurStrLen+ActualTokLen);
  227. }
  228. }
  229. }
  230. // If the last character of the string is a \, and if it isn't escaped, this
  231. // is an invalid string literal, diagnose it as specified in C99.
  232. if (Result.back() == '\\') {
  233. // Count the number of consecutive \ characters. If even, then they are
  234. // just escaped backslashes, otherwise it's an error.
  235. unsigned FirstNonSlash = Result.size()-2;
  236. // Guaranteed to find the starting " if nothing else.
  237. while (Result[FirstNonSlash] == '\\')
  238. --FirstNonSlash;
  239. if ((Result.size()-1-FirstNonSlash) & 1) {
  240. // Diagnose errors for things like: #define F(X) #X / F(\)
  241. PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
  242. Result.pop_back(); // remove one of the \'s.
  243. }
  244. }
  245. Result += '"';
  246. // If this is the charify operation and the result is not a legal character
  247. // constant, diagnose it.
  248. if (Charify) {
  249. // First step, turn double quotes into single quotes:
  250. Result[0] = '\'';
  251. Result[Result.size()-1] = '\'';
  252. // Check for bogus character.
  253. bool isBad = false;
  254. if (Result.size() == 3)
  255. isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
  256. else
  257. isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
  258. if (isBad) {
  259. PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
  260. Result = "' '"; // Use something arbitrary, but legal.
  261. }
  262. }
  263. PP.CreateString(Result, Tok,
  264. ExpansionLocStart, ExpansionLocEnd);
  265. return Tok;
  266. }