MILexer.cpp 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766
  1. //===- MILexer.cpp - Machine instructions lexer implementation ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the lexing of machine instructions.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "MILexer.h"
  13. #include "llvm/ADT/StringExtras.h"
  14. #include "llvm/ADT/StringSwitch.h"
  15. #include "llvm/ADT/Twine.h"
  16. #include <cassert>
  17. #include <cctype>
  18. #include <string>
  19. using namespace llvm;
  20. namespace {
  21. using ErrorCallbackType =
  22. function_ref<void(StringRef::iterator Loc, const Twine &)>;
  23. /// This class provides a way to iterate and get characters from the source
  24. /// string.
  25. class Cursor {
  26. const char *Ptr = nullptr;
  27. const char *End = nullptr;
  28. public:
  29. Cursor(std::nullopt_t) {}
  30. explicit Cursor(StringRef Str) {
  31. Ptr = Str.data();
  32. End = Ptr + Str.size();
  33. }
  34. bool isEOF() const { return Ptr == End; }
  35. char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; }
  36. void advance(unsigned I = 1) { Ptr += I; }
  37. StringRef remaining() const { return StringRef(Ptr, End - Ptr); }
  38. StringRef upto(Cursor C) const {
  39. assert(C.Ptr >= Ptr && C.Ptr <= End);
  40. return StringRef(Ptr, C.Ptr - Ptr);
  41. }
  42. StringRef::iterator location() const { return Ptr; }
  43. operator bool() const { return Ptr != nullptr; }
  44. };
  45. } // end anonymous namespace
  46. MIToken &MIToken::reset(TokenKind Kind, StringRef Range) {
  47. this->Kind = Kind;
  48. this->Range = Range;
  49. return *this;
  50. }
  51. MIToken &MIToken::setStringValue(StringRef StrVal) {
  52. StringValue = StrVal;
  53. return *this;
  54. }
  55. MIToken &MIToken::setOwnedStringValue(std::string StrVal) {
  56. StringValueStorage = std::move(StrVal);
  57. StringValue = StringValueStorage;
  58. return *this;
  59. }
  60. MIToken &MIToken::setIntegerValue(APSInt IntVal) {
  61. this->IntVal = std::move(IntVal);
  62. return *this;
  63. }
  64. /// Skip the leading whitespace characters and return the updated cursor.
  65. static Cursor skipWhitespace(Cursor C) {
  66. while (isblank(C.peek()))
  67. C.advance();
  68. return C;
  69. }
  /// Returns true if \p C is a line feed or a carriage return.
  static bool isNewlineChar(char C) {
    switch (C) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
    }
  }
  71. /// Skip a line comment and return the updated cursor.
  72. static Cursor skipComment(Cursor C) {
  73. if (C.peek() != ';')
  74. return C;
  75. while (!isNewlineChar(C.peek()) && !C.isEOF())
  76. C.advance();
  77. return C;
  78. }
  79. /// Machine operands can have comments, enclosed between /* and */.
  80. /// This eats up all tokens, including /* and */.
  81. static Cursor skipMachineOperandComment(Cursor C) {
  82. if (C.peek() != '/' || C.peek(1) != '*')
  83. return C;
  84. while (C.peek() != '*' || C.peek(1) != '/')
  85. C.advance();
  86. C.advance();
  87. C.advance();
  88. return C;
  89. }
  /// Return true if the given character satisfies the following regular
  /// expression: [-a-zA-Z$._0-9]
  static bool isIdentifierChar(char C) {
    // The <cctype> classifiers have undefined behaviour for negative arguments
    // other than EOF, and plain char may be signed, so convert to unsigned char
    // before classifying.
    const unsigned char UC = static_cast<unsigned char>(C);
    return std::isalnum(UC) || C == '_' || C == '-' || C == '.' || C == '$';
  }
  96. /// Unescapes the given string value.
  97. ///
  98. /// Expects the string value to be quoted.
  99. static std::string unescapeQuotedString(StringRef Value) {
  100. assert(Value.front() == '"' && Value.back() == '"');
  101. Cursor C = Cursor(Value.substr(1, Value.size() - 2));
  102. std::string Str;
  103. Str.reserve(C.remaining().size());
  104. while (!C.isEOF()) {
  105. char Char = C.peek();
  106. if (Char == '\\') {
  107. if (C.peek(1) == '\\') {
  108. // Two '\' become one
  109. Str += '\\';
  110. C.advance(2);
  111. continue;
  112. }
  113. if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) {
  114. Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2));
  115. C.advance(3);
  116. continue;
  117. }
  118. }
  119. Str += Char;
  120. C.advance();
  121. }
  122. return Str;
  123. }
  124. /// Lex a string constant using the following regular expression: \"[^\"]*\"
  125. static Cursor lexStringConstant(Cursor C, ErrorCallbackType ErrorCallback) {
  126. assert(C.peek() == '"');
  127. for (C.advance(); C.peek() != '"'; C.advance()) {
  128. if (C.isEOF() || isNewlineChar(C.peek())) {
  129. ErrorCallback(
  130. C.location(),
  131. "end of machine instruction reached before the closing '\"'");
  132. return std::nullopt;
  133. }
  134. }
  135. C.advance();
  136. return C;
  137. }
  138. static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
  139. unsigned PrefixLength, ErrorCallbackType ErrorCallback) {
  140. auto Range = C;
  141. C.advance(PrefixLength);
  142. if (C.peek() == '"') {
  143. if (Cursor R = lexStringConstant(C, ErrorCallback)) {
  144. StringRef String = Range.upto(R);
  145. Token.reset(Type, String)
  146. .setOwnedStringValue(
  147. unescapeQuotedString(String.drop_front(PrefixLength)));
  148. return R;
  149. }
  150. Token.reset(MIToken::Error, Range.remaining());
  151. return Range;
  152. }
  153. while (isIdentifierChar(C.peek()))
  154. C.advance();
  155. Token.reset(Type, Range.upto(C))
  156. .setStringValue(Range.upto(C).drop_front(PrefixLength));
  157. return C;
  158. }
  159. static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
  160. return StringSwitch<MIToken::TokenKind>(Identifier)
  161. .Case("_", MIToken::underscore)
  162. .Case("implicit", MIToken::kw_implicit)
  163. .Case("implicit-def", MIToken::kw_implicit_define)
  164. .Case("def", MIToken::kw_def)
  165. .Case("dead", MIToken::kw_dead)
  166. .Case("killed", MIToken::kw_killed)
  167. .Case("undef", MIToken::kw_undef)
  168. .Case("internal", MIToken::kw_internal)
  169. .Case("early-clobber", MIToken::kw_early_clobber)
  170. .Case("debug-use", MIToken::kw_debug_use)
  171. .Case("renamable", MIToken::kw_renamable)
  172. .Case("tied-def", MIToken::kw_tied_def)
  173. .Case("frame-setup", MIToken::kw_frame_setup)
  174. .Case("frame-destroy", MIToken::kw_frame_destroy)
  175. .Case("nnan", MIToken::kw_nnan)
  176. .Case("ninf", MIToken::kw_ninf)
  177. .Case("nsz", MIToken::kw_nsz)
  178. .Case("arcp", MIToken::kw_arcp)
  179. .Case("contract", MIToken::kw_contract)
  180. .Case("afn", MIToken::kw_afn)
  181. .Case("reassoc", MIToken::kw_reassoc)
  182. .Case("nuw", MIToken::kw_nuw)
  183. .Case("nsw", MIToken::kw_nsw)
  184. .Case("exact", MIToken::kw_exact)
  185. .Case("nofpexcept", MIToken::kw_nofpexcept)
  186. .Case("debug-location", MIToken::kw_debug_location)
  187. .Case("debug-instr-number", MIToken::kw_debug_instr_number)
  188. .Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
  189. .Case("same_value", MIToken::kw_cfi_same_value)
  190. .Case("offset", MIToken::kw_cfi_offset)
  191. .Case("rel_offset", MIToken::kw_cfi_rel_offset)
  192. .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
  193. .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
  194. .Case("adjust_cfa_offset", MIToken::kw_cfi_adjust_cfa_offset)
  195. .Case("escape", MIToken::kw_cfi_escape)
  196. .Case("def_cfa", MIToken::kw_cfi_def_cfa)
  197. .Case("llvm_def_aspace_cfa", MIToken::kw_cfi_llvm_def_aspace_cfa)
  198. .Case("remember_state", MIToken::kw_cfi_remember_state)
  199. .Case("restore", MIToken::kw_cfi_restore)
  200. .Case("restore_state", MIToken::kw_cfi_restore_state)
  201. .Case("undefined", MIToken::kw_cfi_undefined)
  202. .Case("register", MIToken::kw_cfi_register)
  203. .Case("window_save", MIToken::kw_cfi_window_save)
  204. .Case("negate_ra_sign_state",
  205. MIToken::kw_cfi_aarch64_negate_ra_sign_state)
  206. .Case("blockaddress", MIToken::kw_blockaddress)
  207. .Case("intrinsic", MIToken::kw_intrinsic)
  208. .Case("target-index", MIToken::kw_target_index)
  209. .Case("half", MIToken::kw_half)
  210. .Case("float", MIToken::kw_float)
  211. .Case("double", MIToken::kw_double)
  212. .Case("x86_fp80", MIToken::kw_x86_fp80)
  213. .Case("fp128", MIToken::kw_fp128)
  214. .Case("ppc_fp128", MIToken::kw_ppc_fp128)
  215. .Case("target-flags", MIToken::kw_target_flags)
  216. .Case("volatile", MIToken::kw_volatile)
  217. .Case("non-temporal", MIToken::kw_non_temporal)
  218. .Case("dereferenceable", MIToken::kw_dereferenceable)
  219. .Case("invariant", MIToken::kw_invariant)
  220. .Case("align", MIToken::kw_align)
  221. .Case("basealign", MIToken::kw_basealign)
  222. .Case("addrspace", MIToken::kw_addrspace)
  223. .Case("stack", MIToken::kw_stack)
  224. .Case("got", MIToken::kw_got)
  225. .Case("jump-table", MIToken::kw_jump_table)
  226. .Case("constant-pool", MIToken::kw_constant_pool)
  227. .Case("call-entry", MIToken::kw_call_entry)
  228. .Case("custom", MIToken::kw_custom)
  229. .Case("liveout", MIToken::kw_liveout)
  230. .Case("landing-pad", MIToken::kw_landing_pad)
  231. .Case("inlineasm-br-indirect-target",
  232. MIToken::kw_inlineasm_br_indirect_target)
  233. .Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
  234. .Case("liveins", MIToken::kw_liveins)
  235. .Case("successors", MIToken::kw_successors)
  236. .Case("floatpred", MIToken::kw_floatpred)
  237. .Case("intpred", MIToken::kw_intpred)
  238. .Case("shufflemask", MIToken::kw_shufflemask)
  239. .Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
  240. .Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
  241. .Case("heap-alloc-marker", MIToken::kw_heap_alloc_marker)
  242. .Case("pcsections", MIToken::kw_pcsections)
  243. .Case("cfi-type", MIToken::kw_cfi_type)
  244. .Case("bbsections", MIToken::kw_bbsections)
  245. .Case("bb_id", MIToken::kw_bb_id)
  246. .Case("unknown-size", MIToken::kw_unknown_size)
  247. .Case("unknown-address", MIToken::kw_unknown_address)
  248. .Case("distinct", MIToken::kw_distinct)
  249. .Case("ir-block-address-taken", MIToken::kw_ir_block_address_taken)
  250. .Case("machine-block-address-taken",
  251. MIToken::kw_machine_block_address_taken)
  252. .Default(MIToken::Identifier);
  253. }
  254. static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
  255. if (!isalpha(C.peek()) && C.peek() != '_')
  256. return std::nullopt;
  257. auto Range = C;
  258. while (isIdentifierChar(C.peek()))
  259. C.advance();
  260. auto Identifier = Range.upto(C);
  261. Token.reset(getIdentifierKind(Identifier), Identifier)
  262. .setStringValue(Identifier);
  263. return C;
  264. }
  265. static Cursor maybeLexMachineBasicBlock(Cursor C, MIToken &Token,
  266. ErrorCallbackType ErrorCallback) {
  267. bool IsReference = C.remaining().startswith("%bb.");
  268. if (!IsReference && !C.remaining().startswith("bb."))
  269. return std::nullopt;
  270. auto Range = C;
  271. unsigned PrefixLength = IsReference ? 4 : 3;
  272. C.advance(PrefixLength); // Skip '%bb.' or 'bb.'
  273. if (!isdigit(C.peek())) {
  274. Token.reset(MIToken::Error, C.remaining());
  275. ErrorCallback(C.location(), "expected a number after '%bb.'");
  276. return C;
  277. }
  278. auto NumberRange = C;
  279. while (isdigit(C.peek()))
  280. C.advance();
  281. StringRef Number = NumberRange.upto(C);
  282. unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>'
  283. // TODO: The format bb.<id>.<irname> is supported only when it's not a
  284. // reference. Once we deprecate the format where the irname shows up, we
  285. // should only lex forward if it is a reference.
  286. if (C.peek() == '.') {
  287. C.advance(); // Skip '.'
  288. ++StringOffset;
  289. while (isIdentifierChar(C.peek()))
  290. C.advance();
  291. }
  292. Token.reset(IsReference ? MIToken::MachineBasicBlock
  293. : MIToken::MachineBasicBlockLabel,
  294. Range.upto(C))
  295. .setIntegerValue(APSInt(Number))
  296. .setStringValue(Range.upto(C).drop_front(StringOffset));
  297. return C;
  298. }
  299. static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule,
  300. MIToken::TokenKind Kind) {
  301. if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
  302. return std::nullopt;
  303. auto Range = C;
  304. C.advance(Rule.size());
  305. auto NumberRange = C;
  306. while (isdigit(C.peek()))
  307. C.advance();
  308. Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C)));
  309. return C;
  310. }
  311. static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule,
  312. MIToken::TokenKind Kind) {
  313. if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size())))
  314. return std::nullopt;
  315. auto Range = C;
  316. C.advance(Rule.size());
  317. auto NumberRange = C;
  318. while (isdigit(C.peek()))
  319. C.advance();
  320. StringRef Number = NumberRange.upto(C);
  321. unsigned StringOffset = Rule.size() + Number.size();
  322. if (C.peek() == '.') {
  323. C.advance();
  324. ++StringOffset;
  325. while (isIdentifierChar(C.peek()))
  326. C.advance();
  327. }
  328. Token.reset(Kind, Range.upto(C))
  329. .setIntegerValue(APSInt(Number))
  330. .setStringValue(Range.upto(C).drop_front(StringOffset));
  331. return C;
  332. }
  333. static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) {
  334. return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex);
  335. }
  336. static Cursor maybeLexStackObject(Cursor C, MIToken &Token) {
  337. return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject);
  338. }
  339. static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) {
  340. return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject);
  341. }
  342. static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) {
  343. return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem);
  344. }
  345. static Cursor maybeLexSubRegisterIndex(Cursor C, MIToken &Token,
  346. ErrorCallbackType ErrorCallback) {
  347. const StringRef Rule = "%subreg.";
  348. if (!C.remaining().startswith(Rule))
  349. return std::nullopt;
  350. return lexName(C, Token, MIToken::SubRegisterIndex, Rule.size(),
  351. ErrorCallback);
  352. }
  353. static Cursor maybeLexIRBlock(Cursor C, MIToken &Token,
  354. ErrorCallbackType ErrorCallback) {
  355. const StringRef Rule = "%ir-block.";
  356. if (!C.remaining().startswith(Rule))
  357. return std::nullopt;
  358. if (isdigit(C.peek(Rule.size())))
  359. return maybeLexIndex(C, Token, Rule, MIToken::IRBlock);
  360. return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback);
  361. }
  362. static Cursor maybeLexIRValue(Cursor C, MIToken &Token,
  363. ErrorCallbackType ErrorCallback) {
  364. const StringRef Rule = "%ir.";
  365. if (!C.remaining().startswith(Rule))
  366. return std::nullopt;
  367. if (isdigit(C.peek(Rule.size())))
  368. return maybeLexIndex(C, Token, Rule, MIToken::IRValue);
  369. return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback);
  370. }
  371. static Cursor maybeLexStringConstant(Cursor C, MIToken &Token,
  372. ErrorCallbackType ErrorCallback) {
  373. if (C.peek() != '"')
  374. return std::nullopt;
  375. return lexName(C, Token, MIToken::StringConstant, /*PrefixLength=*/0,
  376. ErrorCallback);
  377. }
  378. static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
  379. auto Range = C;
  380. C.advance(); // Skip '%'
  381. auto NumberRange = C;
  382. while (isdigit(C.peek()))
  383. C.advance();
  384. Token.reset(MIToken::VirtualRegister, Range.upto(C))
  385. .setIntegerValue(APSInt(NumberRange.upto(C)));
  386. return C;
  387. }
  388. /// Returns true for a character allowed in a register name.
  389. static bool isRegisterChar(char C) {
  390. return isIdentifierChar(C) && C != '.';
  391. }
  392. static Cursor lexNamedVirtualRegister(Cursor C, MIToken &Token) {
  393. Cursor Range = C;
  394. C.advance(); // Skip '%'
  395. while (isRegisterChar(C.peek()))
  396. C.advance();
  397. Token.reset(MIToken::NamedVirtualRegister, Range.upto(C))
  398. .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
  399. return C;
  400. }
  401. static Cursor maybeLexRegister(Cursor C, MIToken &Token,
  402. ErrorCallbackType ErrorCallback) {
  403. if (C.peek() != '%' && C.peek() != '$')
  404. return std::nullopt;
  405. if (C.peek() == '%') {
  406. if (isdigit(C.peek(1)))
  407. return lexVirtualRegister(C, Token);
  408. if (isRegisterChar(C.peek(1)))
  409. return lexNamedVirtualRegister(C, Token);
  410. return std::nullopt;
  411. }
  412. assert(C.peek() == '$');
  413. auto Range = C;
  414. C.advance(); // Skip '$'
  415. while (isRegisterChar(C.peek()))
  416. C.advance();
  417. Token.reset(MIToken::NamedRegister, Range.upto(C))
  418. .setStringValue(Range.upto(C).drop_front(1)); // Drop the '$'
  419. return C;
  420. }
  421. static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token,
  422. ErrorCallbackType ErrorCallback) {
  423. if (C.peek() != '@')
  424. return std::nullopt;
  425. if (!isdigit(C.peek(1)))
  426. return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1,
  427. ErrorCallback);
  428. auto Range = C;
  429. C.advance(1); // Skip the '@'
  430. auto NumberRange = C;
  431. while (isdigit(C.peek()))
  432. C.advance();
  433. Token.reset(MIToken::GlobalValue, Range.upto(C))
  434. .setIntegerValue(APSInt(NumberRange.upto(C)));
  435. return C;
  436. }
  437. static Cursor maybeLexExternalSymbol(Cursor C, MIToken &Token,
  438. ErrorCallbackType ErrorCallback) {
  439. if (C.peek() != '&')
  440. return std::nullopt;
  441. return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1,
  442. ErrorCallback);
  443. }
  444. static Cursor maybeLexMCSymbol(Cursor C, MIToken &Token,
  445. ErrorCallbackType ErrorCallback) {
  446. const StringRef Rule = "<mcsymbol ";
  447. if (!C.remaining().startswith(Rule))
  448. return std::nullopt;
  449. auto Start = C;
  450. C.advance(Rule.size());
  451. // Try a simple unquoted name.
  452. if (C.peek() != '"') {
  453. while (isIdentifierChar(C.peek()))
  454. C.advance();
  455. StringRef String = Start.upto(C).drop_front(Rule.size());
  456. if (C.peek() != '>') {
  457. ErrorCallback(C.location(),
  458. "expected the '<mcsymbol ...' to be closed by a '>'");
  459. Token.reset(MIToken::Error, Start.remaining());
  460. return Start;
  461. }
  462. C.advance();
  463. Token.reset(MIToken::MCSymbol, Start.upto(C)).setStringValue(String);
  464. return C;
  465. }
  466. // Otherwise lex out a quoted name.
  467. Cursor R = lexStringConstant(C, ErrorCallback);
  468. if (!R) {
  469. ErrorCallback(C.location(),
  470. "unable to parse quoted string from opening quote");
  471. Token.reset(MIToken::Error, Start.remaining());
  472. return Start;
  473. }
  474. StringRef String = Start.upto(R).drop_front(Rule.size());
  475. if (R.peek() != '>') {
  476. ErrorCallback(R.location(),
  477. "expected the '<mcsymbol ...' to be closed by a '>'");
  478. Token.reset(MIToken::Error, Start.remaining());
  479. return Start;
  480. }
  481. R.advance();
  482. Token.reset(MIToken::MCSymbol, Start.upto(R))
  483. .setOwnedStringValue(unescapeQuotedString(String));
  484. return R;
  485. }
  /// Returns true if \p C is one of the letters accepted right after "0x" to
  /// mark a hexadecimal floating-point literal: H, K, L, M or R.
  static bool isValidHexFloatingPointPrefix(char C) {
    switch (C) {
    case 'H':
    case 'K':
    case 'L':
    case 'M':
    case 'R':
      return true;
    default:
      return false;
    }
  }
  489. static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
  490. C.advance();
  491. // Skip over [0-9]*([eE][-+]?[0-9]+)?
  492. while (isdigit(C.peek()))
  493. C.advance();
  494. if ((C.peek() == 'e' || C.peek() == 'E') &&
  495. (isdigit(C.peek(1)) ||
  496. ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) {
  497. C.advance(2);
  498. while (isdigit(C.peek()))
  499. C.advance();
  500. }
  501. Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
  502. return C;
  503. }
  504. static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
  505. if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
  506. return std::nullopt;
  507. Cursor Range = C;
  508. C.advance(2);
  509. unsigned PrefLen = 2;
  510. if (isValidHexFloatingPointPrefix(C.peek())) {
  511. C.advance();
  512. PrefLen++;
  513. }
  514. while (isxdigit(C.peek()))
  515. C.advance();
  516. StringRef StrVal = Range.upto(C);
  517. if (StrVal.size() <= PrefLen)
  518. return std::nullopt;
  519. if (PrefLen == 2)
  520. Token.reset(MIToken::HexLiteral, Range.upto(C));
  521. else // It must be 3, which means that there was a floating-point prefix.
  522. Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
  523. return C;
  524. }
  525. static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
  526. if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
  527. return std::nullopt;
  528. auto Range = C;
  529. C.advance();
  530. while (isdigit(C.peek()))
  531. C.advance();
  532. if (C.peek() == '.')
  533. return lexFloatingPointLiteral(Range, C, Token);
  534. StringRef StrVal = Range.upto(C);
  535. Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal));
  536. return C;
  537. }
  538. static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) {
  539. return StringSwitch<MIToken::TokenKind>(Identifier)
  540. .Case("!tbaa", MIToken::md_tbaa)
  541. .Case("!alias.scope", MIToken::md_alias_scope)
  542. .Case("!noalias", MIToken::md_noalias)
  543. .Case("!range", MIToken::md_range)
  544. .Case("!DIExpression", MIToken::md_diexpr)
  545. .Case("!DILocation", MIToken::md_dilocation)
  546. .Default(MIToken::Error);
  547. }
  548. static Cursor maybeLexExclaim(Cursor C, MIToken &Token,
  549. ErrorCallbackType ErrorCallback) {
  550. if (C.peek() != '!')
  551. return std::nullopt;
  552. auto Range = C;
  553. C.advance(1);
  554. if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) {
  555. Token.reset(MIToken::exclaim, Range.upto(C));
  556. return C;
  557. }
  558. while (isIdentifierChar(C.peek()))
  559. C.advance();
  560. StringRef StrVal = Range.upto(C);
  561. Token.reset(getMetadataKeywordKind(StrVal), StrVal);
  562. if (Token.isError())
  563. ErrorCallback(Token.location(),
  564. "use of unknown metadata keyword '" + StrVal + "'");
  565. return C;
  566. }
  567. static MIToken::TokenKind symbolToken(char C) {
  568. switch (C) {
  569. case ',':
  570. return MIToken::comma;
  571. case '.':
  572. return MIToken::dot;
  573. case '=':
  574. return MIToken::equal;
  575. case ':':
  576. return MIToken::colon;
  577. case '(':
  578. return MIToken::lparen;
  579. case ')':
  580. return MIToken::rparen;
  581. case '{':
  582. return MIToken::lbrace;
  583. case '}':
  584. return MIToken::rbrace;
  585. case '+':
  586. return MIToken::plus;
  587. case '-':
  588. return MIToken::minus;
  589. case '<':
  590. return MIToken::less;
  591. case '>':
  592. return MIToken::greater;
  593. default:
  594. return MIToken::Error;
  595. }
  596. }
  597. static Cursor maybeLexSymbol(Cursor C, MIToken &Token) {
  598. MIToken::TokenKind Kind;
  599. unsigned Length = 1;
  600. if (C.peek() == ':' && C.peek(1) == ':') {
  601. Kind = MIToken::coloncolon;
  602. Length = 2;
  603. } else
  604. Kind = symbolToken(C.peek());
  605. if (Kind == MIToken::Error)
  606. return std::nullopt;
  607. auto Range = C;
  608. C.advance(Length);
  609. Token.reset(Kind, Range.upto(C));
  610. return C;
  611. }
  612. static Cursor maybeLexNewline(Cursor C, MIToken &Token) {
  613. if (!isNewlineChar(C.peek()))
  614. return std::nullopt;
  615. auto Range = C;
  616. C.advance();
  617. Token.reset(MIToken::Newline, Range.upto(C));
  618. return C;
  619. }
  620. static Cursor maybeLexEscapedIRValue(Cursor C, MIToken &Token,
  621. ErrorCallbackType ErrorCallback) {
  622. if (C.peek() != '`')
  623. return std::nullopt;
  624. auto Range = C;
  625. C.advance();
  626. auto StrRange = C;
  627. while (C.peek() != '`') {
  628. if (C.isEOF() || isNewlineChar(C.peek())) {
  629. ErrorCallback(
  630. C.location(),
  631. "end of machine instruction reached before the closing '`'");
  632. Token.reset(MIToken::Error, Range.remaining());
  633. return C;
  634. }
  635. C.advance();
  636. }
  637. StringRef Value = StrRange.upto(C);
  638. C.advance();
  639. Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value);
  640. return C;
  641. }
  642. StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
  643. ErrorCallbackType ErrorCallback) {
  644. auto C = skipComment(skipWhitespace(Cursor(Source)));
  645. if (C.isEOF()) {
  646. Token.reset(MIToken::Eof, C.remaining());
  647. return C.remaining();
  648. }
  649. C = skipMachineOperandComment(C);
  650. if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
  651. return R.remaining();
  652. if (Cursor R = maybeLexIdentifier(C, Token))
  653. return R.remaining();
  654. if (Cursor R = maybeLexJumpTableIndex(C, Token))
  655. return R.remaining();
  656. if (Cursor R = maybeLexStackObject(C, Token))
  657. return R.remaining();
  658. if (Cursor R = maybeLexFixedStackObject(C, Token))
  659. return R.remaining();
  660. if (Cursor R = maybeLexConstantPoolItem(C, Token))
  661. return R.remaining();
  662. if (Cursor R = maybeLexSubRegisterIndex(C, Token, ErrorCallback))
  663. return R.remaining();
  664. if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback))
  665. return R.remaining();
  666. if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback))
  667. return R.remaining();
  668. if (Cursor R = maybeLexRegister(C, Token, ErrorCallback))
  669. return R.remaining();
  670. if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback))
  671. return R.remaining();
  672. if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
  673. return R.remaining();
  674. if (Cursor R = maybeLexMCSymbol(C, Token, ErrorCallback))
  675. return R.remaining();
  676. if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
  677. return R.remaining();
  678. if (Cursor R = maybeLexNumericalLiteral(C, Token))
  679. return R.remaining();
  680. if (Cursor R = maybeLexExclaim(C, Token, ErrorCallback))
  681. return R.remaining();
  682. if (Cursor R = maybeLexSymbol(C, Token))
  683. return R.remaining();
  684. if (Cursor R = maybeLexNewline(C, Token))
  685. return R.remaining();
  686. if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback))
  687. return R.remaining();
  688. if (Cursor R = maybeLexStringConstant(C, Token, ErrorCallback))
  689. return R.remaining();
  690. Token.reset(MIToken::Error, C.remaining());
  691. ErrorCallback(C.location(),
  692. Twine("unexpected character '") + Twine(C.peek()) + "'");
  693. return C.remaining();
  694. }