MCTargetAsmParser.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
  14. #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <cstdint>
#include <memory>
  24. namespace llvm {
  25. class MCContext;
  26. class MCInst;
  27. class MCInstrInfo;
  28. class MCRegister;
  29. class MCStreamer;
  30. class MCSubtargetInfo;
  31. class MCSymbol;
  32. template <typename T> class SmallVectorImpl;
  33. using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
  /// Identifies the kind of rewrite described by an AsmRewrite record.
  ///
  /// The enumerators are unscoped and take the implicit values 0..10 in the
  /// order listed here; do not reorder them without updating every table that
  /// lists one entry per enumerator.
  enum AsmRewriteKind {
    AOK_Align,          // Rewrite align as .align.
    AOK_EVEN,           // Rewrite even as .even.
    AOK_Emit,           // Rewrite _emit as .byte.
    AOK_CallInput,      // Rewrite in terms of ${N:P}.
    AOK_Input,          // Rewrite in terms of $N.
    AOK_Output,         // Rewrite in terms of $N.
    AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
    AOK_Label,          // Rewrite local labels.
    AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
    AOK_Skip,           // Skip emission (e.g., offset/type operators).
    AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
  };
  // Precedence value associated with each AsmRewriteKind, one entry per
  // enumerator, listed in the enumerators' declaration order.
  // NOTE(review): presumably indexed by AsmRewriteKind; the lookup site is not
  // visible in this header, so confirm before reordering.
  const char AsmRewritePrecedence[] = {
    2, // AOK_Align
    2, // AOK_EVEN
    2, // AOK_Emit
    3, // AOK_CallInput
    3, // AOK_Input
    3, // AOK_Output
    5, // AOK_SizeDirective
    1, // AOK_Label
    5, // AOK_EndOfStatement
    2, // AOK_Skip
    2  // AOK_IntelExpr
  };
  60. // Represnt the various parts which makes up an intel expression,
  61. // used for emitting compound intel expressions
  62. struct IntelExpr {
  63. bool NeedBracs;
  64. int64_t Imm;
  65. StringRef BaseReg;
  66. StringRef IndexReg;
  67. StringRef OffsetName;
  68. unsigned Scale;
  69. IntelExpr()
  70. : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
  71. OffsetName(StringRef()), Scale(1) {}
  72. // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
  73. IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
  74. StringRef offsetName, int64_t imm, bool needBracs)
  75. : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
  76. OffsetName(offsetName), Scale(1) {
  77. if (scale)
  78. Scale = scale;
  79. }
  80. bool hasBaseReg() const { return !BaseReg.empty(); }
  81. bool hasIndexReg() const { return !IndexReg.empty(); }
  82. bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
  83. bool hasOffset() const { return !OffsetName.empty(); }
  84. // Normally we won't emit immediates unconditionally,
  85. // unless we've got no other components
  86. bool emitImm() const { return !(hasRegs() || hasOffset()); }
  87. bool isValid() const {
  88. return (Scale == 1) ||
  89. (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
  90. }
  91. };
  92. struct AsmRewrite {
  93. AsmRewriteKind Kind;
  94. SMLoc Loc;
  95. unsigned Len;
  96. bool Done;
  97. int64_t Val;
  98. StringRef Label;
  99. IntelExpr IntelExp;
  100. bool IntelExpRestricted;
  101. public:
  102. AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
  103. bool Restricted = false)
  104. : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
  105. IntelExpRestricted = Restricted;
  106. }
  107. AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
  108. : AsmRewrite(kind, loc, len) { Label = label; }
  109. AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
  110. : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
  111. };
  112. struct ParseInstructionInfo {
  113. SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
  114. ParseInstructionInfo() = default;
  115. ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
  116. : AsmRewrites(rewrites) {}
  117. };
  // Result of attempting to match or parse a single operand.
  enum OperandMatchResultTy {
    MatchOperand_Success,  // operand matched successfully
    MatchOperand_NoMatch,  // operand did not match
    MatchOperand_ParseFail // operand matched but had errors
  };
  // Three-way outcome carried by a DiagnosticPredicate: a full match, a near
  // match, or no match at all.
  enum class DiagnosticPredicateTy {
    Match,
    NearMatch,
    NoMatch,
  };
  128. // When an operand is parsed, the assembler will try to iterate through a set of
  129. // possible operand classes that the operand might match and call the
  130. // corresponding PredicateMethod to determine that.
  131. //
  132. // If there are two AsmOperands that would give a specific diagnostic if there
  133. // is no match, there is currently no mechanism to distinguish which operand is
  134. // a closer match. The DiagnosticPredicate distinguishes between 'completely
  135. // no match' and 'near match', so the assembler can decide whether to give a
  136. // specific diagnostic, or use 'InvalidOperand' and continue to find a
  137. // 'better matching' diagnostic.
  138. //
  139. // For example:
  140. // opcode opnd0, onpd1, opnd2
  141. //
  142. // where:
  143. // opnd2 could be an 'immediate of range [-8, 7]'
  144. // opnd2 could be a 'register + shift/extend'.
  145. //
  146. // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
  147. // little sense to give a diagnostic that the operand should be an immediate
  148. // in range [-8, 7].
  149. //
  150. // This is a light-weight alternative to the 'NearMissInfo' approach
  151. // below which collects *all* possible diagnostics. This alternative
  152. // is optional and fully backward compatible with existing
  153. // PredicateMethods that return a 'bool' (match or no match).
  154. struct DiagnosticPredicate {
  155. DiagnosticPredicateTy Type;
  156. explicit DiagnosticPredicate(bool Match)
  157. : Type(Match ? DiagnosticPredicateTy::Match
  158. : DiagnosticPredicateTy::NearMatch) {}
  159. DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  160. DiagnosticPredicate(const DiagnosticPredicate &) = default;
  161. DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;
  162. operator bool() const { return Type == DiagnosticPredicateTy::Match; }
  163. bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  164. bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  165. bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
  166. };
  167. // When matching of an assembly instruction fails, there may be multiple
  168. // encodings that are close to being a match. It's often ambiguous which one
  169. // the programmer intended to use, so we want to report an error which mentions
  170. // each of these "near-miss" encodings. This struct contains information about
  171. // one such encoding, and why it did not match the parsed instruction.
  172. class NearMissInfo {
  173. public:
  174. enum NearMissKind {
  175. NoNearMiss,
  176. NearMissOperand,
  177. NearMissFeature,
  178. NearMissPredicate,
  179. NearMissTooFewOperands,
  180. };
  181. // The encoding is valid for the parsed assembly string. This is only used
  182. // internally to the table-generated assembly matcher.
  183. static NearMissInfo getSuccess() { return NearMissInfo(); }
  184. // The instruction encoding is not valid because it requires some target
  185. // features that are not currently enabled. MissingFeatures has a bit set for
  186. // each feature that the encoding needs but which is not enabled.
  187. static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
  188. NearMissInfo Result;
  189. Result.Kind = NearMissFeature;
  190. Result.Features = MissingFeatures;
  191. return Result;
  192. }
  193. // The instruction encoding is not valid because the target-specific
  194. // predicate function returned an error code. FailureCode is the
  195. // target-specific error code returned by the predicate.
  196. static NearMissInfo getMissedPredicate(unsigned FailureCode) {
  197. NearMissInfo Result;
  198. Result.Kind = NearMissPredicate;
  199. Result.PredicateError = FailureCode;
  200. return Result;
  201. }
  202. // The instruction encoding is not valid because one (and only one) parsed
  203. // operand is not of the correct type. OperandError is the error code
  204. // relating to the operand class expected by the encoding. OperandClass is
  205. // the type of the expected operand. Opcode is the opcode of the encoding.
  206. // OperandIndex is the index into the parsed operand list.
  207. static NearMissInfo getMissedOperand(unsigned OperandError,
  208. unsigned OperandClass, unsigned Opcode,
  209. unsigned OperandIndex) {
  210. NearMissInfo Result;
  211. Result.Kind = NearMissOperand;
  212. Result.MissedOperand.Error = OperandError;
  213. Result.MissedOperand.Class = OperandClass;
  214. Result.MissedOperand.Opcode = Opcode;
  215. Result.MissedOperand.Index = OperandIndex;
  216. return Result;
  217. }
  218. // The instruction encoding is not valid because it expects more operands
  219. // than were parsed. OperandClass is the class of the expected operand that
  220. // was not provided. Opcode is the instruction encoding.
  221. static NearMissInfo getTooFewOperands(unsigned OperandClass,
  222. unsigned Opcode) {
  223. NearMissInfo Result;
  224. Result.Kind = NearMissTooFewOperands;
  225. Result.TooFewOperands.Class = OperandClass;
  226. Result.TooFewOperands.Opcode = Opcode;
  227. return Result;
  228. }
  229. operator bool() const { return Kind != NoNearMiss; }
  230. NearMissKind getKind() const { return Kind; }
  231. // Feature flags required by the instruction, that the current target does
  232. // not have.
  233. const FeatureBitset& getFeatures() const {
  234. assert(Kind == NearMissFeature);
  235. return Features;
  236. }
  237. // Error code returned by the target predicate when validating this
  238. // instruction encoding.
  239. unsigned getPredicateError() const {
  240. assert(Kind == NearMissPredicate);
  241. return PredicateError;
  242. }
  243. // MatchClassKind of the operand that we expected to see.
  244. unsigned getOperandClass() const {
  245. assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
  246. return MissedOperand.Class;
  247. }
  248. // Opcode of the encoding we were trying to match.
  249. unsigned getOpcode() const {
  250. assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
  251. return MissedOperand.Opcode;
  252. }
  253. // Error code returned when validating the operand.
  254. unsigned getOperandError() const {
  255. assert(Kind == NearMissOperand);
  256. return MissedOperand.Error;
  257. }
  258. // Index of the actual operand we were trying to match in the list of parsed
  259. // operands.
  260. unsigned getOperandIndex() const {
  261. assert(Kind == NearMissOperand);
  262. return MissedOperand.Index;
  263. }
  264. private:
  265. NearMissKind Kind;
  266. // These two structs share a common prefix, so we can safely rely on the fact
  267. // that they overlap in the union.
  268. struct MissedOpInfo {
  269. unsigned Class;
  270. unsigned Opcode;
  271. unsigned Error;
  272. unsigned Index;
  273. };
  274. struct TooFewOperandsInfo {
  275. unsigned Class;
  276. unsigned Opcode;
  277. };
  278. union {
  279. FeatureBitset Features;
  280. unsigned PredicateError;
  281. MissedOpInfo MissedOperand;
  282. TooFewOperandsInfo TooFewOperands;
  283. };
  284. NearMissInfo() : Kind(NoNearMiss) {}
  285. };
  286. /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
  287. class MCTargetAsmParser : public MCAsmParserExtension {
  288. public:
  289. enum MatchResultTy {
  290. Match_InvalidOperand,
  291. Match_InvalidTiedOperand,
  292. Match_MissingFeature,
  293. Match_MnemonicFail,
  294. Match_Success,
  295. Match_NearMisses,
  296. FIRST_TARGET_MATCH_RESULT_TY
  297. };
  298. protected: // Can only create subclasses.
  299. MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
  300. const MCInstrInfo &MII);
  301. /// Create a copy of STI and return a non-const reference to it.
  302. MCSubtargetInfo &copySTI();
  303. /// AvailableFeatures - The current set of available features.
  304. FeatureBitset AvailableFeatures;
  305. /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
  306. bool ParsingMSInlineAsm = false;
  307. /// SemaCallback - The Sema callback implementation. Must be set when parsing
  308. /// ms-style inline assembly.
  309. MCAsmParserSemaCallback *SemaCallback = nullptr;
  310. /// Set of options which affects instrumentation of inline assembly.
  311. MCTargetOptions MCOptions;
  312. /// Current STI.
  313. const MCSubtargetInfo *STI;
  314. const MCInstrInfo &MII;
  315. public:
  316. MCTargetAsmParser(const MCTargetAsmParser &) = delete;
  317. MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
  318. ~MCTargetAsmParser() override;
  319. const MCSubtargetInfo &getSTI() const;
  320. const FeatureBitset& getAvailableFeatures() const {
  321. return AvailableFeatures;
  322. }
  323. void setAvailableFeatures(const FeatureBitset& Value) {
  324. AvailableFeatures = Value;
  325. }
  326. bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
  327. void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
  328. MCTargetOptions getTargetOptions() const { return MCOptions; }
  329. void setSemaCallback(MCAsmParserSemaCallback *Callback) {
  330. SemaCallback = Callback;
  331. }
  332. // Target-specific parsing of expression.
  333. virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  334. return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
  335. }
  336. virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
  337. SMLoc &EndLoc) = 0;
  338. /// tryParseRegister - parse one register if possible
  339. ///
  340. /// Check whether a register specification can be parsed at the current
  341. /// location, without failing the entire parse if it can't. Must not consume
  342. /// tokens if the parse fails.
  343. virtual OperandMatchResultTy
  344. tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
  345. /// ParseInstruction - Parse one assembly instruction.
  346. ///
  347. /// The parser is positioned following the instruction name. The target
  348. /// specific instruction parser should parse the entire instruction and
  349. /// construct the appropriate MCInst, or emit an error. On success, the entire
  350. /// line should be parsed up to and including the end-of-statement token. On
  351. /// failure, the parser is not required to read to the end of the line.
  352. //
  353. /// \param Name - The instruction name.
  354. /// \param NameLoc - The source location of the name.
  355. /// \param Operands [out] - The list of parsed operands, this returns
  356. /// ownership of them to the caller.
  357. /// \return True on failure.
  358. virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
  359. SMLoc NameLoc, OperandVector &Operands) = 0;
  360. virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
  361. AsmToken Token, OperandVector &Operands) {
  362. return ParseInstruction(Info, Name, Token.getLoc(), Operands);
  363. }
  364. /// ParseDirective - Parse a target specific assembler directive
  365. ///
  366. /// The parser is positioned following the directive name. The target
  367. /// specific directive parser should parse the entire directive doing or
  368. /// recording any target specific work, or return true and do nothing if the
  369. /// directive is not target specific. If the directive is specific for
  370. /// the target, the entire line is parsed up to and including the
  371. /// end-of-statement token and false is returned.
  372. ///
  373. /// \param DirectiveID - the identifier token of the directive.
  374. virtual bool ParseDirective(AsmToken DirectiveID) = 0;
  375. /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
  376. /// instruction as an actual MCInst and emit it to the specified MCStreamer.
  377. /// This returns false on success and returns true on failure to match.
  378. ///
  379. /// On failure, the target parser is responsible for emitting a diagnostic
  380. /// explaining the match failure.
  381. virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
  382. OperandVector &Operands, MCStreamer &Out,
  383. uint64_t &ErrorInfo,
  384. bool MatchingInlineAsm) = 0;
  385. /// Allows targets to let registers opt out of clobber lists.
  386. virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
  387. /// Allow a target to add special case operand matching for things that
  388. /// tblgen doesn't/can't handle effectively. For example, literal
  389. /// immediates on ARM. TableGen expects a token operand, but the parser
  390. /// will recognize them as immediates.
  391. virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
  392. unsigned Kind) {
  393. return Match_InvalidOperand;
  394. }
  395. /// Validate the instruction match against any complex target predicates
  396. /// before rendering any operands to it.
  397. virtual unsigned
  398. checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
  399. return Match_Success;
  400. }
  401. /// checkTargetMatchPredicate - Validate the instruction match against
  402. /// any complex target predicates not expressible via match classes.
  403. virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
  404. return Match_Success;
  405. }
  406. virtual void convertToMapAndConstraints(unsigned Kind,
  407. const OperandVector &Operands) = 0;
  408. /// Returns whether two operands are registers and are equal. This is used
  409. /// by the tied-operands checks in the AsmMatcher. This method can be
  410. /// overridden to allow e.g. a sub- or super-register as the tied operand.
  411. virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
  412. const MCParsedAsmOperand &Op2) const {
  413. return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg();
  414. }
  415. // Return whether this parser uses assignment statements with equals tokens
  416. virtual bool equalIsAsmAssignment() { return true; };
  417. // Return whether this start of statement identifier is a label
  418. virtual bool isLabel(AsmToken &Token) { return true; };
  419. // Return whether this parser accept star as start of statement
  420. virtual bool starIsStartOfStatement() { return false; };
  421. virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
  422. MCSymbolRefExpr::VariantKind,
  423. MCContext &Ctx) {
  424. return nullptr;
  425. }
  426. // For actions that have to be performed before a label is emitted
  427. virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
  428. virtual void onLabelParsed(MCSymbol *Symbol) {}
  429. /// Ensure that all previously parsed instructions have been emitted to the
  430. /// output streamer, if the target does not emit them immediately.
  431. virtual void flushPendingInstructions(MCStreamer &Out) {}
  432. virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
  433. AsmToken::TokenKind OperatorToken,
  434. MCContext &Ctx) {
  435. return nullptr;
  436. }
  437. // For any initialization at the beginning of parsing.
  438. virtual void onBeginOfFile() {}
  439. // For any checks or cleanups at the end of parsing.
  440. virtual void onEndOfFile() {}
  441. };
  442. } // end namespace llvm
  443. #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
  444. #ifdef __GNUC__
  445. #pragma GCC diagnostic pop
  446. #endif