X86Disassembler.cpp 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387
  1. //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is part of the X86 Disassembler.
  10. // It contains code to translate the data produced by the decoder into
  11. // MCInsts.
  12. //
  13. //
  14. // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
  15. // 64-bit X86 instruction sets. The main decode sequence for an assembly
  16. // instruction in this disassembler is:
  17. //
  18. // 1. Read the prefix bytes and determine the attributes of the instruction.
  19. // These attributes, recorded in enum attributeBits
  20. // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
  21. // provides a mapping from bitmasks to contexts, which are represented by
  22. // enum InstructionContext (ibid.).
  23. //
  24. // 2. Read the opcode, and determine what kind of opcode it is. The
  25. // disassembler distinguishes four kinds of opcodes, which are enumerated in
  26. // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
  27. // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
  28. // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
  29. //
  30. // 3. Depending on the opcode type, look in one of four ClassDecision structures
  31. // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
  32. // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
  33. // a ModRMDecision (ibid.).
  34. //
  35. // 4. Some instructions, such as escape opcodes or extended opcodes, or even
  36. // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
  37. // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
  38. // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
  39. // ModR/M byte is required and how to interpret it.
  40. //
  41. // 5. After resolving the ModRMDecision, the disassembler has a unique ID
  42. // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
  43. // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
  44. // meanings of its operands.
  45. //
  46. // 6. For each operand, its encoding is an entry from OperandEncoding
  47. // (X86DisassemblerDecoderCommon.h) and its type is an entry from
  48. // OperandType (ibid.). The encoding indicates how to read it from the
  49. // instruction; the type indicates how to interpret the value once it has
  50. // been read. For example, a register operand could be stored in the R/M
  51. // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
  52. // the main opcode. This is orthogonal to its meaning (a GPR or an XMM
  53. // register, for instance). Given this information, the operands can be
  54. // extracted and interpreted.
  55. //
  56. // 7. As the last step, the disassembler translates the instruction information
  57. // and operands into a format understandable by the client - in this case, an
  58. // MCInst for use by the MC infrastructure.
  59. //
  60. // The disassembler is broken broadly into two parts: the table emitter that
  61. // emits the instruction decode tables discussed above during compilation, and
  62. // the disassembler itself. The table emitter is documented in more detail in
  63. // utils/TableGen/X86DisassemblerEmitter.h.
  64. //
  65. // X86Disassembler.cpp contains the code responsible for step 7, and for
  66. // invoking the decoder to execute steps 1-6.
  67. // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
  68. // table emitter and the disassembler.
  69. // X86DisassemblerDecoder.h contains the public interface of the decoder,
  70. // factored out into C for possible use by other projects.
  71. // X86DisassemblerDecoder.c contains the source code of the decoder, which is
  72. // responsible for steps 1-6.
  73. //
  74. //===----------------------------------------------------------------------===//
  75. #include "MCTargetDesc/X86BaseInfo.h"
  76. #include "MCTargetDesc/X86MCTargetDesc.h"
  77. #include "TargetInfo/X86TargetInfo.h"
  78. #include "X86DisassemblerDecoder.h"
  79. #include "llvm/MC/MCContext.h"
  80. #include "llvm/MC/MCDisassembler/MCDisassembler.h"
  81. #include "llvm/MC/MCExpr.h"
  82. #include "llvm/MC/MCInst.h"
  83. #include "llvm/MC/MCInstrInfo.h"
  84. #include "llvm/MC/MCSubtargetInfo.h"
  85. #include "llvm/MC/TargetRegistry.h"
  86. #include "llvm/Support/Debug.h"
  87. #include "llvm/Support/Format.h"
  88. #include "llvm/Support/raw_ostream.h"
// Pull the LLVM and X86 disassembler namespaces into this translation unit so
// the decoder's types, enumerators and helpers can be used unqualified below.
  89. using namespace llvm;
  90. using namespace llvm::X86Disassembler;
// Name under which this file's LLVM_DEBUG output is grouped.
  91. #define DEBUG_TYPE "x86-disassembler"
// Debug-only trace helper: streams the current source line number, ": ", and
// then s to dbgs(). NOTE(review): the expansion already ends in ';', so a use
// written as `debug(x);` yields an extra empty statement.
  92. #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
  93. // Specifies whether a ModR/M byte is needed and (if so) which
  94. // instruction each possible value of the ModR/M byte corresponds to. Once
  95. // this information is known, we have narrowed down to a single instruction.
  96. struct ModRMDecision {
  97. uint8_t modrm_type;
  98. uint16_t instructionIDs;
  99. };
  100. // Specifies which set of ModR/M->instruction tables to look at
  101. // given a particular opcode.
  102. struct OpcodeDecision {
  103. ModRMDecision modRMDecisions[256];
  104. };
  105. // Specifies which opcode->instruction tables to look at given
  106. // a particular context (set of attributes). Since there are many possible
  107. // contexts, the decoder first uses CONTEXTS_SYM to determine which context
  108. // applies given a specific set of attributes. Hence there are only IC_max
  109. // entries in this table, rather than 2^(ATTR_max).
  110. struct ContextDecision {
  111. OpcodeDecision opcodeDecisions[IC_max];
  112. };
  113. #include "X86GenDisassemblerTables.inc"
  114. static InstrUID decode(OpcodeType type, InstructionContext insnContext,
  115. uint8_t opcode, uint8_t modRM) {
  116. const struct ModRMDecision *dec;
  117. switch (type) {
  118. case ONEBYTE:
  119. dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  120. break;
  121. case TWOBYTE:
  122. dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  123. break;
  124. case THREEBYTE_38:
  125. dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  126. break;
  127. case THREEBYTE_3A:
  128. dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  129. break;
  130. case XOP8_MAP:
  131. dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  132. break;
  133. case XOP9_MAP:
  134. dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  135. break;
  136. case XOPA_MAP:
  137. dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  138. break;
  139. case THREEDNOW_MAP:
  140. dec =
  141. &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  142. break;
  143. case MAP5:
  144. dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  145. break;
  146. case MAP6:
  147. dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  148. break;
  149. }
  150. switch (dec->modrm_type) {
  151. default:
  152. llvm_unreachable("Corrupt table! Unknown modrm_type");
  153. return 0;
  154. case MODRM_ONEENTRY:
  155. return modRMTable[dec->instructionIDs];
  156. case MODRM_SPLITRM:
  157. if (modFromModRM(modRM) == 0x3)
  158. return modRMTable[dec->instructionIDs + 1];
  159. return modRMTable[dec->instructionIDs];
  160. case MODRM_SPLITREG:
  161. if (modFromModRM(modRM) == 0x3)
  162. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
  163. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
  164. case MODRM_SPLITMISC:
  165. if (modFromModRM(modRM) == 0x3)
  166. return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
  167. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
  168. case MODRM_FULL:
  169. return modRMTable[dec->instructionIDs + modRM];
  170. }
  171. }
  172. static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
  173. uint64_t offset = insn->readerCursor - insn->startLocation;
  174. if (offset >= insn->bytes.size())
  175. return true;
  176. byte = insn->bytes[offset];
  177. return false;
  178. }
  179. template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
  180. auto r = insn->bytes;
  181. uint64_t offset = insn->readerCursor - insn->startLocation;
  182. if (offset + sizeof(T) > r.size())
  183. return true;
  184. T ret = 0;
  185. for (unsigned i = 0; i < sizeof(T); ++i)
  186. ret |= (uint64_t)r[offset + i] << (i * 8);
  187. ptr = ret;
  188. insn->readerCursor += sizeof(T);
  189. return false;
  190. }
  191. static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
  192. return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
  193. }
  194. // Consumes all of an instruction's prefix bytes, and marks the
  195. // instruction as having them. Also sets the instruction's default operand,
  196. // address, and other relevant data sizes to report operands correctly.
  197. //
  198. // insn must not be empty.
  199. static int readPrefixes(struct InternalInstruction *insn) {
  200. bool isPrefix = true;
  201. uint8_t byte = 0;
  202. uint8_t nextByte;
  203. LLVM_DEBUG(dbgs() << "readPrefixes()");
  204. while (isPrefix) {
  205. // If we fail reading prefixes, just stop here and let the opcode reader
  206. // deal with it.
  207. if (consume(insn, byte))
  208. break;
  209. // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
  210. // break and let it be disassembled as a normal "instruction".
  211. if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
  212. break;
  213. if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
  214. // If the byte is 0xf2 or 0xf3, and any of the following conditions are
  215. // met:
  216. // - it is followed by a LOCK (0xf0) prefix
  217. // - it is followed by an xchg instruction
  218. // then it should be disassembled as a xacquire/xrelease not repne/rep.
  219. if (((nextByte == 0xf0) ||
  220. ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
  221. insn->xAcquireRelease = true;
  222. if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
  223. break;
  224. }
  225. // Also if the byte is 0xf3, and the following condition is met:
  226. // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
  227. // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
  228. // then it should be disassembled as an xrelease not rep.
  229. if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
  230. nextByte == 0xc6 || nextByte == 0xc7)) {
  231. insn->xAcquireRelease = true;
  232. break;
  233. }
  234. if (isREX(insn, nextByte)) {
  235. uint8_t nnextByte;
  236. // Go to REX prefix after the current one
  237. if (consume(insn, nnextByte))
  238. return -1;
  239. // We should be able to read next byte after REX prefix
  240. if (peek(insn, nnextByte))
  241. return -1;
  242. --insn->readerCursor;
  243. }
  244. }
  245. switch (byte) {
  246. case 0xf0: // LOCK
  247. insn->hasLockPrefix = true;
  248. break;
  249. case 0xf2: // REPNE/REPNZ
  250. case 0xf3: { // REP or REPE/REPZ
  251. uint8_t nextByte;
  252. if (peek(insn, nextByte))
  253. break;
  254. // TODO:
  255. // 1. There could be several 0x66
  256. // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
  257. // it's not mandatory prefix
  258. // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
  259. // 0x0f exactly after it to be mandatory prefix
  260. if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
  261. // The last of 0xf2 /0xf3 is mandatory prefix
  262. insn->mandatoryPrefix = byte;
  263. insn->repeatPrefix = byte;
  264. break;
  265. }
  266. case 0x2e: // CS segment override -OR- Branch not taken
  267. insn->segmentOverride = SEG_OVERRIDE_CS;
  268. break;
  269. case 0x36: // SS segment override -OR- Branch taken
  270. insn->segmentOverride = SEG_OVERRIDE_SS;
  271. break;
  272. case 0x3e: // DS segment override
  273. insn->segmentOverride = SEG_OVERRIDE_DS;
  274. break;
  275. case 0x26: // ES segment override
  276. insn->segmentOverride = SEG_OVERRIDE_ES;
  277. break;
  278. case 0x64: // FS segment override
  279. insn->segmentOverride = SEG_OVERRIDE_FS;
  280. break;
  281. case 0x65: // GS segment override
  282. insn->segmentOverride = SEG_OVERRIDE_GS;
  283. break;
  284. case 0x66: { // Operand-size override {
  285. uint8_t nextByte;
  286. insn->hasOpSize = true;
  287. if (peek(insn, nextByte))
  288. break;
  289. // 0x66 can't overwrite existing mandatory prefix and should be ignored
  290. if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
  291. insn->mandatoryPrefix = byte;
  292. break;
  293. }
  294. case 0x67: // Address-size override
  295. insn->hasAdSize = true;
  296. break;
  297. default: // Not a prefix byte
  298. isPrefix = false;
  299. break;
  300. }
  301. if (isPrefix)
  302. LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
  303. }
  304. insn->vectorExtensionType = TYPE_NO_VEX_XOP;
  305. if (byte == 0x62) {
  306. uint8_t byte1, byte2;
  307. if (consume(insn, byte1)) {
  308. LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
  309. return -1;
  310. }
  311. if (peek(insn, byte2)) {
  312. LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
  313. return -1;
  314. }
  315. if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
  316. ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
  317. insn->vectorExtensionType = TYPE_EVEX;
  318. } else {
  319. --insn->readerCursor; // unconsume byte1
  320. --insn->readerCursor; // unconsume byte
  321. }
  322. if (insn->vectorExtensionType == TYPE_EVEX) {
  323. insn->vectorExtensionPrefix[0] = byte;
  324. insn->vectorExtensionPrefix[1] = byte1;
  325. if (consume(insn, insn->vectorExtensionPrefix[2])) {
  326. LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
  327. return -1;
  328. }
  329. if (consume(insn, insn->vectorExtensionPrefix[3])) {
  330. LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
  331. return -1;
  332. }
  333. // We simulate the REX prefix for simplicity's sake
  334. if (insn->mode == MODE_64BIT) {
  335. insn->rexPrefix = 0x40 |
  336. (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
  337. (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
  338. (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
  339. (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
  340. }
  341. LLVM_DEBUG(
  342. dbgs() << format(
  343. "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
  344. insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
  345. insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
  346. }
  347. } else if (byte == 0xc4) {
  348. uint8_t byte1;
  349. if (peek(insn, byte1)) {
  350. LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
  351. return -1;
  352. }
  353. if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
  354. insn->vectorExtensionType = TYPE_VEX_3B;
  355. else
  356. --insn->readerCursor;
  357. if (insn->vectorExtensionType == TYPE_VEX_3B) {
  358. insn->vectorExtensionPrefix[0] = byte;
  359. consume(insn, insn->vectorExtensionPrefix[1]);
  360. consume(insn, insn->vectorExtensionPrefix[2]);
  361. // We simulate the REX prefix for simplicity's sake
  362. if (insn->mode == MODE_64BIT)
  363. insn->rexPrefix = 0x40 |
  364. (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
  365. (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
  366. (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
  367. (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
  368. LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
  369. insn->vectorExtensionPrefix[0],
  370. insn->vectorExtensionPrefix[1],
  371. insn->vectorExtensionPrefix[2]));
  372. }
  373. } else if (byte == 0xc5) {
  374. uint8_t byte1;
  375. if (peek(insn, byte1)) {
  376. LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
  377. return -1;
  378. }
  379. if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
  380. insn->vectorExtensionType = TYPE_VEX_2B;
  381. else
  382. --insn->readerCursor;
  383. if (insn->vectorExtensionType == TYPE_VEX_2B) {
  384. insn->vectorExtensionPrefix[0] = byte;
  385. consume(insn, insn->vectorExtensionPrefix[1]);
  386. if (insn->mode == MODE_64BIT)
  387. insn->rexPrefix =
  388. 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
  389. switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
  390. default:
  391. break;
  392. case VEX_PREFIX_66:
  393. insn->hasOpSize = true;
  394. break;
  395. }
  396. LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
  397. insn->vectorExtensionPrefix[0],
  398. insn->vectorExtensionPrefix[1]));
  399. }
  400. } else if (byte == 0x8f) {
  401. uint8_t byte1;
  402. if (peek(insn, byte1)) {
  403. LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
  404. return -1;
  405. }
  406. if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
  407. insn->vectorExtensionType = TYPE_XOP;
  408. else
  409. --insn->readerCursor;
  410. if (insn->vectorExtensionType == TYPE_XOP) {
  411. insn->vectorExtensionPrefix[0] = byte;
  412. consume(insn, insn->vectorExtensionPrefix[1]);
  413. consume(insn, insn->vectorExtensionPrefix[2]);
  414. // We simulate the REX prefix for simplicity's sake
  415. if (insn->mode == MODE_64BIT)
  416. insn->rexPrefix = 0x40 |
  417. (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
  418. (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
  419. (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
  420. (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
  421. switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
  422. default:
  423. break;
  424. case VEX_PREFIX_66:
  425. insn->hasOpSize = true;
  426. break;
  427. }
  428. LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
  429. insn->vectorExtensionPrefix[0],
  430. insn->vectorExtensionPrefix[1],
  431. insn->vectorExtensionPrefix[2]));
  432. }
  433. } else if (isREX(insn, byte)) {
  434. if (peek(insn, nextByte))
  435. return -1;
  436. insn->rexPrefix = byte;
  437. LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
  438. } else
  439. --insn->readerCursor;
  440. if (insn->mode == MODE_16BIT) {
  441. insn->registerSize = (insn->hasOpSize ? 4 : 2);
  442. insn->addressSize = (insn->hasAdSize ? 4 : 2);
  443. insn->displacementSize = (insn->hasAdSize ? 4 : 2);
  444. insn->immediateSize = (insn->hasOpSize ? 4 : 2);
  445. } else if (insn->mode == MODE_32BIT) {
  446. insn->registerSize = (insn->hasOpSize ? 2 : 4);
  447. insn->addressSize = (insn->hasAdSize ? 2 : 4);
  448. insn->displacementSize = (insn->hasAdSize ? 2 : 4);
  449. insn->immediateSize = (insn->hasOpSize ? 2 : 4);
  450. } else if (insn->mode == MODE_64BIT) {
  451. if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
  452. insn->registerSize = 8;
  453. insn->addressSize = (insn->hasAdSize ? 4 : 8);
  454. insn->displacementSize = 4;
  455. insn->immediateSize = 4;
  456. insn->hasOpSize = false;
  457. } else {
  458. insn->registerSize = (insn->hasOpSize ? 2 : 4);
  459. insn->addressSize = (insn->hasAdSize ? 4 : 8);
  460. insn->displacementSize = (insn->hasOpSize ? 2 : 4);
  461. insn->immediateSize = (insn->hasOpSize ? 2 : 4);
  462. }
  463. }
  464. return 0;
  465. }
  466. // Consumes the SIB byte to determine addressing information.
  467. static int readSIB(struct InternalInstruction *insn) {
  468. SIBBase sibBaseBase = SIB_BASE_NONE;
  469. uint8_t index, base;
  470. LLVM_DEBUG(dbgs() << "readSIB()");
  471. switch (insn->addressSize) {
  472. case 2:
  473. default:
  474. llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  475. case 4:
  476. insn->sibIndexBase = SIB_INDEX_EAX;
  477. sibBaseBase = SIB_BASE_EAX;
  478. break;
  479. case 8:
  480. insn->sibIndexBase = SIB_INDEX_RAX;
  481. sibBaseBase = SIB_BASE_RAX;
  482. break;
  483. }
  484. if (consume(insn, insn->sib))
  485. return -1;
  486. index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
  487. if (index == 0x4) {
  488. insn->sibIndex = SIB_INDEX_NONE;
  489. } else {
  490. insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  491. }
  492. insn->sibScale = 1 << scaleFromSIB(insn->sib);
  493. base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
  494. switch (base) {
  495. case 0x5:
  496. case 0xd:
  497. switch (modFromModRM(insn->modRM)) {
  498. case 0x0:
  499. insn->eaDisplacement = EA_DISP_32;
  500. insn->sibBase = SIB_BASE_NONE;
  501. break;
  502. case 0x1:
  503. insn->eaDisplacement = EA_DISP_8;
  504. insn->sibBase = (SIBBase)(sibBaseBase + base);
  505. break;
  506. case 0x2:
  507. insn->eaDisplacement = EA_DISP_32;
  508. insn->sibBase = (SIBBase)(sibBaseBase + base);
  509. break;
  510. default:
  511. llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
  512. }
  513. break;
  514. default:
  515. insn->sibBase = (SIBBase)(sibBaseBase + base);
  516. break;
  517. }
  518. return 0;
  519. }
  520. static int readDisplacement(struct InternalInstruction *insn) {
  521. int8_t d8;
  522. int16_t d16;
  523. int32_t d32;
  524. LLVM_DEBUG(dbgs() << "readDisplacement()");
  525. insn->displacementOffset = insn->readerCursor - insn->startLocation;
  526. switch (insn->eaDisplacement) {
  527. case EA_DISP_NONE:
  528. break;
  529. case EA_DISP_8:
  530. if (consume(insn, d8))
  531. return -1;
  532. insn->displacement = d8;
  533. break;
  534. case EA_DISP_16:
  535. if (consume(insn, d16))
  536. return -1;
  537. insn->displacement = d16;
  538. break;
  539. case EA_DISP_32:
  540. if (consume(insn, d32))
  541. return -1;
  542. insn->displacement = d32;
  543. break;
  544. }
  545. return 0;
  546. }
  547. // Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
  548. static int readModRM(struct InternalInstruction *insn) {
  549. uint8_t mod, rm, reg, evexrm;
  550. LLVM_DEBUG(dbgs() << "readModRM()");
  551. if (insn->consumedModRM)
  552. return 0;
  553. if (consume(insn, insn->modRM))
  554. return -1;
  555. insn->consumedModRM = true;
  556. mod = modFromModRM(insn->modRM);
  557. rm = rmFromModRM(insn->modRM);
  558. reg = regFromModRM(insn->modRM);
  559. // This goes by insn->registerSize to pick the correct register, which messes
  560. // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  561. // fixupReg().
  562. switch (insn->registerSize) {
  563. case 2:
  564. insn->regBase = MODRM_REG_AX;
  565. insn->eaRegBase = EA_REG_AX;
  566. break;
  567. case 4:
  568. insn->regBase = MODRM_REG_EAX;
  569. insn->eaRegBase = EA_REG_EAX;
  570. break;
  571. case 8:
  572. insn->regBase = MODRM_REG_RAX;
  573. insn->eaRegBase = EA_REG_RAX;
  574. break;
  575. }
  576. reg |= rFromREX(insn->rexPrefix) << 3;
  577. rm |= bFromREX(insn->rexPrefix) << 3;
  578. evexrm = 0;
  579. if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
  580. reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  581. evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  582. }
  583. insn->reg = (Reg)(insn->regBase + reg);
  584. switch (insn->addressSize) {
  585. case 2: {
  586. EABase eaBaseBase = EA_BASE_BX_SI;
  587. switch (mod) {
  588. case 0x0:
  589. if (rm == 0x6) {
  590. insn->eaBase = EA_BASE_NONE;
  591. insn->eaDisplacement = EA_DISP_16;
  592. if (readDisplacement(insn))
  593. return -1;
  594. } else {
  595. insn->eaBase = (EABase)(eaBaseBase + rm);
  596. insn->eaDisplacement = EA_DISP_NONE;
  597. }
  598. break;
  599. case 0x1:
  600. insn->eaBase = (EABase)(eaBaseBase + rm);
  601. insn->eaDisplacement = EA_DISP_8;
  602. insn->displacementSize = 1;
  603. if (readDisplacement(insn))
  604. return -1;
  605. break;
  606. case 0x2:
  607. insn->eaBase = (EABase)(eaBaseBase + rm);
  608. insn->eaDisplacement = EA_DISP_16;
  609. if (readDisplacement(insn))
  610. return -1;
  611. break;
  612. case 0x3:
  613. insn->eaBase = (EABase)(insn->eaRegBase + rm);
  614. if (readDisplacement(insn))
  615. return -1;
  616. break;
  617. }
  618. break;
  619. }
  620. case 4:
  621. case 8: {
  622. EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
  623. switch (mod) {
  624. case 0x0:
  625. insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
  626. // In determining whether RIP-relative mode is used (rm=5),
  627. // or whether a SIB byte is present (rm=4),
  628. // the extension bits (REX.b and EVEX.x) are ignored.
  629. switch (rm & 7) {
  630. case 0x4: // SIB byte is present
  631. insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
  632. if (readSIB(insn) || readDisplacement(insn))
  633. return -1;
  634. break;
  635. case 0x5: // RIP-relative
  636. insn->eaBase = EA_BASE_NONE;
  637. insn->eaDisplacement = EA_DISP_32;
  638. if (readDisplacement(insn))
  639. return -1;
  640. break;
  641. default:
  642. insn->eaBase = (EABase)(eaBaseBase + rm);
  643. break;
  644. }
  645. break;
  646. case 0x1:
  647. insn->displacementSize = 1;
  648. LLVM_FALLTHROUGH;
  649. case 0x2:
  650. insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
  651. switch (rm & 7) {
  652. case 0x4: // SIB byte is present
  653. insn->eaBase = EA_BASE_sib;
  654. if (readSIB(insn) || readDisplacement(insn))
  655. return -1;
  656. break;
  657. default:
  658. insn->eaBase = (EABase)(eaBaseBase + rm);
  659. if (readDisplacement(insn))
  660. return -1;
  661. break;
  662. }
  663. break;
  664. case 0x3:
  665. insn->eaDisplacement = EA_DISP_NONE;
  666. insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
  667. break;
  668. }
  669. break;
  670. }
  671. } // switch (insn->addressSize)
  672. return 0;
  673. }
  674. #define GENERIC_FIXUP_FUNC(name, base, prefix, mask) \
  675. static uint16_t name(struct InternalInstruction *insn, OperandType type, \
  676. uint8_t index, uint8_t *valid) { \
  677. *valid = 1; \
  678. switch (type) { \
  679. default: \
  680. debug("Unhandled register type"); \
  681. *valid = 0; \
  682. return 0; \
  683. case TYPE_Rv: \
  684. return base + index; \
  685. case TYPE_R8: \
  686. index &= mask; \
  687. if (index > 0xf) \
  688. *valid = 0; \
  689. if (insn->rexPrefix && index >= 4 && index <= 7) { \
  690. return prefix##_SPL + (index - 4); \
  691. } else { \
  692. return prefix##_AL + index; \
  693. } \
  694. case TYPE_R16: \
  695. index &= mask; \
  696. if (index > 0xf) \
  697. *valid = 0; \
  698. return prefix##_AX + index; \
  699. case TYPE_R32: \
  700. index &= mask; \
  701. if (index > 0xf) \
  702. *valid = 0; \
  703. return prefix##_EAX + index; \
  704. case TYPE_R64: \
  705. index &= mask; \
  706. if (index > 0xf) \
  707. *valid = 0; \
  708. return prefix##_RAX + index; \
  709. case TYPE_ZMM: \
  710. return prefix##_ZMM0 + index; \
  711. case TYPE_YMM: \
  712. return prefix##_YMM0 + index; \
  713. case TYPE_XMM: \
  714. return prefix##_XMM0 + index; \
  715. case TYPE_TMM: \
  716. if (index > 7) \
  717. *valid = 0; \
  718. return prefix##_TMM0 + index; \
  719. case TYPE_VK: \
  720. index &= 0xf; \
  721. if (index > 7) \
  722. *valid = 0; \
  723. return prefix##_K0 + index; \
  724. case TYPE_VK_PAIR: \
  725. if (index > 7) \
  726. *valid = 0; \
  727. return prefix##_K0_K1 + (index / 2); \
  728. case TYPE_MM64: \
  729. return prefix##_MM0 + (index & 0x7); \
  730. case TYPE_SEGMENTREG: \
  731. if ((index & 7) > 5) \
  732. *valid = 0; \
  733. return prefix##_ES + (index & 7); \
  734. case TYPE_DEBUGREG: \
  735. return prefix##_DR0 + index; \
  736. case TYPE_CONTROLREG: \
  737. return prefix##_CR0 + index; \
  738. case TYPE_MVSIBX: \
  739. return prefix##_XMM0 + index; \
  740. case TYPE_MVSIBY: \
  741. return prefix##_YMM0 + index; \
  742. case TYPE_MVSIBZ: \
  743. return prefix##_ZMM0 + index; \
  744. } \
  745. }
  746. // Consult an operand type to determine the meaning of the reg or R/M field. If
  747. // the operand is an XMM operand, for example, an operand would be XMM0 instead
  748. // of AX, which readModRM() would otherwise misinterpret it as.
  749. //
  750. // @param insn - The instruction containing the operand.
  751. // @param type - The operand type.
  752. // @param index - The existing value of the field as reported by readModRM().
  753. // @param valid - The address of a uint8_t. The target is set to 1 if the
  754. // field is valid for the register class; 0 if not.
  755. // @return - The proper value.
  756. GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
  757. GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
  758. // Consult an operand specifier to determine which of the fixup*Value functions
  759. // to use in correcting readModRM()'ss interpretation.
  760. //
  761. // @param insn - See fixup*Value().
  762. // @param op - The operand specifier.
  763. // @return - 0 if fixup was successful; -1 if the register returned was
  764. // invalid for its class.
  765. static int fixupReg(struct InternalInstruction *insn,
  766. const struct OperandSpecifier *op) {
  767. uint8_t valid;
  768. LLVM_DEBUG(dbgs() << "fixupReg()");
  769. switch ((OperandEncoding)op->encoding) {
  770. default:
  771. debug("Expected a REG or R/M encoding in fixupReg");
  772. return -1;
  773. case ENCODING_VVVV:
  774. insn->vvvv =
  775. (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
  776. if (!valid)
  777. return -1;
  778. break;
  779. case ENCODING_REG:
  780. insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
  781. insn->reg - insn->regBase, &valid);
  782. if (!valid)
  783. return -1;
  784. break;
  785. case ENCODING_SIB:
  786. CASE_ENCODING_RM:
  787. if (insn->eaBase >= insn->eaRegBase) {
  788. insn->eaBase = (EABase)fixupRMValue(
  789. insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
  790. if (!valid)
  791. return -1;
  792. }
  793. break;
  794. }
  795. return 0;
  796. }
  797. // Read the opcode (except the ModR/M byte in the case of extended or escape
  798. // opcodes).
  799. static bool readOpcode(struct InternalInstruction *insn) {
  800. uint8_t current;
  801. LLVM_DEBUG(dbgs() << "readOpcode()");
  802. insn->opcodeType = ONEBYTE;
  803. if (insn->vectorExtensionType == TYPE_EVEX) {
  804. switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
  805. default:
  806. LLVM_DEBUG(
  807. dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
  808. mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
  809. return true;
  810. case VEX_LOB_0F:
  811. insn->opcodeType = TWOBYTE;
  812. return consume(insn, insn->opcode);
  813. case VEX_LOB_0F38:
  814. insn->opcodeType = THREEBYTE_38;
  815. return consume(insn, insn->opcode);
  816. case VEX_LOB_0F3A:
  817. insn->opcodeType = THREEBYTE_3A;
  818. return consume(insn, insn->opcode);
  819. case VEX_LOB_MAP5:
  820. insn->opcodeType = MAP5;
  821. return consume(insn, insn->opcode);
  822. case VEX_LOB_MAP6:
  823. insn->opcodeType = MAP6;
  824. return consume(insn, insn->opcode);
  825. }
  826. } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
  827. switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
  828. default:
  829. LLVM_DEBUG(
  830. dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
  831. mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
  832. return true;
  833. case VEX_LOB_0F:
  834. insn->opcodeType = TWOBYTE;
  835. return consume(insn, insn->opcode);
  836. case VEX_LOB_0F38:
  837. insn->opcodeType = THREEBYTE_38;
  838. return consume(insn, insn->opcode);
  839. case VEX_LOB_0F3A:
  840. insn->opcodeType = THREEBYTE_3A;
  841. return consume(insn, insn->opcode);
  842. case VEX_LOB_MAP5:
  843. insn->opcodeType = MAP5;
  844. return consume(insn, insn->opcode);
  845. case VEX_LOB_MAP6:
  846. insn->opcodeType = MAP6;
  847. return consume(insn, insn->opcode);
  848. }
  849. } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
  850. insn->opcodeType = TWOBYTE;
  851. return consume(insn, insn->opcode);
  852. } else if (insn->vectorExtensionType == TYPE_XOP) {
  853. switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
  854. default:
  855. LLVM_DEBUG(
  856. dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
  857. mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
  858. return true;
  859. case XOP_MAP_SELECT_8:
  860. insn->opcodeType = XOP8_MAP;
  861. return consume(insn, insn->opcode);
  862. case XOP_MAP_SELECT_9:
  863. insn->opcodeType = XOP9_MAP;
  864. return consume(insn, insn->opcode);
  865. case XOP_MAP_SELECT_A:
  866. insn->opcodeType = XOPA_MAP;
  867. return consume(insn, insn->opcode);
  868. }
  869. }
  870. if (consume(insn, current))
  871. return true;
  872. if (current == 0x0f) {
  873. LLVM_DEBUG(
  874. dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
  875. if (consume(insn, current))
  876. return true;
  877. if (current == 0x38) {
  878. LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
  879. current));
  880. if (consume(insn, current))
  881. return true;
  882. insn->opcodeType = THREEBYTE_38;
  883. } else if (current == 0x3a) {
  884. LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
  885. current));
  886. if (consume(insn, current))
  887. return true;
  888. insn->opcodeType = THREEBYTE_3A;
  889. } else if (current == 0x0f) {
  890. LLVM_DEBUG(
  891. dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
  892. // Consume operands before the opcode to comply with the 3DNow encoding
  893. if (readModRM(insn))
  894. return true;
  895. if (consume(insn, current))
  896. return true;
  897. insn->opcodeType = THREEDNOW_MAP;
  898. } else {
  899. LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
  900. insn->opcodeType = TWOBYTE;
  901. }
  902. } else if (insn->mandatoryPrefix)
  903. // The opcode with mandatory prefix must start with opcode escape.
  904. // If not it's legacy repeat prefix
  905. insn->mandatoryPrefix = 0;
  906. // At this point we have consumed the full opcode.
  907. // Anything we consume from here on must be unconsumed.
  908. insn->opcode = current;
  909. return false;
  910. }
  // Determine whether equiv is the 16-bit equivalent of orig (32-bit or
  // 64-bit). The two names must have the same length and may differ only where
  // orig has 'Q'/'L' and equiv has 'W', orig has '6'/'3' and equiv has '1', or
  // orig has '4'/'2' and equiv has '6'.
  static bool is16BitEquivalent(const char *orig, const char *equiv) {
    for (; *orig != '\0' && *equiv != '\0'; ++orig, ++equiv) {
      const char wide = *orig;
      const char narrow = *equiv;
      if (wide == narrow)
        continue;

      const bool suffixMatches = narrow == 'W' && (wide == 'Q' || wide == 'L');
      const bool tensMatch = narrow == '1' && (wide == '6' || wide == '3');
      const bool onesMatch = narrow == '6' && (wide == '4' || wide == '2');
      if (!(suffixMatches || tensMatch || onesMatch))
        return false;
    }
    // Equivalent only if both names ended at the same position.
    return *orig == '\0' && *equiv == '\0';
  }
  // Determine whether this instruction is a 64-bit instruction, i.e. whether
  // its name contains the character sequence "64".
  static bool is64Bit(const char *name) {
    for (const char *cursor = name; *cursor != '\0'; ++cursor) {
      // cursor[1] is at worst the terminating NUL, so this never reads past
      // the end of the string.
      if (cursor[0] == '6' && cursor[1] == '4')
        return true;
    }
    return false;
  }
  938. // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
  939. // for extended and escape opcodes, and using a supplied attribute mask.
  940. static int getInstructionIDWithAttrMask(uint16_t *instructionID,
  941. struct InternalInstruction *insn,
  942. uint16_t attrMask) {
  943. auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  944. const ContextDecision *decision;
  945. switch (insn->opcodeType) {
  946. case ONEBYTE:
  947. decision = &ONEBYTE_SYM;
  948. break;
  949. case TWOBYTE:
  950. decision = &TWOBYTE_SYM;
  951. break;
  952. case THREEBYTE_38:
  953. decision = &THREEBYTE38_SYM;
  954. break;
  955. case THREEBYTE_3A:
  956. decision = &THREEBYTE3A_SYM;
  957. break;
  958. case XOP8_MAP:
  959. decision = &XOP8_MAP_SYM;
  960. break;
  961. case XOP9_MAP:
  962. decision = &XOP9_MAP_SYM;
  963. break;
  964. case XOPA_MAP:
  965. decision = &XOPA_MAP_SYM;
  966. break;
  967. case THREEDNOW_MAP:
  968. decision = &THREEDNOW_MAP_SYM;
  969. break;
  970. case MAP5:
  971. decision = &MAP5_SYM;
  972. break;
  973. case MAP6:
  974. decision = &MAP6_SYM;
  975. break;
  976. }
  977. if (decision->opcodeDecisions[insnCtx]
  978. .modRMDecisions[insn->opcode]
  979. .modrm_type != MODRM_ONEENTRY) {
  980. if (readModRM(insn))
  981. return -1;
  982. *instructionID =
  983. decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  984. } else {
  985. *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  986. }
  987. return 0;
  988. }
  989. // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
  990. // for extended and escape opcodes. Determines the attributes and context for
  991. // the instruction before doing so.
  992. static int getInstructionID(struct InternalInstruction *insn,
  993. const MCInstrInfo *mii) {
  994. uint16_t attrMask;
  995. uint16_t instructionID;
  996. LLVM_DEBUG(dbgs() << "getID()");
  997. attrMask = ATTR_NONE;
  998. if (insn->mode == MODE_64BIT)
  999. attrMask |= ATTR_64BIT;
  1000. if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
  1001. attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
  1002. if (insn->vectorExtensionType == TYPE_EVEX) {
  1003. switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
  1004. case VEX_PREFIX_66:
  1005. attrMask |= ATTR_OPSIZE;
  1006. break;
  1007. case VEX_PREFIX_F3:
  1008. attrMask |= ATTR_XS;
  1009. break;
  1010. case VEX_PREFIX_F2:
  1011. attrMask |= ATTR_XD;
  1012. break;
  1013. }
  1014. if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1015. attrMask |= ATTR_EVEXKZ;
  1016. if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1017. attrMask |= ATTR_EVEXB;
  1018. if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1019. attrMask |= ATTR_EVEXK;
  1020. if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1021. attrMask |= ATTR_VEXL;
  1022. if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1023. attrMask |= ATTR_EVEXL2;
  1024. } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
  1025. switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
  1026. case VEX_PREFIX_66:
  1027. attrMask |= ATTR_OPSIZE;
  1028. break;
  1029. case VEX_PREFIX_F3:
  1030. attrMask |= ATTR_XS;
  1031. break;
  1032. case VEX_PREFIX_F2:
  1033. attrMask |= ATTR_XD;
  1034. break;
  1035. }
  1036. if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
  1037. attrMask |= ATTR_VEXL;
  1038. } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
  1039. switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
  1040. case VEX_PREFIX_66:
  1041. attrMask |= ATTR_OPSIZE;
  1042. if (insn->hasAdSize)
  1043. attrMask |= ATTR_ADSIZE;
  1044. break;
  1045. case VEX_PREFIX_F3:
  1046. attrMask |= ATTR_XS;
  1047. break;
  1048. case VEX_PREFIX_F2:
  1049. attrMask |= ATTR_XD;
  1050. break;
  1051. }
  1052. if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
  1053. attrMask |= ATTR_VEXL;
  1054. } else if (insn->vectorExtensionType == TYPE_XOP) {
  1055. switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
  1056. case VEX_PREFIX_66:
  1057. attrMask |= ATTR_OPSIZE;
  1058. break;
  1059. case VEX_PREFIX_F3:
  1060. attrMask |= ATTR_XS;
  1061. break;
  1062. case VEX_PREFIX_F2:
  1063. attrMask |= ATTR_XD;
  1064. break;
  1065. }
  1066. if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
  1067. attrMask |= ATTR_VEXL;
  1068. } else {
  1069. return -1;
  1070. }
  1071. } else if (!insn->mandatoryPrefix) {
  1072. // If we don't have mandatory prefix we should use legacy prefixes here
  1073. if (insn->hasOpSize && (insn->mode != MODE_16BIT))
  1074. attrMask |= ATTR_OPSIZE;
  1075. if (insn->hasAdSize)
  1076. attrMask |= ATTR_ADSIZE;
  1077. if (insn->opcodeType == ONEBYTE) {
  1078. if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
  1079. // Special support for PAUSE
  1080. attrMask |= ATTR_XS;
  1081. } else {
  1082. if (insn->repeatPrefix == 0xf2)
  1083. attrMask |= ATTR_XD;
  1084. else if (insn->repeatPrefix == 0xf3)
  1085. attrMask |= ATTR_XS;
  1086. }
  1087. } else {
  1088. switch (insn->mandatoryPrefix) {
  1089. case 0xf2:
  1090. attrMask |= ATTR_XD;
  1091. break;
  1092. case 0xf3:
  1093. attrMask |= ATTR_XS;
  1094. break;
  1095. case 0x66:
  1096. if (insn->mode != MODE_16BIT)
  1097. attrMask |= ATTR_OPSIZE;
  1098. if (insn->hasAdSize)
  1099. attrMask |= ATTR_ADSIZE;
  1100. break;
  1101. case 0x67:
  1102. attrMask |= ATTR_ADSIZE;
  1103. break;
  1104. }
  1105. }
  1106. if (insn->rexPrefix & 0x08) {
  1107. attrMask |= ATTR_REXW;
  1108. attrMask &= ~ATTR_ADSIZE;
  1109. }
  1110. if (insn->mode == MODE_16BIT) {
  1111. // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
  1112. // of the AdSize prefix is inverted w.r.t. 32-bit mode.
  1113. if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
  1114. attrMask ^= ATTR_ADSIZE;
  1115. // If we're in 16-bit mode and this is one of the relative jumps and opsize
  1116. // prefix isn't present, we need to force the opsize attribute since the
  1117. // prefix is inverted relative to 32-bit mode.
  1118. if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
  1119. (insn->opcode == 0xE8 || insn->opcode == 0xE9))
  1120. attrMask |= ATTR_OPSIZE;
  1121. if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
  1122. insn->opcode >= 0x80 && insn->opcode <= 0x8F)
  1123. attrMask |= ATTR_OPSIZE;
  1124. }
  1125. if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
  1126. return -1;
  1127. // The following clauses compensate for limitations of the tables.
  1128. if (insn->mode != MODE_64BIT &&
  1129. insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
  1130. // The tables can't distinquish between cases where the W-bit is used to
  1131. // select register size and cases where its a required part of the opcode.
  1132. if ((insn->vectorExtensionType == TYPE_EVEX &&
  1133. wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
  1134. (insn->vectorExtensionType == TYPE_VEX_3B &&
  1135. wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
  1136. (insn->vectorExtensionType == TYPE_XOP &&
  1137. wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
  1138. uint16_t instructionIDWithREXW;
  1139. if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
  1140. attrMask | ATTR_REXW)) {
  1141. insn->instructionID = instructionID;
  1142. insn->spec = &INSTRUCTIONS_SYM[instructionID];
  1143. return 0;
  1144. }
  1145. auto SpecName = mii->getName(instructionIDWithREXW);
  1146. // If not a 64-bit instruction. Switch the opcode.
  1147. if (!is64Bit(SpecName.data())) {
  1148. insn->instructionID = instructionIDWithREXW;
  1149. insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
  1150. return 0;
  1151. }
  1152. }
  1153. }
  1154. // Absolute moves, umonitor, and movdir64b need special handling.
  1155. // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  1156. // inverted w.r.t.
  1157. // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  1158. // any position.
  1159. if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
  1160. (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
  1161. (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
  1162. // Make sure we observed the prefixes in any position.
  1163. if (insn->hasAdSize)
  1164. attrMask |= ATTR_ADSIZE;
  1165. if (insn->hasOpSize)
  1166. attrMask |= ATTR_OPSIZE;
  1167. // In 16-bit, invert the attributes.
  1168. if (insn->mode == MODE_16BIT) {
  1169. attrMask ^= ATTR_ADSIZE;
  1170. // The OpSize attribute is only valid with the absolute moves.
  1171. if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
  1172. attrMask ^= ATTR_OPSIZE;
  1173. }
  1174. if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
  1175. return -1;
  1176. insn->instructionID = instructionID;
  1177. insn->spec = &INSTRUCTIONS_SYM[instructionID];
  1178. return 0;
  1179. }
  1180. if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
  1181. !(attrMask & ATTR_OPSIZE)) {
  1182. // The instruction tables make no distinction between instructions that
  1183. // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
  1184. // particular spot (i.e., many MMX operations). In general we're
  1185. // conservative, but in the specific case where OpSize is present but not in
  1186. // the right place we check if there's a 16-bit operation.
  1187. const struct InstructionSpecifier *spec;
  1188. uint16_t instructionIDWithOpsize;
  1189. llvm::StringRef specName, specWithOpSizeName;
  1190. spec = &INSTRUCTIONS_SYM[instructionID];
  1191. if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
  1192. attrMask | ATTR_OPSIZE)) {
  1193. // ModRM required with OpSize but not present. Give up and return the
  1194. // version without OpSize set.
  1195. insn->instructionID = instructionID;
  1196. insn->spec = spec;
  1197. return 0;
  1198. }
  1199. specName = mii->getName(instructionID);
  1200. specWithOpSizeName = mii->getName(instructionIDWithOpsize);
  1201. if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
  1202. (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
  1203. insn->instructionID = instructionIDWithOpsize;
  1204. insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
  1205. } else {
  1206. insn->instructionID = instructionID;
  1207. insn->spec = spec;
  1208. }
  1209. return 0;
  1210. }
  1211. if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
  1212. insn->rexPrefix & 0x01) {
  1213. // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
  1214. // as XCHG %r8, %eax.
  1215. const struct InstructionSpecifier *spec;
  1216. uint16_t instructionIDWithNewOpcode;
  1217. const struct InstructionSpecifier *specWithNewOpcode;
  1218. spec = &INSTRUCTIONS_SYM[instructionID];
  1219. // Borrow opcode from one of the other XCHGar opcodes
  1220. insn->opcode = 0x91;
  1221. if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
  1222. attrMask)) {
  1223. insn->opcode = 0x90;
  1224. insn->instructionID = instructionID;
  1225. insn->spec = spec;
  1226. return 0;
  1227. }
  1228. specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
  1229. // Change back
  1230. insn->opcode = 0x90;
  1231. insn->instructionID = instructionIDWithNewOpcode;
  1232. insn->spec = specWithNewOpcode;
  1233. return 0;
  1234. }
  1235. insn->instructionID = instructionID;
  1236. insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
  1237. return 0;
  1238. }
  1239. // Read an operand from the opcode field of an instruction and interprets it
  1240. // appropriately given the operand width. Handles AddRegFrm instructions.
  1241. //
  1242. // @param insn - the instruction whose opcode field is to be read.
  1243. // @param size - The width (in bytes) of the register being specified.
  1244. // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
  1245. // RAX.
  1246. // @return - 0 on success; nonzero otherwise.
  1247. static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
  1248. LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
  1249. if (size == 0)
  1250. size = insn->registerSize;
  1251. switch (size) {
  1252. case 1:
  1253. insn->opcodeRegister = (Reg)(
  1254. MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1255. if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
  1256. insn->opcodeRegister < MODRM_REG_AL + 0x8) {
  1257. insn->opcodeRegister =
  1258. (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
  1259. }
  1260. break;
  1261. case 2:
  1262. insn->opcodeRegister = (Reg)(
  1263. MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1264. break;
  1265. case 4:
  1266. insn->opcodeRegister =
  1267. (Reg)(MODRM_REG_EAX +
  1268. ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1269. break;
  1270. case 8:
  1271. insn->opcodeRegister =
  1272. (Reg)(MODRM_REG_RAX +
  1273. ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1274. break;
  1275. }
  1276. return 0;
  1277. }
  1278. // Consume an immediate operand from an instruction, given the desired operand
  1279. // size.
  1280. //
  1281. // @param insn - The instruction whose operand is to be read.
  1282. // @param size - The width (in bytes) of the operand.
  1283. // @return - 0 if the immediate was successfully consumed; nonzero
  1284. // otherwise.
  1285. static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
  1286. uint8_t imm8;
  1287. uint16_t imm16;
  1288. uint32_t imm32;
  1289. uint64_t imm64;
  1290. LLVM_DEBUG(dbgs() << "readImmediate()");
  1291. assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
  1292. insn->immediateSize = size;
  1293. insn->immediateOffset = insn->readerCursor - insn->startLocation;
  1294. switch (size) {
  1295. case 1:
  1296. if (consume(insn, imm8))
  1297. return -1;
  1298. insn->immediates[insn->numImmediatesConsumed] = imm8;
  1299. break;
  1300. case 2:
  1301. if (consume(insn, imm16))
  1302. return -1;
  1303. insn->immediates[insn->numImmediatesConsumed] = imm16;
  1304. break;
  1305. case 4:
  1306. if (consume(insn, imm32))
  1307. return -1;
  1308. insn->immediates[insn->numImmediatesConsumed] = imm32;
  1309. break;
  1310. case 8:
  1311. if (consume(insn, imm64))
  1312. return -1;
  1313. insn->immediates[insn->numImmediatesConsumed] = imm64;
  1314. break;
  1315. default:
  1316. llvm_unreachable("invalid size");
  1317. }
  1318. insn->numImmediatesConsumed++;
  1319. return 0;
  1320. }
  1321. // Consume vvvv from an instruction if it has a VEX prefix.
  1322. static int readVVVV(struct InternalInstruction *insn) {
  1323. LLVM_DEBUG(dbgs() << "readVVVV()");
  1324. int vvvv;
  1325. if (insn->vectorExtensionType == TYPE_EVEX)
  1326. vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
  1327. vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
  1328. else if (insn->vectorExtensionType == TYPE_VEX_3B)
  1329. vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
  1330. else if (insn->vectorExtensionType == TYPE_VEX_2B)
  1331. vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
  1332. else if (insn->vectorExtensionType == TYPE_XOP)
  1333. vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
  1334. else
  1335. return -1;
  1336. if (insn->mode != MODE_64BIT)
  1337. vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
  1338. insn->vvvv = static_cast<Reg>(vvvv);
  1339. return 0;
  1340. }
  1341. // Read an mask register from the opcode field of an instruction.
  1342. //
  1343. // @param insn - The instruction whose opcode field is to be read.
  1344. // @return - 0 on success; nonzero otherwise.
  1345. static int readMaskRegister(struct InternalInstruction *insn) {
  1346. LLVM_DEBUG(dbgs() << "readMaskRegister()");
  1347. if (insn->vectorExtensionType != TYPE_EVEX)
  1348. return -1;
  1349. insn->writemask =
  1350. static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
  1351. return 0;
  1352. }
  1353. // Consults the specifier for an instruction and consumes all
  1354. // operands for that instruction, interpreting them as it goes.
  1355. static int readOperands(struct InternalInstruction *insn) {
  1356. int hasVVVV, needVVVV;
  1357. int sawRegImm = 0;
  1358. LLVM_DEBUG(dbgs() << "readOperands()");
  1359. // If non-zero vvvv specified, make sure one of the operands uses it.
  1360. hasVVVV = !readVVVV(insn);
  1361. needVVVV = hasVVVV && (insn->vvvv != 0);
  1362. for (const auto &Op : x86OperandSets[insn->spec->operands]) {
  1363. switch (Op.encoding) {
  1364. case ENCODING_NONE:
  1365. case ENCODING_SI:
  1366. case ENCODING_DI:
  1367. break;
  1368. CASE_ENCODING_VSIB:
  1369. // VSIB can use the V2 bit so check only the other bits.
  1370. if (needVVVV)
  1371. needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
  1372. if (readModRM(insn))
  1373. return -1;
  1374. // Reject if SIB wasn't used.
  1375. if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
  1376. return -1;
  1377. // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
  1378. if (insn->sibIndex == SIB_INDEX_NONE)
  1379. insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
  1380. // If EVEX.v2 is set this is one of the 16-31 registers.
  1381. if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
  1382. v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1383. insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
  1384. // Adjust the index register to the correct size.
  1385. switch ((OperandType)Op.type) {
  1386. default:
  1387. debug("Unhandled VSIB index type");
  1388. return -1;
  1389. case TYPE_MVSIBX:
  1390. insn->sibIndex =
  1391. (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
  1392. break;
  1393. case TYPE_MVSIBY:
  1394. insn->sibIndex =
  1395. (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
  1396. break;
  1397. case TYPE_MVSIBZ:
  1398. insn->sibIndex =
  1399. (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
  1400. break;
  1401. }
  1402. // Apply the AVX512 compressed displacement scaling factor.
  1403. if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
  1404. insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
  1405. break;
  1406. case ENCODING_SIB:
  1407. // Reject if SIB wasn't used.
  1408. if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
  1409. return -1;
  1410. if (readModRM(insn))
  1411. return -1;
  1412. if (fixupReg(insn, &Op))
  1413. return -1;
  1414. break;
  1415. case ENCODING_REG:
  1416. CASE_ENCODING_RM:
  1417. if (readModRM(insn))
  1418. return -1;
  1419. if (fixupReg(insn, &Op))
  1420. return -1;
  1421. // Apply the AVX512 compressed displacement scaling factor.
  1422. if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
  1423. insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
  1424. break;
  1425. case ENCODING_IB:
  1426. if (sawRegImm) {
  1427. // Saw a register immediate so don't read again and instead split the
  1428. // previous immediate. FIXME: This is a hack.
  1429. insn->immediates[insn->numImmediatesConsumed] =
  1430. insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
  1431. ++insn->numImmediatesConsumed;
  1432. break;
  1433. }
  1434. if (readImmediate(insn, 1))
  1435. return -1;
  1436. if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
  1437. sawRegImm = 1;
  1438. break;
  1439. case ENCODING_IW:
  1440. if (readImmediate(insn, 2))
  1441. return -1;
  1442. break;
  1443. case ENCODING_ID:
  1444. if (readImmediate(insn, 4))
  1445. return -1;
  1446. break;
  1447. case ENCODING_IO:
  1448. if (readImmediate(insn, 8))
  1449. return -1;
  1450. break;
  1451. case ENCODING_Iv:
  1452. if (readImmediate(insn, insn->immediateSize))
  1453. return -1;
  1454. break;
  1455. case ENCODING_Ia:
  1456. if (readImmediate(insn, insn->addressSize))
  1457. return -1;
  1458. break;
  1459. case ENCODING_IRC:
  1460. insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
  1461. lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
  1462. break;
  1463. case ENCODING_RB:
  1464. if (readOpcodeRegister(insn, 1))
  1465. return -1;
  1466. break;
  1467. case ENCODING_RW:
  1468. if (readOpcodeRegister(insn, 2))
  1469. return -1;
  1470. break;
  1471. case ENCODING_RD:
  1472. if (readOpcodeRegister(insn, 4))
  1473. return -1;
  1474. break;
  1475. case ENCODING_RO:
  1476. if (readOpcodeRegister(insn, 8))
  1477. return -1;
  1478. break;
  1479. case ENCODING_Rv:
  1480. if (readOpcodeRegister(insn, 0))
  1481. return -1;
  1482. break;
  1483. case ENCODING_CC:
  1484. insn->immediates[1] = insn->opcode & 0xf;
  1485. break;
  1486. case ENCODING_FP:
  1487. break;
  1488. case ENCODING_VVVV:
  1489. needVVVV = 0; // Mark that we have found a VVVV operand.
  1490. if (!hasVVVV)
  1491. return -1;
  1492. if (insn->mode != MODE_64BIT)
  1493. insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
  1494. if (fixupReg(insn, &Op))
  1495. return -1;
  1496. break;
  1497. case ENCODING_WRITEMASK:
  1498. if (readMaskRegister(insn))
  1499. return -1;
  1500. break;
  1501. case ENCODING_DUP:
  1502. break;
  1503. default:
  1504. LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
  1505. return -1;
  1506. }
  1507. }
  1508. // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
  1509. if (needVVVV)
  1510. return -1;
  1511. return 0;
  1512. }
  1513. namespace llvm {
  1514. // Fill-ins to make the compiler happy. These constants are never actually
  1515. // assigned; they are just filler to make an automatically-generated switch
  1516. // statement work.
  1517. namespace X86 {
  1518. enum {
  1519. BX_SI = 500,
  1520. BX_DI = 501,
  1521. BP_SI = 502,
  1522. BP_DI = 503,
  1523. sib = 504,
  1524. sib64 = 505
  1525. };
  1526. } // namespace X86
  1527. } // namespace llvm
  1528. static bool translateInstruction(MCInst &target,
  1529. InternalInstruction &source,
  1530. const MCDisassembler *Dis);
  1531. namespace {
  1532. /// Generic disassembler for all X86 platforms. All each platform class should
  1533. /// have to do is subclass the constructor, and provide a different
  1534. /// disassemblerMode value.
  1535. class X86GenericDisassembler : public MCDisassembler {
  1536. std::unique_ptr<const MCInstrInfo> MII;
  1537. public:
  1538. X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
  1539. std::unique_ptr<const MCInstrInfo> MII);
  1540. public:
  1541. DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
  1542. ArrayRef<uint8_t> Bytes, uint64_t Address,
  1543. raw_ostream &cStream) const override;
  1544. private:
  1545. DisassemblerMode fMode;
  1546. };
  1547. } // namespace
  1548. X86GenericDisassembler::X86GenericDisassembler(
  1549. const MCSubtargetInfo &STI,
  1550. MCContext &Ctx,
  1551. std::unique_ptr<const MCInstrInfo> MII)
  1552. : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
  1553. const FeatureBitset &FB = STI.getFeatureBits();
  1554. if (FB[X86::Mode16Bit]) {
  1555. fMode = MODE_16BIT;
  1556. return;
  1557. } else if (FB[X86::Mode32Bit]) {
  1558. fMode = MODE_32BIT;
  1559. return;
  1560. } else if (FB[X86::Mode64Bit]) {
  1561. fMode = MODE_64BIT;
  1562. return;
  1563. }
  1564. llvm_unreachable("Invalid CPU mode");
  1565. }
  1566. MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
  1567. MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
  1568. raw_ostream &CStream) const {
  1569. CommentStream = &CStream;
  1570. InternalInstruction Insn;
  1571. memset(&Insn, 0, sizeof(InternalInstruction));
  1572. Insn.bytes = Bytes;
  1573. Insn.startLocation = Address;
  1574. Insn.readerCursor = Address;
  1575. Insn.mode = fMode;
  1576. if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
  1577. getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
  1578. readOperands(&Insn)) {
  1579. Size = Insn.readerCursor - Address;
  1580. return Fail;
  1581. }
  1582. Insn.operands = x86OperandSets[Insn.spec->operands];
  1583. Insn.length = Insn.readerCursor - Insn.startLocation;
  1584. Size = Insn.length;
  1585. if (Size > 15)
  1586. LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
  1587. bool Ret = translateInstruction(Instr, Insn, this);
  1588. if (!Ret) {
  1589. unsigned Flags = X86::IP_NO_PREFIX;
  1590. if (Insn.hasAdSize)
  1591. Flags |= X86::IP_HAS_AD_SIZE;
  1592. if (!Insn.mandatoryPrefix) {
  1593. if (Insn.hasOpSize)
  1594. Flags |= X86::IP_HAS_OP_SIZE;
  1595. if (Insn.repeatPrefix == 0xf2)
  1596. Flags |= X86::IP_HAS_REPEAT_NE;
  1597. else if (Insn.repeatPrefix == 0xf3 &&
  1598. // It should not be 'pause' f3 90
  1599. Insn.opcode != 0x90)
  1600. Flags |= X86::IP_HAS_REPEAT;
  1601. if (Insn.hasLockPrefix)
  1602. Flags |= X86::IP_HAS_LOCK;
  1603. }
  1604. Instr.setFlags(Flags);
  1605. }
  1606. return (!Ret) ? Success : Fail;
  1607. }
  1608. //
  1609. // Private code that translates from struct InternalInstructions to MCInsts.
  1610. //
  1611. /// translateRegister - Translates an internal register to the appropriate LLVM
  1612. /// register, and appends it as an operand to an MCInst.
  1613. ///
  1614. /// @param mcInst - The MCInst to append to.
  1615. /// @param reg - The Reg to append.
  1616. static void translateRegister(MCInst &mcInst, Reg reg) {
  1617. #define ENTRY(x) X86::x,
  1618. static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
  1619. #undef ENTRY
  1620. MCPhysReg llvmRegnum = llvmRegnums[reg];
  1621. mcInst.addOperand(MCOperand::createReg(llvmRegnum));
  1622. }
  1623. /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the
  1624. /// immediate Value in the MCInst.
  1625. ///
  1626. /// @param Value - The immediate Value, has had any PC adjustment made by
  1627. /// the caller.
  1628. /// @param isBranch - If the instruction is a branch instruction
  1629. /// @param Address - The starting address of the instruction
  1630. /// @param Offset - The byte offset to this immediate in the instruction
  1631. /// @param Width - The byte width of this immediate in the instruction
  1632. ///
  1633. /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
  1634. /// called then that function is called to get any symbolic information for the
  1635. /// immediate in the instruction using the Address, Offset and Width. If that
  1636. /// returns non-zero then the symbolic information it returns is used to create
  1637. /// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
  1638. /// returns zero and isBranch is true then a symbol look up for immediate Value
  1639. /// is done and if a symbol is found an MCExpr is created with that, else
  1640. /// an MCExpr with the immediate Value is created. This function returns true
  1641. /// if it adds an operand to the MCInst and false otherwise.
  1642. static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
  1643. uint64_t Address, uint64_t Offset,
  1644. uint64_t Width, MCInst &MI,
  1645. const MCDisassembler *Dis) {
  1646. return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
  1647. Offset, Width);
  1648. }
  1649. /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
  1650. /// referenced by a load instruction with the base register that is the rip.
  1651. /// These can often be addresses in a literal pool. The Address of the
  1652. /// instruction and its immediate Value are used to determine the address
  1653. /// being referenced in the literal pool entry. The SymbolLookUp call back will
  1654. /// return a pointer to a literal 'C' string if the referenced address is an
  1655. /// address into a section with 'C' string literals.
  1656. static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
  1657. const void *Decoder) {
  1658. const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
  1659. Dis->tryAddingPcLoadReferenceComment(Value, Address);
  1660. }
  1661. static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
  1662. 0, // SEG_OVERRIDE_NONE
  1663. X86::CS,
  1664. X86::SS,
  1665. X86::DS,
  1666. X86::ES,
  1667. X86::FS,
  1668. X86::GS
  1669. };
  1670. /// translateSrcIndex - Appends a source index operand to an MCInst.
  1671. ///
  1672. /// @param mcInst - The MCInst to append to.
  1673. /// @param insn - The internal instruction.
  1674. static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
  1675. unsigned baseRegNo;
  1676. if (insn.mode == MODE_64BIT)
  1677. baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
  1678. else if (insn.mode == MODE_32BIT)
  1679. baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
  1680. else {
  1681. assert(insn.mode == MODE_16BIT);
  1682. baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
  1683. }
  1684. MCOperand baseReg = MCOperand::createReg(baseRegNo);
  1685. mcInst.addOperand(baseReg);
  1686. MCOperand segmentReg;
  1687. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1688. mcInst.addOperand(segmentReg);
  1689. return false;
  1690. }
  1691. /// translateDstIndex - Appends a destination index operand to an MCInst.
  1692. ///
  1693. /// @param mcInst - The MCInst to append to.
  1694. /// @param insn - The internal instruction.
  1695. static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
  1696. unsigned baseRegNo;
  1697. if (insn.mode == MODE_64BIT)
  1698. baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
  1699. else if (insn.mode == MODE_32BIT)
  1700. baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
  1701. else {
  1702. assert(insn.mode == MODE_16BIT);
  1703. baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
  1704. }
  1705. MCOperand baseReg = MCOperand::createReg(baseRegNo);
  1706. mcInst.addOperand(baseReg);
  1707. return false;
  1708. }
  1709. /// translateImmediate - Appends an immediate operand to an MCInst.
  1710. ///
  1711. /// @param mcInst - The MCInst to append to.
  1712. /// @param immediate - The immediate value to append.
  1713. /// @param operand - The operand, as stored in the descriptor table.
  1714. /// @param insn - The internal instruction.
  1715. static void translateImmediate(MCInst &mcInst, uint64_t immediate,
  1716. const OperandSpecifier &operand,
  1717. InternalInstruction &insn,
  1718. const MCDisassembler *Dis) {
  1719. // Sign-extend the immediate if necessary.
  1720. OperandType type = (OperandType)operand.type;
  1721. bool isBranch = false;
  1722. uint64_t pcrel = 0;
  1723. if (type == TYPE_REL) {
  1724. isBranch = true;
  1725. pcrel = insn.startLocation +
  1726. insn.immediateOffset + insn.immediateSize;
  1727. switch (operand.encoding) {
  1728. default:
  1729. break;
  1730. case ENCODING_Iv:
  1731. switch (insn.displacementSize) {
  1732. default:
  1733. break;
  1734. case 1:
  1735. if(immediate & 0x80)
  1736. immediate |= ~(0xffull);
  1737. break;
  1738. case 2:
  1739. if(immediate & 0x8000)
  1740. immediate |= ~(0xffffull);
  1741. break;
  1742. case 4:
  1743. if(immediate & 0x80000000)
  1744. immediate |= ~(0xffffffffull);
  1745. break;
  1746. case 8:
  1747. break;
  1748. }
  1749. break;
  1750. case ENCODING_IB:
  1751. if(immediate & 0x80)
  1752. immediate |= ~(0xffull);
  1753. break;
  1754. case ENCODING_IW:
  1755. if(immediate & 0x8000)
  1756. immediate |= ~(0xffffull);
  1757. break;
  1758. case ENCODING_ID:
  1759. if(immediate & 0x80000000)
  1760. immediate |= ~(0xffffffffull);
  1761. break;
  1762. }
  1763. }
  1764. // By default sign-extend all X86 immediates based on their encoding.
  1765. else if (type == TYPE_IMM) {
  1766. switch (operand.encoding) {
  1767. default:
  1768. break;
  1769. case ENCODING_IB:
  1770. if(immediate & 0x80)
  1771. immediate |= ~(0xffull);
  1772. break;
  1773. case ENCODING_IW:
  1774. if(immediate & 0x8000)
  1775. immediate |= ~(0xffffull);
  1776. break;
  1777. case ENCODING_ID:
  1778. if(immediate & 0x80000000)
  1779. immediate |= ~(0xffffffffull);
  1780. break;
  1781. case ENCODING_IO:
  1782. break;
  1783. }
  1784. }
  1785. switch (type) {
  1786. case TYPE_XMM:
  1787. mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
  1788. return;
  1789. case TYPE_YMM:
  1790. mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
  1791. return;
  1792. case TYPE_ZMM:
  1793. mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
  1794. return;
  1795. default:
  1796. // operand is 64 bits wide. Do nothing.
  1797. break;
  1798. }
  1799. if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
  1800. insn.immediateOffset, insn.immediateSize,
  1801. mcInst, Dis))
  1802. mcInst.addOperand(MCOperand::createImm(immediate));
  1803. if (type == TYPE_MOFFS) {
  1804. MCOperand segmentReg;
  1805. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1806. mcInst.addOperand(segmentReg);
  1807. }
  1808. }
  1809. /// translateRMRegister - Translates a register stored in the R/M field of the
  1810. /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
  1811. /// @param mcInst - The MCInst to append to.
  1812. /// @param insn - The internal instruction to extract the R/M field
  1813. /// from.
  1814. /// @return - 0 on success; -1 otherwise
  1815. static bool translateRMRegister(MCInst &mcInst,
  1816. InternalInstruction &insn) {
  1817. if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
  1818. debug("A R/M register operand may not have a SIB byte");
  1819. return true;
  1820. }
  1821. switch (insn.eaBase) {
  1822. default:
  1823. debug("Unexpected EA base register");
  1824. return true;
  1825. case EA_BASE_NONE:
  1826. debug("EA_BASE_NONE for ModR/M base");
  1827. return true;
  1828. #define ENTRY(x) case EA_BASE_##x:
  1829. ALL_EA_BASES
  1830. #undef ENTRY
  1831. debug("A R/M register operand may not have a base; "
  1832. "the operand must be a register.");
  1833. return true;
  1834. #define ENTRY(x) \
  1835. case EA_REG_##x: \
  1836. mcInst.addOperand(MCOperand::createReg(X86::x)); break;
  1837. ALL_REGS
  1838. #undef ENTRY
  1839. }
  1840. return false;
  1841. }
  1842. /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
  1843. /// fields of an internal instruction (and possibly its SIB byte) to a memory
  1844. /// operand in LLVM's format, and appends it to an MCInst.
  1845. ///
  1846. /// @param mcInst - The MCInst to append to.
  1847. /// @param insn - The instruction to extract Mod, R/M, and SIB fields
  1848. /// from.
  1849. /// @param ForceSIB - The instruction must use SIB.
  1850. /// @return - 0 on success; nonzero otherwise
  1851. static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
  1852. const MCDisassembler *Dis,
  1853. bool ForceSIB = false) {
  1854. // Addresses in an MCInst are represented as five operands:
  1855. // 1. basereg (register) The R/M base, or (if there is a SIB) the
  1856. // SIB base
  1857. // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
  1858. // scale amount
  1859. // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
  1860. // the index (which is multiplied by the
  1861. // scale amount)
  1862. // 4. displacement (immediate) 0, or the displacement if there is one
  1863. // 5. segmentreg (register) x86_registerNONE for now, but could be set
  1864. // if we have segment overrides
  1865. MCOperand baseReg;
  1866. MCOperand scaleAmount;
  1867. MCOperand indexReg;
  1868. MCOperand displacement;
  1869. MCOperand segmentReg;
  1870. uint64_t pcrel = 0;
  1871. if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
  1872. if (insn.sibBase != SIB_BASE_NONE) {
  1873. switch (insn.sibBase) {
  1874. default:
  1875. debug("Unexpected sibBase");
  1876. return true;
  1877. #define ENTRY(x) \
  1878. case SIB_BASE_##x: \
  1879. baseReg = MCOperand::createReg(X86::x); break;
  1880. ALL_SIB_BASES
  1881. #undef ENTRY
  1882. }
  1883. } else {
  1884. baseReg = MCOperand::createReg(X86::NoRegister);
  1885. }
  1886. if (insn.sibIndex != SIB_INDEX_NONE) {
  1887. switch (insn.sibIndex) {
  1888. default:
  1889. debug("Unexpected sibIndex");
  1890. return true;
  1891. #define ENTRY(x) \
  1892. case SIB_INDEX_##x: \
  1893. indexReg = MCOperand::createReg(X86::x); break;
  1894. EA_BASES_32BIT
  1895. EA_BASES_64BIT
  1896. REGS_XMM
  1897. REGS_YMM
  1898. REGS_ZMM
  1899. #undef ENTRY
  1900. }
  1901. } else {
  1902. // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
  1903. // but no index is used and modrm alone should have been enough.
  1904. // -No base register in 32-bit mode. In 64-bit mode this is used to
  1905. // avoid rip-relative addressing.
  1906. // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
  1907. // base always requires a SIB byte.
  1908. // -A scale other than 1 is used.
  1909. if (!ForceSIB &&
  1910. (insn.sibScale != 1 ||
  1911. (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
  1912. (insn.sibBase != SIB_BASE_NONE &&
  1913. insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
  1914. insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
  1915. indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
  1916. X86::RIZ);
  1917. } else
  1918. indexReg = MCOperand::createReg(X86::NoRegister);
  1919. }
  1920. scaleAmount = MCOperand::createImm(insn.sibScale);
  1921. } else {
  1922. switch (insn.eaBase) {
  1923. case EA_BASE_NONE:
  1924. if (insn.eaDisplacement == EA_DISP_NONE) {
  1925. debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
  1926. return true;
  1927. }
  1928. if (insn.mode == MODE_64BIT){
  1929. pcrel = insn.startLocation +
  1930. insn.displacementOffset + insn.displacementSize;
  1931. tryAddingPcLoadReferenceComment(insn.startLocation +
  1932. insn.displacementOffset,
  1933. insn.displacement + pcrel, Dis);
  1934. // Section 2.2.1.6
  1935. baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
  1936. X86::RIP);
  1937. }
  1938. else
  1939. baseReg = MCOperand::createReg(X86::NoRegister);
  1940. indexReg = MCOperand::createReg(X86::NoRegister);
  1941. break;
  1942. case EA_BASE_BX_SI:
  1943. baseReg = MCOperand::createReg(X86::BX);
  1944. indexReg = MCOperand::createReg(X86::SI);
  1945. break;
  1946. case EA_BASE_BX_DI:
  1947. baseReg = MCOperand::createReg(X86::BX);
  1948. indexReg = MCOperand::createReg(X86::DI);
  1949. break;
  1950. case EA_BASE_BP_SI:
  1951. baseReg = MCOperand::createReg(X86::BP);
  1952. indexReg = MCOperand::createReg(X86::SI);
  1953. break;
  1954. case EA_BASE_BP_DI:
  1955. baseReg = MCOperand::createReg(X86::BP);
  1956. indexReg = MCOperand::createReg(X86::DI);
  1957. break;
  1958. default:
  1959. indexReg = MCOperand::createReg(X86::NoRegister);
  1960. switch (insn.eaBase) {
  1961. default:
  1962. debug("Unexpected eaBase");
  1963. return true;
  1964. // Here, we will use the fill-ins defined above. However,
  1965. // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
  1966. // sib and sib64 were handled in the top-level if, so they're only
  1967. // placeholders to keep the compiler happy.
  1968. #define ENTRY(x) \
  1969. case EA_BASE_##x: \
  1970. baseReg = MCOperand::createReg(X86::x); break;
  1971. ALL_EA_BASES
  1972. #undef ENTRY
  1973. #define ENTRY(x) case EA_REG_##x:
  1974. ALL_REGS
  1975. #undef ENTRY
  1976. debug("A R/M memory operand may not be a register; "
  1977. "the base field must be a base.");
  1978. return true;
  1979. }
  1980. }
  1981. scaleAmount = MCOperand::createImm(1);
  1982. }
  1983. displacement = MCOperand::createImm(insn.displacement);
  1984. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1985. mcInst.addOperand(baseReg);
  1986. mcInst.addOperand(scaleAmount);
  1987. mcInst.addOperand(indexReg);
  1988. if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
  1989. insn.startLocation, insn.displacementOffset,
  1990. insn.displacementSize, mcInst, Dis))
  1991. mcInst.addOperand(displacement);
  1992. mcInst.addOperand(segmentReg);
  1993. return false;
  1994. }
  1995. /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
  1996. /// byte of an instruction to LLVM form, and appends it to an MCInst.
  1997. ///
  1998. /// @param mcInst - The MCInst to append to.
  1999. /// @param operand - The operand, as stored in the descriptor table.
  2000. /// @param insn - The instruction to extract Mod, R/M, and SIB fields
  2001. /// from.
  2002. /// @return - 0 on success; nonzero otherwise
  2003. static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
  2004. InternalInstruction &insn, const MCDisassembler *Dis) {
  2005. switch (operand.type) {
  2006. default:
  2007. debug("Unexpected type for a R/M operand");
  2008. return true;
  2009. case TYPE_R8:
  2010. case TYPE_R16:
  2011. case TYPE_R32:
  2012. case TYPE_R64:
  2013. case TYPE_Rv:
  2014. case TYPE_MM64:
  2015. case TYPE_XMM:
  2016. case TYPE_YMM:
  2017. case TYPE_ZMM:
  2018. case TYPE_TMM:
  2019. case TYPE_VK_PAIR:
  2020. case TYPE_VK:
  2021. case TYPE_DEBUGREG:
  2022. case TYPE_CONTROLREG:
  2023. case TYPE_BNDR:
  2024. return translateRMRegister(mcInst, insn);
  2025. case TYPE_M:
  2026. case TYPE_MVSIBX:
  2027. case TYPE_MVSIBY:
  2028. case TYPE_MVSIBZ:
  2029. return translateRMMemory(mcInst, insn, Dis);
  2030. case TYPE_MSIB:
  2031. return translateRMMemory(mcInst, insn, Dis, true);
  2032. }
  2033. }
  2034. /// translateFPRegister - Translates a stack position on the FPU stack to its
  2035. /// LLVM form, and appends it to an MCInst.
  2036. ///
  2037. /// @param mcInst - The MCInst to append to.
  2038. /// @param stackPos - The stack position to translate.
  2039. static void translateFPRegister(MCInst &mcInst,
  2040. uint8_t stackPos) {
  2041. mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
  2042. }
  2043. /// translateMaskRegister - Translates a 3-bit mask register number to
  2044. /// LLVM form, and appends it to an MCInst.
  2045. ///
  2046. /// @param mcInst - The MCInst to append to.
  2047. /// @param maskRegNum - Number of mask register from 0 to 7.
  2048. /// @return - false on success; true otherwise.
  2049. static bool translateMaskRegister(MCInst &mcInst,
  2050. uint8_t maskRegNum) {
  2051. if (maskRegNum >= 8) {
  2052. debug("Invalid mask register number");
  2053. return true;
  2054. }
  2055. mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
  2056. return false;
  2057. }
  2058. /// translateOperand - Translates an operand stored in an internal instruction
  2059. /// to LLVM's format and appends it to an MCInst.
  2060. ///
  2061. /// @param mcInst - The MCInst to append to.
  2062. /// @param operand - The operand, as stored in the descriptor table.
  2063. /// @param insn - The internal instruction.
  2064. /// @return - false on success; true otherwise.
  2065. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
  2066. InternalInstruction &insn,
  2067. const MCDisassembler *Dis) {
  2068. switch (operand.encoding) {
  2069. default:
  2070. debug("Unhandled operand encoding during translation");
  2071. return true;
  2072. case ENCODING_REG:
  2073. translateRegister(mcInst, insn.reg);
  2074. return false;
  2075. case ENCODING_WRITEMASK:
  2076. return translateMaskRegister(mcInst, insn.writemask);
  2077. case ENCODING_SIB:
  2078. CASE_ENCODING_RM:
  2079. CASE_ENCODING_VSIB:
  2080. return translateRM(mcInst, operand, insn, Dis);
  2081. case ENCODING_IB:
  2082. case ENCODING_IW:
  2083. case ENCODING_ID:
  2084. case ENCODING_IO:
  2085. case ENCODING_Iv:
  2086. case ENCODING_Ia:
  2087. translateImmediate(mcInst,
  2088. insn.immediates[insn.numImmediatesTranslated++],
  2089. operand,
  2090. insn,
  2091. Dis);
  2092. return false;
  2093. case ENCODING_IRC:
  2094. mcInst.addOperand(MCOperand::createImm(insn.RC));
  2095. return false;
  2096. case ENCODING_SI:
  2097. return translateSrcIndex(mcInst, insn);
  2098. case ENCODING_DI:
  2099. return translateDstIndex(mcInst, insn);
  2100. case ENCODING_RB:
  2101. case ENCODING_RW:
  2102. case ENCODING_RD:
  2103. case ENCODING_RO:
  2104. case ENCODING_Rv:
  2105. translateRegister(mcInst, insn.opcodeRegister);
  2106. return false;
  2107. case ENCODING_CC:
  2108. mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
  2109. return false;
  2110. case ENCODING_FP:
  2111. translateFPRegister(mcInst, insn.modRM & 7);
  2112. return false;
  2113. case ENCODING_VVVV:
  2114. translateRegister(mcInst, insn.vvvv);
  2115. return false;
  2116. case ENCODING_DUP:
  2117. return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
  2118. insn, Dis);
  2119. }
  2120. }
  2121. /// translateInstruction - Translates an internal instruction and all its
  2122. /// operands to an MCInst.
  2123. ///
  2124. /// @param mcInst - The MCInst to populate with the instruction's data.
  2125. /// @param insn - The internal instruction.
  2126. /// @return - false on success; true otherwise.
  2127. static bool translateInstruction(MCInst &mcInst,
  2128. InternalInstruction &insn,
  2129. const MCDisassembler *Dis) {
  2130. if (!insn.spec) {
  2131. debug("Instruction has no specification");
  2132. return true;
  2133. }
  2134. mcInst.clear();
  2135. mcInst.setOpcode(insn.instructionID);
  2136. // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
  2137. // prefix bytes should be disassembled as xrelease and xacquire then set the
  2138. // opcode to those instead of the rep and repne opcodes.
  2139. if (insn.xAcquireRelease) {
  2140. if(mcInst.getOpcode() == X86::REP_PREFIX)
  2141. mcInst.setOpcode(X86::XRELEASE_PREFIX);
  2142. else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
  2143. mcInst.setOpcode(X86::XACQUIRE_PREFIX);
  2144. }
  2145. insn.numImmediatesTranslated = 0;
  2146. for (const auto &Op : insn.operands) {
  2147. if (Op.encoding != ENCODING_NONE) {
  2148. if (translateOperand(mcInst, Op, insn, Dis)) {
  2149. return true;
  2150. }
  2151. }
  2152. }
  2153. return false;
  2154. }
  2155. static MCDisassembler *createX86Disassembler(const Target &T,
  2156. const MCSubtargetInfo &STI,
  2157. MCContext &Ctx) {
  2158. std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
  2159. return new X86GenericDisassembler(STI, Ctx, std::move(MII));
  2160. }
  2161. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
  2162. // Register the disassembler.
  2163. TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
  2164. createX86Disassembler);
  2165. TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
  2166. createX86Disassembler);
  2167. }