X86Disassembler.cpp 79 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345
  1. //===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file is part of the X86 Disassembler.
  10. // It contains code to translate the data produced by the decoder into
  11. // MCInsts.
  12. //
  13. //
  14. // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
  15. // 64-bit X86 instruction sets. The main decode sequence for an assembly
  16. // instruction in this disassembler is:
  17. //
  18. // 1. Read the prefix bytes and determine the attributes of the instruction.
  19. // These attributes, recorded in enum attributeBits
  20. // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
  21. // provides a mapping from bitmasks to contexts, which are represented by
  22. // enum InstructionContext (ibid.).
  23. //
  24. // 2. Read the opcode, and determine what kind of opcode it is. The
  25. // disassembler distinguishes four kinds of opcodes, which are enumerated in
  26. // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
  27. // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
  28. // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
  29. //
  30. // 3. Depending on the opcode type, look in one of four ClassDecision structures
  31. // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
// OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
  33. // a ModRMDecision (ibid.).
  34. //
  35. // 4. Some instructions, such as escape opcodes or extended opcodes, or even
  36. // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
  37. // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
  38. // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
  39. // ModR/M byte is required and how to interpret it.
  40. //
  41. // 5. After resolving the ModRMDecision, the disassembler has a unique ID
  42. // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
  43. // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
  44. // meanings of its operands.
  45. //
  46. // 6. For each operand, its encoding is an entry from OperandEncoding
  47. // (X86DisassemblerDecoderCommon.h) and its type is an entry from
  48. // OperandType (ibid.). The encoding indicates how to read it from the
  49. // instruction; the type indicates how to interpret the value once it has
  50. // been read. For example, a register operand could be stored in the R/M
  51. // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
// the main opcode. This is orthogonal to its meaning (a GPR or an XMM
  53. // register, for instance). Given this information, the operands can be
  54. // extracted and interpreted.
  55. //
  56. // 7. As the last step, the disassembler translates the instruction information
  57. // and operands into a format understandable by the client - in this case, an
  58. // MCInst for use by the MC infrastructure.
  59. //
  60. // The disassembler is broken broadly into two parts: the table emitter that
  61. // emits the instruction decode tables discussed above during compilation, and
  62. // the disassembler itself. The table emitter is documented in more detail in
  63. // utils/TableGen/X86DisassemblerEmitter.h.
  64. //
  65. // X86Disassembler.cpp contains the code responsible for step 7, and for
  66. // invoking the decoder to execute steps 1-6.
  67. // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
  68. // table emitter and the disassembler.
  69. // X86DisassemblerDecoder.h contains the public interface of the decoder,
  70. // factored out into C for possible use by other projects.
  71. // X86DisassemblerDecoder.c contains the source code of the decoder, which is
  72. // responsible for steps 1-6.
  73. //
  74. //===----------------------------------------------------------------------===//
  75. #include "MCTargetDesc/X86BaseInfo.h"
  76. #include "MCTargetDesc/X86MCTargetDesc.h"
  77. #include "TargetInfo/X86TargetInfo.h"
  78. #include "X86DisassemblerDecoder.h"
  79. #include "llvm/MC/MCContext.h"
  80. #include "llvm/MC/MCDisassembler/MCDisassembler.h"
  81. #include "llvm/MC/MCExpr.h"
  82. #include "llvm/MC/MCInst.h"
  83. #include "llvm/MC/MCInstrInfo.h"
  84. #include "llvm/MC/MCSubtargetInfo.h"
  85. #include "llvm/MC/TargetRegistry.h"
  86. #include "llvm/Support/Debug.h"
  87. #include "llvm/Support/Format.h"
  88. #include "llvm/Support/raw_ostream.h"
  89. using namespace llvm;
  90. using namespace llvm::X86Disassembler;
  91. #define DEBUG_TYPE "x86-disassembler"
  92. #define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
  93. // Specifies whether a ModR/M byte is needed and (if so) which
  94. // instruction each possible value of the ModR/M byte corresponds to. Once
  95. // this information is known, we have narrowed down to a single instruction.
  96. struct ModRMDecision {
  97. uint8_t modrm_type;
  98. uint16_t instructionIDs;
  99. };
  100. // Specifies which set of ModR/M->instruction tables to look at
  101. // given a particular opcode.
  102. struct OpcodeDecision {
  103. ModRMDecision modRMDecisions[256];
  104. };
  105. // Specifies which opcode->instruction tables to look at given
  106. // a particular context (set of attributes). Since there are many possible
  107. // contexts, the decoder first uses CONTEXTS_SYM to determine which context
  108. // applies given a specific set of attributes. Hence there are only IC_max
  109. // entries in this table, rather than 2^(ATTR_max).
  110. struct ContextDecision {
  111. OpcodeDecision opcodeDecisions[IC_max];
  112. };
  113. #include "X86GenDisassemblerTables.inc"
  114. static InstrUID decode(OpcodeType type, InstructionContext insnContext,
  115. uint8_t opcode, uint8_t modRM) {
  116. const struct ModRMDecision *dec;
  117. switch (type) {
  118. case ONEBYTE:
  119. dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  120. break;
  121. case TWOBYTE:
  122. dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  123. break;
  124. case THREEBYTE_38:
  125. dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  126. break;
  127. case THREEBYTE_3A:
  128. dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  129. break;
  130. case XOP8_MAP:
  131. dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  132. break;
  133. case XOP9_MAP:
  134. dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  135. break;
  136. case XOPA_MAP:
  137. dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  138. break;
  139. case THREEDNOW_MAP:
  140. dec =
  141. &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  142. break;
  143. case MAP5:
  144. dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  145. break;
  146. case MAP6:
  147. dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
  148. break;
  149. }
  150. switch (dec->modrm_type) {
  151. default:
  152. llvm_unreachable("Corrupt table! Unknown modrm_type");
  153. return 0;
  154. case MODRM_ONEENTRY:
  155. return modRMTable[dec->instructionIDs];
  156. case MODRM_SPLITRM:
  157. if (modFromModRM(modRM) == 0x3)
  158. return modRMTable[dec->instructionIDs + 1];
  159. return modRMTable[dec->instructionIDs];
  160. case MODRM_SPLITREG:
  161. if (modFromModRM(modRM) == 0x3)
  162. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8];
  163. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
  164. case MODRM_SPLITMISC:
  165. if (modFromModRM(modRM) == 0x3)
  166. return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8];
  167. return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)];
  168. case MODRM_FULL:
  169. return modRMTable[dec->instructionIDs + modRM];
  170. }
  171. }
  172. static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
  173. uint64_t offset = insn->readerCursor - insn->startLocation;
  174. if (offset >= insn->bytes.size())
  175. return true;
  176. byte = insn->bytes[offset];
  177. return false;
  178. }
  179. template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
  180. auto r = insn->bytes;
  181. uint64_t offset = insn->readerCursor - insn->startLocation;
  182. if (offset + sizeof(T) > r.size())
  183. return true;
  184. ptr = support::endian::read<T>(&r[offset], support::little);
  185. insn->readerCursor += sizeof(T);
  186. return false;
  187. }
  188. static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
  189. return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
  190. }
  191. // Consumes all of an instruction's prefix bytes, and marks the
  192. // instruction as having them. Also sets the instruction's default operand,
  193. // address, and other relevant data sizes to report operands correctly.
  194. //
  195. // insn must not be empty.
  196. static int readPrefixes(struct InternalInstruction *insn) {
  197. bool isPrefix = true;
  198. uint8_t byte = 0;
  199. uint8_t nextByte;
  200. LLVM_DEBUG(dbgs() << "readPrefixes()");
  201. while (isPrefix) {
  202. // If we fail reading prefixes, just stop here and let the opcode reader
  203. // deal with it.
  204. if (consume(insn, byte))
  205. break;
  206. // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
  207. // break and let it be disassembled as a normal "instruction".
  208. if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
  209. break;
  210. if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
  211. // If the byte is 0xf2 or 0xf3, and any of the following conditions are
  212. // met:
  213. // - it is followed by a LOCK (0xf0) prefix
  214. // - it is followed by an xchg instruction
  215. // then it should be disassembled as a xacquire/xrelease not repne/rep.
  216. if (((nextByte == 0xf0) ||
  217. ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
  218. insn->xAcquireRelease = true;
  219. if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
  220. break;
  221. }
  222. // Also if the byte is 0xf3, and the following condition is met:
  223. // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
  224. // "mov mem, imm" (opcode 0xc6/0xc7) instructions.
  225. // then it should be disassembled as an xrelease not rep.
  226. if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
  227. nextByte == 0xc6 || nextByte == 0xc7)) {
  228. insn->xAcquireRelease = true;
  229. break;
  230. }
  231. if (isREX(insn, nextByte)) {
  232. uint8_t nnextByte;
  233. // Go to REX prefix after the current one
  234. if (consume(insn, nnextByte))
  235. return -1;
  236. // We should be able to read next byte after REX prefix
  237. if (peek(insn, nnextByte))
  238. return -1;
  239. --insn->readerCursor;
  240. }
  241. }
  242. switch (byte) {
  243. case 0xf0: // LOCK
  244. insn->hasLockPrefix = true;
  245. break;
  246. case 0xf2: // REPNE/REPNZ
  247. case 0xf3: { // REP or REPE/REPZ
  248. uint8_t nextByte;
  249. if (peek(insn, nextByte))
  250. break;
  251. // TODO:
  252. // 1. There could be several 0x66
  253. // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then
  254. // it's not mandatory prefix
  255. // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
  256. // 0x0f exactly after it to be mandatory prefix
  257. if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66)
  258. // The last of 0xf2 /0xf3 is mandatory prefix
  259. insn->mandatoryPrefix = byte;
  260. insn->repeatPrefix = byte;
  261. break;
  262. }
  263. case 0x2e: // CS segment override -OR- Branch not taken
  264. insn->segmentOverride = SEG_OVERRIDE_CS;
  265. break;
  266. case 0x36: // SS segment override -OR- Branch taken
  267. insn->segmentOverride = SEG_OVERRIDE_SS;
  268. break;
  269. case 0x3e: // DS segment override
  270. insn->segmentOverride = SEG_OVERRIDE_DS;
  271. break;
  272. case 0x26: // ES segment override
  273. insn->segmentOverride = SEG_OVERRIDE_ES;
  274. break;
  275. case 0x64: // FS segment override
  276. insn->segmentOverride = SEG_OVERRIDE_FS;
  277. break;
  278. case 0x65: // GS segment override
  279. insn->segmentOverride = SEG_OVERRIDE_GS;
  280. break;
  281. case 0x66: { // Operand-size override {
  282. uint8_t nextByte;
  283. insn->hasOpSize = true;
  284. if (peek(insn, nextByte))
  285. break;
  286. // 0x66 can't overwrite existing mandatory prefix and should be ignored
  287. if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
  288. insn->mandatoryPrefix = byte;
  289. break;
  290. }
  291. case 0x67: // Address-size override
  292. insn->hasAdSize = true;
  293. break;
  294. default: // Not a prefix byte
  295. isPrefix = false;
  296. break;
  297. }
  298. if (isPrefix)
  299. LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
  300. }
  301. insn->vectorExtensionType = TYPE_NO_VEX_XOP;
  302. if (byte == 0x62) {
  303. uint8_t byte1, byte2;
  304. if (consume(insn, byte1)) {
  305. LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
  306. return -1;
  307. }
  308. if (peek(insn, byte2)) {
  309. LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
  310. return -1;
  311. }
  312. if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
  313. ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
  314. insn->vectorExtensionType = TYPE_EVEX;
  315. } else {
  316. --insn->readerCursor; // unconsume byte1
  317. --insn->readerCursor; // unconsume byte
  318. }
  319. if (insn->vectorExtensionType == TYPE_EVEX) {
  320. insn->vectorExtensionPrefix[0] = byte;
  321. insn->vectorExtensionPrefix[1] = byte1;
  322. if (consume(insn, insn->vectorExtensionPrefix[2])) {
  323. LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
  324. return -1;
  325. }
  326. if (consume(insn, insn->vectorExtensionPrefix[3])) {
  327. LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
  328. return -1;
  329. }
  330. // We simulate the REX prefix for simplicity's sake
  331. if (insn->mode == MODE_64BIT) {
  332. insn->rexPrefix = 0x40 |
  333. (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
  334. (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
  335. (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
  336. (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
  337. }
  338. LLVM_DEBUG(
  339. dbgs() << format(
  340. "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
  341. insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
  342. insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
  343. }
  344. } else if (byte == 0xc4) {
  345. uint8_t byte1;
  346. if (peek(insn, byte1)) {
  347. LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
  348. return -1;
  349. }
  350. if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
  351. insn->vectorExtensionType = TYPE_VEX_3B;
  352. else
  353. --insn->readerCursor;
  354. if (insn->vectorExtensionType == TYPE_VEX_3B) {
  355. insn->vectorExtensionPrefix[0] = byte;
  356. consume(insn, insn->vectorExtensionPrefix[1]);
  357. consume(insn, insn->vectorExtensionPrefix[2]);
  358. // We simulate the REX prefix for simplicity's sake
  359. if (insn->mode == MODE_64BIT)
  360. insn->rexPrefix = 0x40 |
  361. (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
  362. (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
  363. (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
  364. (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
  365. LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
  366. insn->vectorExtensionPrefix[0],
  367. insn->vectorExtensionPrefix[1],
  368. insn->vectorExtensionPrefix[2]));
  369. }
  370. } else if (byte == 0xc5) {
  371. uint8_t byte1;
  372. if (peek(insn, byte1)) {
  373. LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
  374. return -1;
  375. }
  376. if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
  377. insn->vectorExtensionType = TYPE_VEX_2B;
  378. else
  379. --insn->readerCursor;
  380. if (insn->vectorExtensionType == TYPE_VEX_2B) {
  381. insn->vectorExtensionPrefix[0] = byte;
  382. consume(insn, insn->vectorExtensionPrefix[1]);
  383. if (insn->mode == MODE_64BIT)
  384. insn->rexPrefix =
  385. 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
  386. switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
  387. default:
  388. break;
  389. case VEX_PREFIX_66:
  390. insn->hasOpSize = true;
  391. break;
  392. }
  393. LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
  394. insn->vectorExtensionPrefix[0],
  395. insn->vectorExtensionPrefix[1]));
  396. }
  397. } else if (byte == 0x8f) {
  398. uint8_t byte1;
  399. if (peek(insn, byte1)) {
  400. LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
  401. return -1;
  402. }
  403. if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
  404. insn->vectorExtensionType = TYPE_XOP;
  405. else
  406. --insn->readerCursor;
  407. if (insn->vectorExtensionType == TYPE_XOP) {
  408. insn->vectorExtensionPrefix[0] = byte;
  409. consume(insn, insn->vectorExtensionPrefix[1]);
  410. consume(insn, insn->vectorExtensionPrefix[2]);
  411. // We simulate the REX prefix for simplicity's sake
  412. if (insn->mode == MODE_64BIT)
  413. insn->rexPrefix = 0x40 |
  414. (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
  415. (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
  416. (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
  417. (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
  418. switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
  419. default:
  420. break;
  421. case VEX_PREFIX_66:
  422. insn->hasOpSize = true;
  423. break;
  424. }
  425. LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
  426. insn->vectorExtensionPrefix[0],
  427. insn->vectorExtensionPrefix[1],
  428. insn->vectorExtensionPrefix[2]));
  429. }
  430. } else if (isREX(insn, byte)) {
  431. if (peek(insn, nextByte))
  432. return -1;
  433. insn->rexPrefix = byte;
  434. LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
  435. } else
  436. --insn->readerCursor;
  437. if (insn->mode == MODE_16BIT) {
  438. insn->registerSize = (insn->hasOpSize ? 4 : 2);
  439. insn->addressSize = (insn->hasAdSize ? 4 : 2);
  440. insn->displacementSize = (insn->hasAdSize ? 4 : 2);
  441. insn->immediateSize = (insn->hasOpSize ? 4 : 2);
  442. } else if (insn->mode == MODE_32BIT) {
  443. insn->registerSize = (insn->hasOpSize ? 2 : 4);
  444. insn->addressSize = (insn->hasAdSize ? 2 : 4);
  445. insn->displacementSize = (insn->hasAdSize ? 2 : 4);
  446. insn->immediateSize = (insn->hasOpSize ? 2 : 4);
  447. } else if (insn->mode == MODE_64BIT) {
  448. insn->displacementSize = 4;
  449. if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
  450. insn->registerSize = 8;
  451. insn->addressSize = (insn->hasAdSize ? 4 : 8);
  452. insn->immediateSize = 4;
  453. insn->hasOpSize = false;
  454. } else {
  455. insn->registerSize = (insn->hasOpSize ? 2 : 4);
  456. insn->addressSize = (insn->hasAdSize ? 4 : 8);
  457. insn->immediateSize = (insn->hasOpSize ? 2 : 4);
  458. }
  459. }
  460. return 0;
  461. }
  462. // Consumes the SIB byte to determine addressing information.
  463. static int readSIB(struct InternalInstruction *insn) {
  464. SIBBase sibBaseBase = SIB_BASE_NONE;
  465. uint8_t index, base;
  466. LLVM_DEBUG(dbgs() << "readSIB()");
  467. switch (insn->addressSize) {
  468. case 2:
  469. default:
  470. llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
  471. case 4:
  472. insn->sibIndexBase = SIB_INDEX_EAX;
  473. sibBaseBase = SIB_BASE_EAX;
  474. break;
  475. case 8:
  476. insn->sibIndexBase = SIB_INDEX_RAX;
  477. sibBaseBase = SIB_BASE_RAX;
  478. break;
  479. }
  480. if (consume(insn, insn->sib))
  481. return -1;
  482. index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3);
  483. if (index == 0x4) {
  484. insn->sibIndex = SIB_INDEX_NONE;
  485. } else {
  486. insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
  487. }
  488. insn->sibScale = 1 << scaleFromSIB(insn->sib);
  489. base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3);
  490. switch (base) {
  491. case 0x5:
  492. case 0xd:
  493. switch (modFromModRM(insn->modRM)) {
  494. case 0x0:
  495. insn->eaDisplacement = EA_DISP_32;
  496. insn->sibBase = SIB_BASE_NONE;
  497. break;
  498. case 0x1:
  499. insn->eaDisplacement = EA_DISP_8;
  500. insn->sibBase = (SIBBase)(sibBaseBase + base);
  501. break;
  502. case 0x2:
  503. insn->eaDisplacement = EA_DISP_32;
  504. insn->sibBase = (SIBBase)(sibBaseBase + base);
  505. break;
  506. default:
  507. llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
  508. }
  509. break;
  510. default:
  511. insn->sibBase = (SIBBase)(sibBaseBase + base);
  512. break;
  513. }
  514. return 0;
  515. }
  516. static int readDisplacement(struct InternalInstruction *insn) {
  517. int8_t d8;
  518. int16_t d16;
  519. int32_t d32;
  520. LLVM_DEBUG(dbgs() << "readDisplacement()");
  521. insn->displacementOffset = insn->readerCursor - insn->startLocation;
  522. switch (insn->eaDisplacement) {
  523. case EA_DISP_NONE:
  524. break;
  525. case EA_DISP_8:
  526. if (consume(insn, d8))
  527. return -1;
  528. insn->displacement = d8;
  529. break;
  530. case EA_DISP_16:
  531. if (consume(insn, d16))
  532. return -1;
  533. insn->displacement = d16;
  534. break;
  535. case EA_DISP_32:
  536. if (consume(insn, d32))
  537. return -1;
  538. insn->displacement = d32;
  539. break;
  540. }
  541. return 0;
  542. }
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement).
  544. static int readModRM(struct InternalInstruction *insn) {
  545. uint8_t mod, rm, reg, evexrm;
  546. LLVM_DEBUG(dbgs() << "readModRM()");
  547. if (insn->consumedModRM)
  548. return 0;
  549. if (consume(insn, insn->modRM))
  550. return -1;
  551. insn->consumedModRM = true;
  552. mod = modFromModRM(insn->modRM);
  553. rm = rmFromModRM(insn->modRM);
  554. reg = regFromModRM(insn->modRM);
  555. // This goes by insn->registerSize to pick the correct register, which messes
  556. // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
  557. // fixupReg().
  558. switch (insn->registerSize) {
  559. case 2:
  560. insn->regBase = MODRM_REG_AX;
  561. insn->eaRegBase = EA_REG_AX;
  562. break;
  563. case 4:
  564. insn->regBase = MODRM_REG_EAX;
  565. insn->eaRegBase = EA_REG_EAX;
  566. break;
  567. case 8:
  568. insn->regBase = MODRM_REG_RAX;
  569. insn->eaRegBase = EA_REG_RAX;
  570. break;
  571. }
  572. reg |= rFromREX(insn->rexPrefix) << 3;
  573. rm |= bFromREX(insn->rexPrefix) << 3;
  574. evexrm = 0;
  575. if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) {
  576. reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  577. evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
  578. }
  579. insn->reg = (Reg)(insn->regBase + reg);
  580. switch (insn->addressSize) {
  581. case 2: {
  582. EABase eaBaseBase = EA_BASE_BX_SI;
  583. switch (mod) {
  584. case 0x0:
  585. if (rm == 0x6) {
  586. insn->eaBase = EA_BASE_NONE;
  587. insn->eaDisplacement = EA_DISP_16;
  588. if (readDisplacement(insn))
  589. return -1;
  590. } else {
  591. insn->eaBase = (EABase)(eaBaseBase + rm);
  592. insn->eaDisplacement = EA_DISP_NONE;
  593. }
  594. break;
  595. case 0x1:
  596. insn->eaBase = (EABase)(eaBaseBase + rm);
  597. insn->eaDisplacement = EA_DISP_8;
  598. insn->displacementSize = 1;
  599. if (readDisplacement(insn))
  600. return -1;
  601. break;
  602. case 0x2:
  603. insn->eaBase = (EABase)(eaBaseBase + rm);
  604. insn->eaDisplacement = EA_DISP_16;
  605. if (readDisplacement(insn))
  606. return -1;
  607. break;
  608. case 0x3:
  609. insn->eaBase = (EABase)(insn->eaRegBase + rm);
  610. if (readDisplacement(insn))
  611. return -1;
  612. break;
  613. }
  614. break;
  615. }
  616. case 4:
  617. case 8: {
  618. EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
  619. switch (mod) {
  620. case 0x0:
  621. insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
  622. // In determining whether RIP-relative mode is used (rm=5),
  623. // or whether a SIB byte is present (rm=4),
  624. // the extension bits (REX.b and EVEX.x) are ignored.
  625. switch (rm & 7) {
  626. case 0x4: // SIB byte is present
  627. insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
  628. if (readSIB(insn) || readDisplacement(insn))
  629. return -1;
  630. break;
  631. case 0x5: // RIP-relative
  632. insn->eaBase = EA_BASE_NONE;
  633. insn->eaDisplacement = EA_DISP_32;
  634. if (readDisplacement(insn))
  635. return -1;
  636. break;
  637. default:
  638. insn->eaBase = (EABase)(eaBaseBase + rm);
  639. break;
  640. }
  641. break;
  642. case 0x1:
  643. insn->displacementSize = 1;
  644. [[fallthrough]];
  645. case 0x2:
  646. insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
  647. switch (rm & 7) {
  648. case 0x4: // SIB byte is present
  649. insn->eaBase = EA_BASE_sib;
  650. if (readSIB(insn) || readDisplacement(insn))
  651. return -1;
  652. break;
  653. default:
  654. insn->eaBase = (EABase)(eaBaseBase + rm);
  655. if (readDisplacement(insn))
  656. return -1;
  657. break;
  658. }
  659. break;
  660. case 0x3:
  661. insn->eaDisplacement = EA_DISP_NONE;
  662. insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm);
  663. break;
  664. }
  665. break;
  666. }
  667. } // switch (insn->addressSize)
  668. return 0;
  669. }
// Generates a static function `name` that maps a raw register index onto the
// enumerator appropriate for an operand type.
//
// Macro parameters:
//   name   - The name of the generated function.
//   base   - Expression added to the index for TYPE_Rv operands.
//   prefix - Enumerator prefix; the generated code refers to prefix##_AL,
//            prefix##_XMM0, prefix##_K0 and so on.
//   mask   - Mask applied to the index for the TYPE_R8/R16/R32/R64 cases
//            before the range check.
//
// The generated function sets *valid to 1 on entry and clears it when the
// index is out of range for the requested class or the type is not handled
// (in which case it also returns 0). For TYPE_R8, indices 4-7 map onto the
// prefix##_SPL.. enumerators when a REX prefix is present.
#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)                           \
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
                       uint8_t index, uint8_t *valid) {                        \
    *valid = 1;                                                                \
    switch (type) {                                                            \
    default:                                                                   \
      debug("Unhandled register type");                                        \
      *valid = 0;                                                              \
      return 0;                                                                \
    case TYPE_Rv:                                                              \
      return base + index;                                                     \
    case TYPE_R8:                                                              \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      if (insn->rexPrefix && index >= 4 && index <= 7) {                       \
        return prefix##_SPL + (index - 4);                                     \
      } else {                                                                 \
        return prefix##_AL + index;                                            \
      }                                                                        \
    case TYPE_R16:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_AX + index;                                              \
    case TYPE_R32:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_EAX + index;                                             \
    case TYPE_R64:                                                             \
      index &= mask;                                                           \
      if (index > 0xf)                                                         \
        *valid = 0;                                                            \
      return prefix##_RAX + index;                                             \
    case TYPE_ZMM:                                                             \
      return prefix##_ZMM0 + index;                                            \
    case TYPE_YMM:                                                             \
      return prefix##_YMM0 + index;                                            \
    case TYPE_XMM:                                                             \
      return prefix##_XMM0 + index;                                            \
    case TYPE_TMM:                                                             \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_TMM0 + index;                                            \
    case TYPE_VK:                                                              \
      index &= 0xf;                                                            \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0 + index;                                              \
    case TYPE_VK_PAIR:                                                         \
      if (index > 7)                                                           \
        *valid = 0;                                                            \
      return prefix##_K0_K1 + (index / 2);                                     \
    case TYPE_MM64:                                                            \
      return prefix##_MM0 + (index & 0x7);                                     \
    case TYPE_SEGMENTREG:                                                      \
      if ((index & 7) > 5)                                                     \
        *valid = 0;                                                            \
      return prefix##_ES + (index & 7);                                        \
    case TYPE_DEBUGREG:                                                        \
      return prefix##_DR0 + index;                                             \
    case TYPE_CONTROLREG:                                                      \
      return prefix##_CR0 + index;                                             \
    case TYPE_MVSIBX:                                                          \
      return prefix##_XMM0 + index;                                            \
    case TYPE_MVSIBY:                                                          \
      return prefix##_YMM0 + index;                                            \
    case TYPE_MVSIBZ:                                                          \
      return prefix##_ZMM0 + index;                                            \
    }                                                                          \
  }
// Consult an operand type to determine the meaning of the reg or R/M field. If
// the operand is an XMM operand, for example, an operand would be XMM0 instead
// of AX, which readModRM() would otherwise misinterpret it as.
//
// Two functions are generated from GENERIC_FIXUP_FUNC:
//   fixupRegValue - for the reg field; uses insn->regBase, the MODRM_REG_*
//                   enumerators and a 0x1f mask.
//   fixupRMValue  - for the R/M field; uses insn->eaRegBase, the EA_REG_*
//                   enumerators and a 0xf mask.
//
// @param insn  - The instruction containing the operand.
// @param type  - The operand type.
// @param index - The existing value of the field as reported by readModRM().
// @param valid - The address of a uint8_t. The target is set to 1 if the
//                field is valid for the register class; 0 if not.
// @return      - The proper value.
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG, 0x1f)
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG, 0xf)
  754. // Consult an operand specifier to determine which of the fixup*Value functions
  755. // to use in correcting readModRM()'ss interpretation.
  756. //
  757. // @param insn - See fixup*Value().
  758. // @param op - The operand specifier.
  759. // @return - 0 if fixup was successful; -1 if the register returned was
  760. // invalid for its class.
  761. static int fixupReg(struct InternalInstruction *insn,
  762. const struct OperandSpecifier *op) {
  763. uint8_t valid;
  764. LLVM_DEBUG(dbgs() << "fixupReg()");
  765. switch ((OperandEncoding)op->encoding) {
  766. default:
  767. debug("Expected a REG or R/M encoding in fixupReg");
  768. return -1;
  769. case ENCODING_VVVV:
  770. insn->vvvv =
  771. (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
  772. if (!valid)
  773. return -1;
  774. break;
  775. case ENCODING_REG:
  776. insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
  777. insn->reg - insn->regBase, &valid);
  778. if (!valid)
  779. return -1;
  780. break;
  781. case ENCODING_SIB:
  782. CASE_ENCODING_RM:
  783. if (insn->eaBase >= insn->eaRegBase) {
  784. insn->eaBase = (EABase)fixupRMValue(
  785. insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
  786. if (!valid)
  787. return -1;
  788. }
  789. break;
  790. }
  791. return 0;
  792. }
  793. // Read the opcode (except the ModR/M byte in the case of extended or escape
  794. // opcodes).
  795. static bool readOpcode(struct InternalInstruction *insn) {
  796. uint8_t current;
  797. LLVM_DEBUG(dbgs() << "readOpcode()");
  798. insn->opcodeType = ONEBYTE;
  799. if (insn->vectorExtensionType == TYPE_EVEX) {
  800. switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
  801. default:
  802. LLVM_DEBUG(
  803. dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
  804. mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
  805. return true;
  806. case VEX_LOB_0F:
  807. insn->opcodeType = TWOBYTE;
  808. return consume(insn, insn->opcode);
  809. case VEX_LOB_0F38:
  810. insn->opcodeType = THREEBYTE_38;
  811. return consume(insn, insn->opcode);
  812. case VEX_LOB_0F3A:
  813. insn->opcodeType = THREEBYTE_3A;
  814. return consume(insn, insn->opcode);
  815. case VEX_LOB_MAP5:
  816. insn->opcodeType = MAP5;
  817. return consume(insn, insn->opcode);
  818. case VEX_LOB_MAP6:
  819. insn->opcodeType = MAP6;
  820. return consume(insn, insn->opcode);
  821. }
  822. } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
  823. switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
  824. default:
  825. LLVM_DEBUG(
  826. dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
  827. mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
  828. return true;
  829. case VEX_LOB_0F:
  830. insn->opcodeType = TWOBYTE;
  831. return consume(insn, insn->opcode);
  832. case VEX_LOB_0F38:
  833. insn->opcodeType = THREEBYTE_38;
  834. return consume(insn, insn->opcode);
  835. case VEX_LOB_0F3A:
  836. insn->opcodeType = THREEBYTE_3A;
  837. return consume(insn, insn->opcode);
  838. case VEX_LOB_MAP5:
  839. insn->opcodeType = MAP5;
  840. return consume(insn, insn->opcode);
  841. case VEX_LOB_MAP6:
  842. insn->opcodeType = MAP6;
  843. return consume(insn, insn->opcode);
  844. }
  845. } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
  846. insn->opcodeType = TWOBYTE;
  847. return consume(insn, insn->opcode);
  848. } else if (insn->vectorExtensionType == TYPE_XOP) {
  849. switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
  850. default:
  851. LLVM_DEBUG(
  852. dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
  853. mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
  854. return true;
  855. case XOP_MAP_SELECT_8:
  856. insn->opcodeType = XOP8_MAP;
  857. return consume(insn, insn->opcode);
  858. case XOP_MAP_SELECT_9:
  859. insn->opcodeType = XOP9_MAP;
  860. return consume(insn, insn->opcode);
  861. case XOP_MAP_SELECT_A:
  862. insn->opcodeType = XOPA_MAP;
  863. return consume(insn, insn->opcode);
  864. }
  865. }
  866. if (consume(insn, current))
  867. return true;
  868. if (current == 0x0f) {
  869. LLVM_DEBUG(
  870. dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
  871. if (consume(insn, current))
  872. return true;
  873. if (current == 0x38) {
  874. LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
  875. current));
  876. if (consume(insn, current))
  877. return true;
  878. insn->opcodeType = THREEBYTE_38;
  879. } else if (current == 0x3a) {
  880. LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
  881. current));
  882. if (consume(insn, current))
  883. return true;
  884. insn->opcodeType = THREEBYTE_3A;
  885. } else if (current == 0x0f) {
  886. LLVM_DEBUG(
  887. dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
  888. // Consume operands before the opcode to comply with the 3DNow encoding
  889. if (readModRM(insn))
  890. return true;
  891. if (consume(insn, current))
  892. return true;
  893. insn->opcodeType = THREEDNOW_MAP;
  894. } else {
  895. LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
  896. insn->opcodeType = TWOBYTE;
  897. }
  898. } else if (insn->mandatoryPrefix)
  899. // The opcode with mandatory prefix must start with opcode escape.
  900. // If not it's legacy repeat prefix
  901. insn->mandatoryPrefix = 0;
  902. // At this point we have consumed the full opcode.
  903. // Anything we consume from here on must be unconsumed.
  904. insn->opcode = current;
  905. return false;
  906. }
  907. // Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
  908. static bool is16BitEquivalent(const char *orig, const char *equiv) {
  909. for (int i = 0;; i++) {
  910. if (orig[i] == '\0' && equiv[i] == '\0')
  911. return true;
  912. if (orig[i] == '\0' || equiv[i] == '\0')
  913. return false;
  914. if (orig[i] != equiv[i]) {
  915. if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
  916. continue;
  917. if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1')
  918. continue;
  919. if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
  920. continue;
  921. return false;
  922. }
  923. }
  924. }
  925. // Determine whether this instruction is a 64-bit instruction.
  926. static bool is64Bit(const char *name) {
  927. for (int i = 0;; ++i) {
  928. if (name[i] == '\0')
  929. return false;
  930. if (name[i] == '6' && name[i + 1] == '4')
  931. return true;
  932. }
  933. }
  934. // Determine the ID of an instruction, consuming the ModR/M byte as appropriate
  935. // for extended and escape opcodes, and using a supplied attribute mask.
  936. static int getInstructionIDWithAttrMask(uint16_t *instructionID,
  937. struct InternalInstruction *insn,
  938. uint16_t attrMask) {
  939. auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
  940. const ContextDecision *decision;
  941. switch (insn->opcodeType) {
  942. case ONEBYTE:
  943. decision = &ONEBYTE_SYM;
  944. break;
  945. case TWOBYTE:
  946. decision = &TWOBYTE_SYM;
  947. break;
  948. case THREEBYTE_38:
  949. decision = &THREEBYTE38_SYM;
  950. break;
  951. case THREEBYTE_3A:
  952. decision = &THREEBYTE3A_SYM;
  953. break;
  954. case XOP8_MAP:
  955. decision = &XOP8_MAP_SYM;
  956. break;
  957. case XOP9_MAP:
  958. decision = &XOP9_MAP_SYM;
  959. break;
  960. case XOPA_MAP:
  961. decision = &XOPA_MAP_SYM;
  962. break;
  963. case THREEDNOW_MAP:
  964. decision = &THREEDNOW_MAP_SYM;
  965. break;
  966. case MAP5:
  967. decision = &MAP5_SYM;
  968. break;
  969. case MAP6:
  970. decision = &MAP6_SYM;
  971. break;
  972. }
  973. if (decision->opcodeDecisions[insnCtx]
  974. .modRMDecisions[insn->opcode]
  975. .modrm_type != MODRM_ONEENTRY) {
  976. if (readModRM(insn))
  977. return -1;
  978. *instructionID =
  979. decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
  980. } else {
  981. *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
  982. }
  983. return 0;
  984. }
// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
// for extended and escape opcodes. Determines the attributes and context for
// the instruction before doing so.
//
// The function first builds an attribute mask from the decoding mode and the
// prefixes seen, performs a table lookup with it, and then runs a series of
// special cases that may repeat the lookup with an adjusted mask (or opcode)
// and choose between the candidates. On success insn->instructionID and
// insn->spec are filled in.
//
// @param insn - The instruction being decoded.
// @param mii  - Used to obtain instruction names (via getName()) when two
//               candidate IDs have to be told apart.
// @return     - 0 on success; -1 if the vector extension type is not one of
//               the handled kinds or a required table lookup failed.
static int getInstructionID(struct InternalInstruction *insn,
                            const MCInstrInfo *mii) {
  uint16_t attrMask;
  uint16_t instructionID;

  LLVM_DEBUG(dbgs() << "getID()");

  attrMask = ATTR_NONE;

  if (insn->mode == MODE_64BIT)
    attrMask |= ATTR_64BIT;

  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // Vector-extension encodings carry their operand-size/repeat information
    // in the prefix's pp field rather than in legacy prefix bytes.
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;

    if (insn->vectorExtensionType == TYPE_EVEX) {
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXKZ;
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXB;
      if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXK;
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_VEXL;
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
        attrMask |= ATTR_EVEXL2;
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        if (insn->hasAdSize)
          attrMask |= ATTR_ADSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
        attrMask |= ATTR_VEXL;
    } else if (insn->vectorExtensionType == TYPE_XOP) {
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
      case VEX_PREFIX_66:
        attrMask |= ATTR_OPSIZE;
        break;
      case VEX_PREFIX_F3:
        attrMask |= ATTR_XS;
        break;
      case VEX_PREFIX_F2:
        attrMask |= ATTR_XD;
        break;
      }

      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
        attrMask |= ATTR_VEXL;
    } else {
      // Unknown vector extension type.
      return -1;
    }
  } else if (!insn->mandatoryPrefix) {
    // If we don't have mandatory prefix we should use legacy prefixes here
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
      attrMask |= ATTR_OPSIZE;
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->opcodeType == ONEBYTE) {
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
        // Special support for PAUSE
        attrMask |= ATTR_XS;
    } else {
      if (insn->repeatPrefix == 0xf2)
        attrMask |= ATTR_XD;
      else if (insn->repeatPrefix == 0xf3)
        attrMask |= ATTR_XS;
    }
  } else {
    // A mandatory prefix was recorded; translate it into the matching
    // attribute.
    switch (insn->mandatoryPrefix) {
    case 0xf2:
      attrMask |= ATTR_XD;
      break;
    case 0xf3:
      attrMask |= ATTR_XS;
      break;
    case 0x66:
      if (insn->mode != MODE_16BIT)
        attrMask |= ATTR_OPSIZE;
      if (insn->hasAdSize)
        attrMask |= ATTR_ADSIZE;
      break;
    case 0x67:
      attrMask |= ATTR_ADSIZE;
      break;
    }
  }

  // REX.W (bit 3 of the REX prefix) sets ATTR_REXW and drops ATTR_ADSIZE.
  if (insn->rexPrefix & 0x08) {
    attrMask |= ATTR_REXW;
    attrMask &= ~ATTR_ADSIZE;
  }

  if (insn->mode == MODE_16BIT) {
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
      attrMask ^= ATTR_ADSIZE;
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
    // prefix isn't present, we need to force the opsize attribute since the
    // prefix is inverted relative to 32-bit mode.
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
      attrMask |= ATTR_OPSIZE;

    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
      attrMask |= ATTR_OPSIZE;
  }

  // Primary lookup with the attribute mask computed above.
  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
    return -1;

  // The following clauses compensate for limitations of the tables.

  if (insn->mode != MODE_64BIT &&
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
    // The tables can't distinguish between cases where the W-bit is used to
    // select register size and cases where it's a required part of the opcode.
    if ((insn->vectorExtensionType == TYPE_EVEX &&
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_VEX_3B &&
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
        (insn->vectorExtensionType == TYPE_XOP &&
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {

      uint16_t instructionIDWithREXW;
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
                                       attrMask | ATTR_REXW)) {
        // The lookup with REX.W failed; keep the result of the primary
        // lookup.
        insn->instructionID = instructionID;
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
        return 0;
      }

      auto SpecName = mii->getName(instructionIDWithREXW);
      // If not a 64-bit instruction. Switch the opcode.
      if (!is64Bit(SpecName.data())) {
        insn->instructionID = instructionIDWithREXW;
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
        return 0;
      }
    }
  }

  // Absolute moves, umonitor, and movdir64b need special handling.
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
  //  inverted w.r.t. 32-bit mode.
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
  //  any position.
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) {
    // Make sure we observed the prefixes in any position.
    if (insn->hasAdSize)
      attrMask |= ATTR_ADSIZE;
    if (insn->hasOpSize)
      attrMask |= ATTR_OPSIZE;

    // In 16-bit, invert the attributes.
    if (insn->mode == MODE_16BIT) {
      attrMask ^= ATTR_ADSIZE;

      // The OpSize attribute is only valid with the absolute moves.
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
        attrMask ^= ATTR_OPSIZE;
    }

    // Repeat the lookup with the adjusted mask; its result is final.
    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
      return -1;

    insn->instructionID = instructionID;
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
    return 0;
  }

  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
      !(attrMask & ATTR_OPSIZE)) {
    // The instruction tables make no distinction between instructions that
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
    // particular spot (i.e., many MMX operations). In general we're
    // conservative, but in the specific case where OpSize is present but not in
    // the right place we check if there's a 16-bit operation.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithOpsize;
    llvm::StringRef specName, specWithOpSizeName;

    spec = &INSTRUCTIONS_SYM[instructionID];

    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
                                     attrMask | ATTR_OPSIZE)) {
      // ModRM required with OpSize but not present. Give up and return the
      // version without OpSize set.
      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specName = mii->getName(instructionID);
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);

    // Prefer the OpSize variant only when its name is the 16-bit spelling of
    // the original and exactly one of "16-bit mode" / "OpSize prefix present"
    // holds.
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
      insn->instructionID = instructionIDWithOpsize;
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
    } else {
      insn->instructionID = instructionID;
      insn->spec = spec;
    }
    return 0;
  }

  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
      insn->rexPrefix & 0x01) {
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
    // as XCHG %r8, %eax.
    const struct InstructionSpecifier *spec;
    uint16_t instructionIDWithNewOpcode;
    const struct InstructionSpecifier *specWithNewOpcode;

    spec = &INSTRUCTIONS_SYM[instructionID];

    // Borrow opcode from one of the other XCHGar opcodes
    insn->opcode = 0x91;

    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
                                     attrMask)) {
      // Lookup with the borrowed opcode failed; restore the opcode and keep
      // the original result.
      insn->opcode = 0x90;

      insn->instructionID = instructionID;
      insn->spec = spec;
      return 0;
    }

    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];

    // Change back
    insn->opcode = 0x90;

    insn->instructionID = instructionIDWithNewOpcode;
    insn->spec = specWithNewOpcode;

    return 0;
  }

  // No special case applied; use the result of the primary lookup.
  insn->instructionID = instructionID;
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];

  return 0;
}
  1235. // Read an operand from the opcode field of an instruction and interprets it
  1236. // appropriately given the operand width. Handles AddRegFrm instructions.
  1237. //
  1238. // @param insn - the instruction whose opcode field is to be read.
  1239. // @param size - The width (in bytes) of the register being specified.
  1240. // 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
  1241. // RAX.
  1242. // @return - 0 on success; nonzero otherwise.
  1243. static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
  1244. LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
  1245. if (size == 0)
  1246. size = insn->registerSize;
  1247. switch (size) {
  1248. case 1:
  1249. insn->opcodeRegister = (Reg)(
  1250. MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1251. if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
  1252. insn->opcodeRegister < MODRM_REG_AL + 0x8) {
  1253. insn->opcodeRegister =
  1254. (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
  1255. }
  1256. break;
  1257. case 2:
  1258. insn->opcodeRegister = (Reg)(
  1259. MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1260. break;
  1261. case 4:
  1262. insn->opcodeRegister =
  1263. (Reg)(MODRM_REG_EAX +
  1264. ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1265. break;
  1266. case 8:
  1267. insn->opcodeRegister =
  1268. (Reg)(MODRM_REG_RAX +
  1269. ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7)));
  1270. break;
  1271. }
  1272. return 0;
  1273. }
  1274. // Consume an immediate operand from an instruction, given the desired operand
  1275. // size.
  1276. //
  1277. // @param insn - The instruction whose operand is to be read.
  1278. // @param size - The width (in bytes) of the operand.
  1279. // @return - 0 if the immediate was successfully consumed; nonzero
  1280. // otherwise.
  1281. static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
  1282. uint8_t imm8;
  1283. uint16_t imm16;
  1284. uint32_t imm32;
  1285. uint64_t imm64;
  1286. LLVM_DEBUG(dbgs() << "readImmediate()");
  1287. assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
  1288. insn->immediateSize = size;
  1289. insn->immediateOffset = insn->readerCursor - insn->startLocation;
  1290. switch (size) {
  1291. case 1:
  1292. if (consume(insn, imm8))
  1293. return -1;
  1294. insn->immediates[insn->numImmediatesConsumed] = imm8;
  1295. break;
  1296. case 2:
  1297. if (consume(insn, imm16))
  1298. return -1;
  1299. insn->immediates[insn->numImmediatesConsumed] = imm16;
  1300. break;
  1301. case 4:
  1302. if (consume(insn, imm32))
  1303. return -1;
  1304. insn->immediates[insn->numImmediatesConsumed] = imm32;
  1305. break;
  1306. case 8:
  1307. if (consume(insn, imm64))
  1308. return -1;
  1309. insn->immediates[insn->numImmediatesConsumed] = imm64;
  1310. break;
  1311. default:
  1312. llvm_unreachable("invalid size");
  1313. }
  1314. insn->numImmediatesConsumed++;
  1315. return 0;
  1316. }
  1317. // Consume vvvv from an instruction if it has a VEX prefix.
  1318. static int readVVVV(struct InternalInstruction *insn) {
  1319. LLVM_DEBUG(dbgs() << "readVVVV()");
  1320. int vvvv;
  1321. if (insn->vectorExtensionType == TYPE_EVEX)
  1322. vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
  1323. vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
  1324. else if (insn->vectorExtensionType == TYPE_VEX_3B)
  1325. vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
  1326. else if (insn->vectorExtensionType == TYPE_VEX_2B)
  1327. vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
  1328. else if (insn->vectorExtensionType == TYPE_XOP)
  1329. vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
  1330. else
  1331. return -1;
  1332. if (insn->mode != MODE_64BIT)
  1333. vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
  1334. insn->vvvv = static_cast<Reg>(vvvv);
  1335. return 0;
  1336. }
  1337. // Read an mask register from the opcode field of an instruction.
  1338. //
  1339. // @param insn - The instruction whose opcode field is to be read.
  1340. // @return - 0 on success; nonzero otherwise.
  1341. static int readMaskRegister(struct InternalInstruction *insn) {
  1342. LLVM_DEBUG(dbgs() << "readMaskRegister()");
  1343. if (insn->vectorExtensionType != TYPE_EVEX)
  1344. return -1;
  1345. insn->writemask =
  1346. static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
  1347. return 0;
  1348. }
  1349. // Consults the specifier for an instruction and consumes all
  1350. // operands for that instruction, interpreting them as it goes.
  1351. static int readOperands(struct InternalInstruction *insn) {
  1352. int hasVVVV, needVVVV;
  1353. int sawRegImm = 0;
  1354. LLVM_DEBUG(dbgs() << "readOperands()");
  1355. // If non-zero vvvv specified, make sure one of the operands uses it.
  1356. hasVVVV = !readVVVV(insn);
  1357. needVVVV = hasVVVV && (insn->vvvv != 0);
  1358. for (const auto &Op : x86OperandSets[insn->spec->operands]) {
  1359. switch (Op.encoding) {
  1360. case ENCODING_NONE:
  1361. case ENCODING_SI:
  1362. case ENCODING_DI:
  1363. break;
  1364. CASE_ENCODING_VSIB:
  1365. // VSIB can use the V2 bit so check only the other bits.
  1366. if (needVVVV)
  1367. needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
  1368. if (readModRM(insn))
  1369. return -1;
  1370. // Reject if SIB wasn't used.
  1371. if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
  1372. return -1;
  1373. // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
  1374. if (insn->sibIndex == SIB_INDEX_NONE)
  1375. insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
  1376. // If EVEX.v2 is set this is one of the 16-31 registers.
  1377. if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
  1378. v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
  1379. insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
  1380. // Adjust the index register to the correct size.
  1381. switch ((OperandType)Op.type) {
  1382. default:
  1383. debug("Unhandled VSIB index type");
  1384. return -1;
  1385. case TYPE_MVSIBX:
  1386. insn->sibIndex =
  1387. (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
  1388. break;
  1389. case TYPE_MVSIBY:
  1390. insn->sibIndex =
  1391. (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
  1392. break;
  1393. case TYPE_MVSIBZ:
  1394. insn->sibIndex =
  1395. (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
  1396. break;
  1397. }
  1398. // Apply the AVX512 compressed displacement scaling factor.
  1399. if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
  1400. insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
  1401. break;
  1402. case ENCODING_SIB:
  1403. // Reject if SIB wasn't used.
  1404. if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
  1405. return -1;
  1406. if (readModRM(insn))
  1407. return -1;
  1408. if (fixupReg(insn, &Op))
  1409. return -1;
  1410. break;
  1411. case ENCODING_REG:
  1412. CASE_ENCODING_RM:
  1413. if (readModRM(insn))
  1414. return -1;
  1415. if (fixupReg(insn, &Op))
  1416. return -1;
  1417. // Apply the AVX512 compressed displacement scaling factor.
  1418. if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
  1419. insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
  1420. break;
  1421. case ENCODING_IB:
  1422. if (sawRegImm) {
  1423. // Saw a register immediate so don't read again and instead split the
  1424. // previous immediate. FIXME: This is a hack.
  1425. insn->immediates[insn->numImmediatesConsumed] =
  1426. insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
  1427. ++insn->numImmediatesConsumed;
  1428. break;
  1429. }
  1430. if (readImmediate(insn, 1))
  1431. return -1;
  1432. if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
  1433. sawRegImm = 1;
  1434. break;
  1435. case ENCODING_IW:
  1436. if (readImmediate(insn, 2))
  1437. return -1;
  1438. break;
  1439. case ENCODING_ID:
  1440. if (readImmediate(insn, 4))
  1441. return -1;
  1442. break;
  1443. case ENCODING_IO:
  1444. if (readImmediate(insn, 8))
  1445. return -1;
  1446. break;
  1447. case ENCODING_Iv:
  1448. if (readImmediate(insn, insn->immediateSize))
  1449. return -1;
  1450. break;
  1451. case ENCODING_Ia:
  1452. if (readImmediate(insn, insn->addressSize))
  1453. return -1;
  1454. break;
  1455. case ENCODING_IRC:
  1456. insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
  1457. lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
  1458. break;
  1459. case ENCODING_RB:
  1460. if (readOpcodeRegister(insn, 1))
  1461. return -1;
  1462. break;
  1463. case ENCODING_RW:
  1464. if (readOpcodeRegister(insn, 2))
  1465. return -1;
  1466. break;
  1467. case ENCODING_RD:
  1468. if (readOpcodeRegister(insn, 4))
  1469. return -1;
  1470. break;
  1471. case ENCODING_RO:
  1472. if (readOpcodeRegister(insn, 8))
  1473. return -1;
  1474. break;
  1475. case ENCODING_Rv:
  1476. if (readOpcodeRegister(insn, 0))
  1477. return -1;
  1478. break;
  1479. case ENCODING_CC:
  1480. insn->immediates[1] = insn->opcode & 0xf;
  1481. break;
  1482. case ENCODING_FP:
  1483. break;
  1484. case ENCODING_VVVV:
  1485. needVVVV = 0; // Mark that we have found a VVVV operand.
  1486. if (!hasVVVV)
  1487. return -1;
  1488. if (insn->mode != MODE_64BIT)
  1489. insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
  1490. if (fixupReg(insn, &Op))
  1491. return -1;
  1492. break;
  1493. case ENCODING_WRITEMASK:
  1494. if (readMaskRegister(insn))
  1495. return -1;
  1496. break;
  1497. case ENCODING_DUP:
  1498. break;
  1499. default:
  1500. LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
  1501. return -1;
  1502. }
  1503. }
  1504. // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
  1505. if (needVVVV)
  1506. return -1;
  1507. return 0;
  1508. }
  namespace llvm {

  // Placeholder constants. Nothing is ever assigned one of these values; they
  // only exist so that an automatically-generated switch statement, which
  // spells X86::<name> for each of these names, still compiles.
  namespace X86 {
  enum {
    BX_SI = 500,
    BX_DI, // 501
    BP_SI, // 502
    BP_DI, // 503
    sib,   // 504
    sib64  // 505
  };
  } // namespace X86

  } // namespace llvm
  1524. static bool translateInstruction(MCInst &target,
  1525. InternalInstruction &source,
  1526. const MCDisassembler *Dis);
  1527. namespace {
  1528. /// Generic disassembler for all X86 platforms. All each platform class should
  1529. /// have to do is subclass the constructor, and provide a different
  1530. /// disassemblerMode value.
  1531. class X86GenericDisassembler : public MCDisassembler {
  1532. std::unique_ptr<const MCInstrInfo> MII;
  1533. public:
  1534. X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
  1535. std::unique_ptr<const MCInstrInfo> MII);
  1536. public:
  1537. DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
  1538. ArrayRef<uint8_t> Bytes, uint64_t Address,
  1539. raw_ostream &cStream) const override;
  1540. private:
  1541. DisassemblerMode fMode;
  1542. };
  1543. } // namespace
  1544. X86GenericDisassembler::X86GenericDisassembler(
  1545. const MCSubtargetInfo &STI,
  1546. MCContext &Ctx,
  1547. std::unique_ptr<const MCInstrInfo> MII)
  1548. : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
  1549. const FeatureBitset &FB = STI.getFeatureBits();
  1550. if (FB[X86::Is16Bit]) {
  1551. fMode = MODE_16BIT;
  1552. return;
  1553. } else if (FB[X86::Is32Bit]) {
  1554. fMode = MODE_32BIT;
  1555. return;
  1556. } else if (FB[X86::Is64Bit]) {
  1557. fMode = MODE_64BIT;
  1558. return;
  1559. }
  1560. llvm_unreachable("Invalid CPU mode");
  1561. }
  1562. MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
  1563. MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
  1564. raw_ostream &CStream) const {
  1565. CommentStream = &CStream;
  1566. InternalInstruction Insn;
  1567. memset(&Insn, 0, sizeof(InternalInstruction));
  1568. Insn.bytes = Bytes;
  1569. Insn.startLocation = Address;
  1570. Insn.readerCursor = Address;
  1571. Insn.mode = fMode;
  1572. if (Bytes.empty() || readPrefixes(&Insn) || readOpcode(&Insn) ||
  1573. getInstructionID(&Insn, MII.get()) || Insn.instructionID == 0 ||
  1574. readOperands(&Insn)) {
  1575. Size = Insn.readerCursor - Address;
  1576. return Fail;
  1577. }
  1578. Insn.operands = x86OperandSets[Insn.spec->operands];
  1579. Insn.length = Insn.readerCursor - Insn.startLocation;
  1580. Size = Insn.length;
  1581. if (Size > 15)
  1582. LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");
  1583. bool Ret = translateInstruction(Instr, Insn, this);
  1584. if (!Ret) {
  1585. unsigned Flags = X86::IP_NO_PREFIX;
  1586. if (Insn.hasAdSize)
  1587. Flags |= X86::IP_HAS_AD_SIZE;
  1588. if (!Insn.mandatoryPrefix) {
  1589. if (Insn.hasOpSize)
  1590. Flags |= X86::IP_HAS_OP_SIZE;
  1591. if (Insn.repeatPrefix == 0xf2)
  1592. Flags |= X86::IP_HAS_REPEAT_NE;
  1593. else if (Insn.repeatPrefix == 0xf3 &&
  1594. // It should not be 'pause' f3 90
  1595. Insn.opcode != 0x90)
  1596. Flags |= X86::IP_HAS_REPEAT;
  1597. if (Insn.hasLockPrefix)
  1598. Flags |= X86::IP_HAS_LOCK;
  1599. }
  1600. Instr.setFlags(Flags);
  1601. }
  1602. return (!Ret) ? Success : Fail;
  1603. }
  1604. //
  1605. // Private code that translates from struct InternalInstructions to MCInsts.
  1606. //
  1607. /// translateRegister - Translates an internal register to the appropriate LLVM
  1608. /// register, and appends it as an operand to an MCInst.
  1609. ///
  1610. /// @param mcInst - The MCInst to append to.
  1611. /// @param reg - The Reg to append.
  1612. static void translateRegister(MCInst &mcInst, Reg reg) {
  1613. #define ENTRY(x) X86::x,
  1614. static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
  1615. #undef ENTRY
  1616. MCPhysReg llvmRegnum = llvmRegnums[reg];
  1617. mcInst.addOperand(MCOperand::createReg(llvmRegnum));
  1618. }
  1619. static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
  1620. 0, // SEG_OVERRIDE_NONE
  1621. X86::CS,
  1622. X86::SS,
  1623. X86::DS,
  1624. X86::ES,
  1625. X86::FS,
  1626. X86::GS
  1627. };
  1628. /// translateSrcIndex - Appends a source index operand to an MCInst.
  1629. ///
  1630. /// @param mcInst - The MCInst to append to.
  1631. /// @param insn - The internal instruction.
  1632. static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
  1633. unsigned baseRegNo;
  1634. if (insn.mode == MODE_64BIT)
  1635. baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
  1636. else if (insn.mode == MODE_32BIT)
  1637. baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
  1638. else {
  1639. assert(insn.mode == MODE_16BIT);
  1640. baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
  1641. }
  1642. MCOperand baseReg = MCOperand::createReg(baseRegNo);
  1643. mcInst.addOperand(baseReg);
  1644. MCOperand segmentReg;
  1645. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1646. mcInst.addOperand(segmentReg);
  1647. return false;
  1648. }
  1649. /// translateDstIndex - Appends a destination index operand to an MCInst.
  1650. ///
  1651. /// @param mcInst - The MCInst to append to.
  1652. /// @param insn - The internal instruction.
  1653. static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
  1654. unsigned baseRegNo;
  1655. if (insn.mode == MODE_64BIT)
  1656. baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
  1657. else if (insn.mode == MODE_32BIT)
  1658. baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
  1659. else {
  1660. assert(insn.mode == MODE_16BIT);
  1661. baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
  1662. }
  1663. MCOperand baseReg = MCOperand::createReg(baseRegNo);
  1664. mcInst.addOperand(baseReg);
  1665. return false;
  1666. }
  1667. /// translateImmediate - Appends an immediate operand to an MCInst.
  1668. ///
  1669. /// @param mcInst - The MCInst to append to.
  1670. /// @param immediate - The immediate value to append.
  1671. /// @param operand - The operand, as stored in the descriptor table.
  1672. /// @param insn - The internal instruction.
  1673. static void translateImmediate(MCInst &mcInst, uint64_t immediate,
  1674. const OperandSpecifier &operand,
  1675. InternalInstruction &insn,
  1676. const MCDisassembler *Dis) {
  1677. // Sign-extend the immediate if necessary.
  1678. OperandType type = (OperandType)operand.type;
  1679. bool isBranch = false;
  1680. uint64_t pcrel = 0;
  1681. if (type == TYPE_REL) {
  1682. isBranch = true;
  1683. pcrel = insn.startLocation + insn.length;
  1684. switch (operand.encoding) {
  1685. default:
  1686. break;
  1687. case ENCODING_Iv:
  1688. switch (insn.displacementSize) {
  1689. default:
  1690. break;
  1691. case 1:
  1692. if(immediate & 0x80)
  1693. immediate |= ~(0xffull);
  1694. break;
  1695. case 2:
  1696. if(immediate & 0x8000)
  1697. immediate |= ~(0xffffull);
  1698. break;
  1699. case 4:
  1700. if(immediate & 0x80000000)
  1701. immediate |= ~(0xffffffffull);
  1702. break;
  1703. case 8:
  1704. break;
  1705. }
  1706. break;
  1707. case ENCODING_IB:
  1708. if(immediate & 0x80)
  1709. immediate |= ~(0xffull);
  1710. break;
  1711. case ENCODING_IW:
  1712. if(immediate & 0x8000)
  1713. immediate |= ~(0xffffull);
  1714. break;
  1715. case ENCODING_ID:
  1716. if(immediate & 0x80000000)
  1717. immediate |= ~(0xffffffffull);
  1718. break;
  1719. }
  1720. }
  1721. // By default sign-extend all X86 immediates based on their encoding.
  1722. else if (type == TYPE_IMM) {
  1723. switch (operand.encoding) {
  1724. default:
  1725. break;
  1726. case ENCODING_IB:
  1727. if(immediate & 0x80)
  1728. immediate |= ~(0xffull);
  1729. break;
  1730. case ENCODING_IW:
  1731. if(immediate & 0x8000)
  1732. immediate |= ~(0xffffull);
  1733. break;
  1734. case ENCODING_ID:
  1735. if(immediate & 0x80000000)
  1736. immediate |= ~(0xffffffffull);
  1737. break;
  1738. case ENCODING_IO:
  1739. break;
  1740. }
  1741. }
  1742. switch (type) {
  1743. case TYPE_XMM:
  1744. mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
  1745. return;
  1746. case TYPE_YMM:
  1747. mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
  1748. return;
  1749. case TYPE_ZMM:
  1750. mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
  1751. return;
  1752. default:
  1753. // operand is 64 bits wide. Do nothing.
  1754. break;
  1755. }
  1756. if (!Dis->tryAddingSymbolicOperand(
  1757. mcInst, immediate + pcrel, insn.startLocation, isBranch,
  1758. insn.immediateOffset, insn.immediateSize, insn.length))
  1759. mcInst.addOperand(MCOperand::createImm(immediate));
  1760. if (type == TYPE_MOFFS) {
  1761. MCOperand segmentReg;
  1762. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1763. mcInst.addOperand(segmentReg);
  1764. }
  1765. }
  1766. /// translateRMRegister - Translates a register stored in the R/M field of the
  1767. /// ModR/M byte to its LLVM equivalent and appends it to an MCInst.
  1768. /// @param mcInst - The MCInst to append to.
  1769. /// @param insn - The internal instruction to extract the R/M field
  1770. /// from.
  1771. /// @return - 0 on success; -1 otherwise
  1772. static bool translateRMRegister(MCInst &mcInst,
  1773. InternalInstruction &insn) {
  1774. if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
  1775. debug("A R/M register operand may not have a SIB byte");
  1776. return true;
  1777. }
  1778. switch (insn.eaBase) {
  1779. default:
  1780. debug("Unexpected EA base register");
  1781. return true;
  1782. case EA_BASE_NONE:
  1783. debug("EA_BASE_NONE for ModR/M base");
  1784. return true;
  1785. #define ENTRY(x) case EA_BASE_##x:
  1786. ALL_EA_BASES
  1787. #undef ENTRY
  1788. debug("A R/M register operand may not have a base; "
  1789. "the operand must be a register.");
  1790. return true;
  1791. #define ENTRY(x) \
  1792. case EA_REG_##x: \
  1793. mcInst.addOperand(MCOperand::createReg(X86::x)); break;
  1794. ALL_REGS
  1795. #undef ENTRY
  1796. }
  1797. return false;
  1798. }
  1799. /// translateRMMemory - Translates a memory operand stored in the Mod and R/M
  1800. /// fields of an internal instruction (and possibly its SIB byte) to a memory
  1801. /// operand in LLVM's format, and appends it to an MCInst.
  1802. ///
  1803. /// @param mcInst - The MCInst to append to.
  1804. /// @param insn - The instruction to extract Mod, R/M, and SIB fields
  1805. /// from.
  1806. /// @param ForceSIB - The instruction must use SIB.
  1807. /// @return - 0 on success; nonzero otherwise
  1808. static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
  1809. const MCDisassembler *Dis,
  1810. bool ForceSIB = false) {
  1811. // Addresses in an MCInst are represented as five operands:
  1812. // 1. basereg (register) The R/M base, or (if there is a SIB) the
  1813. // SIB base
  1814. // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
  1815. // scale amount
  1816. // 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
  1817. // the index (which is multiplied by the
  1818. // scale amount)
  1819. // 4. displacement (immediate) 0, or the displacement if there is one
  1820. // 5. segmentreg (register) x86_registerNONE for now, but could be set
  1821. // if we have segment overrides
  1822. MCOperand baseReg;
  1823. MCOperand scaleAmount;
  1824. MCOperand indexReg;
  1825. MCOperand displacement;
  1826. MCOperand segmentReg;
  1827. uint64_t pcrel = 0;
  1828. if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
  1829. if (insn.sibBase != SIB_BASE_NONE) {
  1830. switch (insn.sibBase) {
  1831. default:
  1832. debug("Unexpected sibBase");
  1833. return true;
  1834. #define ENTRY(x) \
  1835. case SIB_BASE_##x: \
  1836. baseReg = MCOperand::createReg(X86::x); break;
  1837. ALL_SIB_BASES
  1838. #undef ENTRY
  1839. }
  1840. } else {
  1841. baseReg = MCOperand::createReg(X86::NoRegister);
  1842. }
  1843. if (insn.sibIndex != SIB_INDEX_NONE) {
  1844. switch (insn.sibIndex) {
  1845. default:
  1846. debug("Unexpected sibIndex");
  1847. return true;
  1848. #define ENTRY(x) \
  1849. case SIB_INDEX_##x: \
  1850. indexReg = MCOperand::createReg(X86::x); break;
  1851. EA_BASES_32BIT
  1852. EA_BASES_64BIT
  1853. REGS_XMM
  1854. REGS_YMM
  1855. REGS_ZMM
  1856. #undef ENTRY
  1857. }
  1858. } else {
  1859. // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
  1860. // but no index is used and modrm alone should have been enough.
  1861. // -No base register in 32-bit mode. In 64-bit mode this is used to
  1862. // avoid rip-relative addressing.
  1863. // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
  1864. // base always requires a SIB byte.
  1865. // -A scale other than 1 is used.
  1866. if (!ForceSIB &&
  1867. (insn.sibScale != 1 ||
  1868. (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
  1869. (insn.sibBase != SIB_BASE_NONE &&
  1870. insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
  1871. insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
  1872. indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
  1873. X86::RIZ);
  1874. } else
  1875. indexReg = MCOperand::createReg(X86::NoRegister);
  1876. }
  1877. scaleAmount = MCOperand::createImm(insn.sibScale);
  1878. } else {
  1879. switch (insn.eaBase) {
  1880. case EA_BASE_NONE:
  1881. if (insn.eaDisplacement == EA_DISP_NONE) {
  1882. debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
  1883. return true;
  1884. }
  1885. if (insn.mode == MODE_64BIT){
  1886. pcrel = insn.startLocation + insn.length;
  1887. Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
  1888. insn.startLocation +
  1889. insn.displacementOffset);
  1890. // Section 2.2.1.6
  1891. baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
  1892. X86::RIP);
  1893. }
  1894. else
  1895. baseReg = MCOperand::createReg(X86::NoRegister);
  1896. indexReg = MCOperand::createReg(X86::NoRegister);
  1897. break;
  1898. case EA_BASE_BX_SI:
  1899. baseReg = MCOperand::createReg(X86::BX);
  1900. indexReg = MCOperand::createReg(X86::SI);
  1901. break;
  1902. case EA_BASE_BX_DI:
  1903. baseReg = MCOperand::createReg(X86::BX);
  1904. indexReg = MCOperand::createReg(X86::DI);
  1905. break;
  1906. case EA_BASE_BP_SI:
  1907. baseReg = MCOperand::createReg(X86::BP);
  1908. indexReg = MCOperand::createReg(X86::SI);
  1909. break;
  1910. case EA_BASE_BP_DI:
  1911. baseReg = MCOperand::createReg(X86::BP);
  1912. indexReg = MCOperand::createReg(X86::DI);
  1913. break;
  1914. default:
  1915. indexReg = MCOperand::createReg(X86::NoRegister);
  1916. switch (insn.eaBase) {
  1917. default:
  1918. debug("Unexpected eaBase");
  1919. return true;
  1920. // Here, we will use the fill-ins defined above. However,
  1921. // BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
  1922. // sib and sib64 were handled in the top-level if, so they're only
  1923. // placeholders to keep the compiler happy.
  1924. #define ENTRY(x) \
  1925. case EA_BASE_##x: \
  1926. baseReg = MCOperand::createReg(X86::x); break;
  1927. ALL_EA_BASES
  1928. #undef ENTRY
  1929. #define ENTRY(x) case EA_REG_##x:
  1930. ALL_REGS
  1931. #undef ENTRY
  1932. debug("A R/M memory operand may not be a register; "
  1933. "the base field must be a base.");
  1934. return true;
  1935. }
  1936. }
  1937. scaleAmount = MCOperand::createImm(1);
  1938. }
  1939. displacement = MCOperand::createImm(insn.displacement);
  1940. segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
  1941. mcInst.addOperand(baseReg);
  1942. mcInst.addOperand(scaleAmount);
  1943. mcInst.addOperand(indexReg);
  1944. const uint8_t dispSize =
  1945. (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
  1946. if (!Dis->tryAddingSymbolicOperand(
  1947. mcInst, insn.displacement + pcrel, insn.startLocation, false,
  1948. insn.displacementOffset, dispSize, insn.length))
  1949. mcInst.addOperand(displacement);
  1950. mcInst.addOperand(segmentReg);
  1951. return false;
  1952. }
  1953. /// translateRM - Translates an operand stored in the R/M (and possibly SIB)
  1954. /// byte of an instruction to LLVM form, and appends it to an MCInst.
  1955. ///
  1956. /// @param mcInst - The MCInst to append to.
  1957. /// @param operand - The operand, as stored in the descriptor table.
  1958. /// @param insn - The instruction to extract Mod, R/M, and SIB fields
  1959. /// from.
  1960. /// @return - 0 on success; nonzero otherwise
  1961. static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
  1962. InternalInstruction &insn, const MCDisassembler *Dis) {
  1963. switch (operand.type) {
  1964. default:
  1965. debug("Unexpected type for a R/M operand");
  1966. return true;
  1967. case TYPE_R8:
  1968. case TYPE_R16:
  1969. case TYPE_R32:
  1970. case TYPE_R64:
  1971. case TYPE_Rv:
  1972. case TYPE_MM64:
  1973. case TYPE_XMM:
  1974. case TYPE_YMM:
  1975. case TYPE_ZMM:
  1976. case TYPE_TMM:
  1977. case TYPE_VK_PAIR:
  1978. case TYPE_VK:
  1979. case TYPE_DEBUGREG:
  1980. case TYPE_CONTROLREG:
  1981. case TYPE_BNDR:
  1982. return translateRMRegister(mcInst, insn);
  1983. case TYPE_M:
  1984. case TYPE_MVSIBX:
  1985. case TYPE_MVSIBY:
  1986. case TYPE_MVSIBZ:
  1987. return translateRMMemory(mcInst, insn, Dis);
  1988. case TYPE_MSIB:
  1989. return translateRMMemory(mcInst, insn, Dis, true);
  1990. }
  1991. }
  1992. /// translateFPRegister - Translates a stack position on the FPU stack to its
  1993. /// LLVM form, and appends it to an MCInst.
  1994. ///
  1995. /// @param mcInst - The MCInst to append to.
  1996. /// @param stackPos - The stack position to translate.
  1997. static void translateFPRegister(MCInst &mcInst,
  1998. uint8_t stackPos) {
  1999. mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
  2000. }
  2001. /// translateMaskRegister - Translates a 3-bit mask register number to
  2002. /// LLVM form, and appends it to an MCInst.
  2003. ///
  2004. /// @param mcInst - The MCInst to append to.
  2005. /// @param maskRegNum - Number of mask register from 0 to 7.
  2006. /// @return - false on success; true otherwise.
  2007. static bool translateMaskRegister(MCInst &mcInst,
  2008. uint8_t maskRegNum) {
  2009. if (maskRegNum >= 8) {
  2010. debug("Invalid mask register number");
  2011. return true;
  2012. }
  2013. mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
  2014. return false;
  2015. }
  2016. /// translateOperand - Translates an operand stored in an internal instruction
  2017. /// to LLVM's format and appends it to an MCInst.
  2018. ///
  2019. /// @param mcInst - The MCInst to append to.
  2020. /// @param operand - The operand, as stored in the descriptor table.
  2021. /// @param insn - The internal instruction.
  2022. /// @return - false on success; true otherwise.
  2023. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
  2024. InternalInstruction &insn,
  2025. const MCDisassembler *Dis) {
  2026. switch (operand.encoding) {
  2027. default:
  2028. debug("Unhandled operand encoding during translation");
  2029. return true;
  2030. case ENCODING_REG:
  2031. translateRegister(mcInst, insn.reg);
  2032. return false;
  2033. case ENCODING_WRITEMASK:
  2034. return translateMaskRegister(mcInst, insn.writemask);
  2035. case ENCODING_SIB:
  2036. CASE_ENCODING_RM:
  2037. CASE_ENCODING_VSIB:
  2038. return translateRM(mcInst, operand, insn, Dis);
  2039. case ENCODING_IB:
  2040. case ENCODING_IW:
  2041. case ENCODING_ID:
  2042. case ENCODING_IO:
  2043. case ENCODING_Iv:
  2044. case ENCODING_Ia:
  2045. translateImmediate(mcInst,
  2046. insn.immediates[insn.numImmediatesTranslated++],
  2047. operand,
  2048. insn,
  2049. Dis);
  2050. return false;
  2051. case ENCODING_IRC:
  2052. mcInst.addOperand(MCOperand::createImm(insn.RC));
  2053. return false;
  2054. case ENCODING_SI:
  2055. return translateSrcIndex(mcInst, insn);
  2056. case ENCODING_DI:
  2057. return translateDstIndex(mcInst, insn);
  2058. case ENCODING_RB:
  2059. case ENCODING_RW:
  2060. case ENCODING_RD:
  2061. case ENCODING_RO:
  2062. case ENCODING_Rv:
  2063. translateRegister(mcInst, insn.opcodeRegister);
  2064. return false;
  2065. case ENCODING_CC:
  2066. mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
  2067. return false;
  2068. case ENCODING_FP:
  2069. translateFPRegister(mcInst, insn.modRM & 7);
  2070. return false;
  2071. case ENCODING_VVVV:
  2072. translateRegister(mcInst, insn.vvvv);
  2073. return false;
  2074. case ENCODING_DUP:
  2075. return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
  2076. insn, Dis);
  2077. }
  2078. }
  2079. /// translateInstruction - Translates an internal instruction and all its
  2080. /// operands to an MCInst.
  2081. ///
  2082. /// @param mcInst - The MCInst to populate with the instruction's data.
  2083. /// @param insn - The internal instruction.
  2084. /// @return - false on success; true otherwise.
  2085. static bool translateInstruction(MCInst &mcInst,
  2086. InternalInstruction &insn,
  2087. const MCDisassembler *Dis) {
  2088. if (!insn.spec) {
  2089. debug("Instruction has no specification");
  2090. return true;
  2091. }
  2092. mcInst.clear();
  2093. mcInst.setOpcode(insn.instructionID);
  2094. // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
  2095. // prefix bytes should be disassembled as xrelease and xacquire then set the
  2096. // opcode to those instead of the rep and repne opcodes.
  2097. if (insn.xAcquireRelease) {
  2098. if(mcInst.getOpcode() == X86::REP_PREFIX)
  2099. mcInst.setOpcode(X86::XRELEASE_PREFIX);
  2100. else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
  2101. mcInst.setOpcode(X86::XACQUIRE_PREFIX);
  2102. }
  2103. insn.numImmediatesTranslated = 0;
  2104. for (const auto &Op : insn.operands) {
  2105. if (Op.encoding != ENCODING_NONE) {
  2106. if (translateOperand(mcInst, Op, insn, Dis)) {
  2107. return true;
  2108. }
  2109. }
  2110. }
  2111. return false;
  2112. }
  2113. static MCDisassembler *createX86Disassembler(const Target &T,
  2114. const MCSubtargetInfo &STI,
  2115. MCContext &Ctx) {
  2116. std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
  2117. return new X86GenericDisassembler(STI, Ctx, std::move(MII));
  2118. }
  2119. extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
  2120. // Register the disassembler.
  2121. TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
  2122. createX86Disassembler);
  2123. TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
  2124. createX86Disassembler);
  2125. }