sljitNativeX86_common.c

  1. /*
  2. * Stack-less Just-In-Time compiler
  3. *
  4. * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without modification, are
  7. * permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this list of
  10. * conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice, this list
  13. * of conditions and the following disclaimer in the documentation and/or other materials
  14. * provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
  17. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
  19. * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  20. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  21. * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  22. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  24. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
  27. {
  28. #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
  29. return "x86" SLJIT_CPUINFO " ABI:fastcall";
  30. #else
  31. return "x86" SLJIT_CPUINFO;
  32. #endif
  33. }
  34. /*
  35. 32b register indexes:
  36. 0 - EAX
  37. 1 - ECX
  38. 2 - EDX
  39. 3 - EBX
  40. 4 - ESP
  41. 5 - EBP
  42. 6 - ESI
  43. 7 - EDI
  44. */
  45. /*
  46. 64b register indexes:
  47. 0 - RAX
  48. 1 - RCX
  49. 2 - RDX
  50. 3 - RBX
  51. 4 - RSP
  52. 5 - RBP
  53. 6 - RSI
  54. 7 - RDI
  55. 8 - R8 - from here on a REX prefix is required
  56. 9 - R9
  57. 10 - R10
  58. 11 - R11
  59. 12 - R12
  60. 13 - R13
  61. 14 - R14
  62. 15 - R15
  63. */
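/* Illustration (informal): indexes 8-15 need one extra bit that the 3-bit
   ModRM fields cannot hold, so it is supplied by the REX prefix: REX.B
   extends the rm field, REX.R the reg field and REX.X the SIB index field.
   E.g. index 9 (R9) is emitted as the low bits 001 plus the matching REX bit. */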
  64. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  65. /* Last register + 1. */
  66. #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
  67. static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
  68. 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
  69. };
  70. #define CHECK_EXTRA_REGS(p, w, do) \
  71. if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
  72. if (p <= compiler->scratches) \
  73. w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
  74. else \
  75. w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
  76. p = SLJIT_MEM1(SLJIT_SP); \
  77. do; \
  78. }
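/* Rough sketch of what the macro above does: on x86-32 only a few SLJIT
   registers live in hardware registers (the non-zero reg_map entries); the
   rest are kept in stack slots. For instance, if p is SLJIT_R3 and R3 is a
   scratch register (p <= compiler->scratches), w becomes
       compiler->saveds_offset - (SLJIT_R3 - SLJIT_R2) * sizeof(sljit_sw)
   and p is rewritten to SLJIT_MEM1(SLJIT_SP), so the emitter produces a
   stack access instead of a register access. */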
  79. #else /* SLJIT_CONFIG_X86_32 */
  80. /* Last register + 1. */
  81. #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
  82. #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
  83. /* Note: r12 & 0x7 == 0b100, which is decoded as "a SIB byte follows".
  84. Note: avoid using r12 and r13 for memory addressing;
  85. therefore r12 is better used as a higher saved register. */
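/* Encoding example behind the note above: with mod != 0b11, an rm field of
   0b100 does not mean "[rsp/r12]" directly but signals that a SIB byte
   follows, so every memory access through r12 costs an extra SIB byte.
   Similarly, rm == 0b101 with mod == 0b00 means disp32/RIP-relative, so
   "[r13]" always needs an explicit displacement byte. */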
  86. #ifndef _WIN64
  87. /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
  88. static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  89. 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
  90. };
  91. /* low-map. reg_map & 0x7. */
  92. static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  93. 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
  94. };
  95. #else
  96. /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
  97. static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  98. 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
  99. };
  100. /* low-map. reg_map & 0x7. */
  101. static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
  102. 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
  103. };
  104. #endif
  105. /* Args: xmm0-xmm3 */
  106. static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  107. 4, 0, 1, 2, 3, 5, 6
  108. };
  109. /* low-map. freg_map & 0x7. */
  110. static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
  111. 4, 0, 1, 2, 3, 5, 6
  112. };
  113. #define REX_W 0x48
  114. #define REX_R 0x44
  115. #define REX_X 0x42
  116. #define REX_B 0x41
  117. #define REX 0x40
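/* Worked example of the prefix bits above (sketch): "mov r8, rax" is
       (REX_W | REX_B)  0x89  0xc0   ->   49 89 c0
   REX_W selects the 64-bit operand size and REX_B extends the ModRM rm field
   so that it can address r8-r15. */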
  118. #ifndef _WIN64
  119. #define HALFWORD_MAX 0x7fffffffl
  120. #define HALFWORD_MIN -0x80000000l
  121. #else
  122. #define HALFWORD_MAX 0x7fffffffll
  123. #define HALFWORD_MIN -0x80000000ll
  124. #endif
  125. #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
  126. #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
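/* Example: most x86-64 instructions only take sign-extended 32-bit
   immediates, which is what these macros test. IS_HALFWORD(0x7fffffff) holds,
   while IS_HALFWORD((sljit_sw)0x80000000) does not, since that value would
   sign-extend to 0xffffffff80000000. */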
  127. #define CHECK_EXTRA_REGS(p, w, do)
  128. #endif /* SLJIT_CONFIG_X86_32 */
  129. #define TMP_FREG (0)
  130. /* Size flags for emit_x86_instruction: */
  131. #define EX86_BIN_INS 0x0010
  132. #define EX86_SHIFT_INS 0x0020
  133. #define EX86_REX 0x0040
  134. #define EX86_NO_REXW 0x0080
  135. #define EX86_BYTE_ARG 0x0100
  136. #define EX86_HALF_ARG 0x0200
  137. #define EX86_PREF_66 0x0400
  138. #define EX86_PREF_F2 0x0800
  139. #define EX86_PREF_F3 0x1000
  140. #define EX86_SSE2_OP1 0x2000
  141. #define EX86_SSE2_OP2 0x4000
  142. #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
  143. /* --------------------------------------------------------------------- */
  144. /* Instruction forms */
  145. /* --------------------------------------------------------------------- */
  146. #define ADD (/* BINARY */ 0 << 3)
  147. #define ADD_EAX_i32 0x05
  148. #define ADD_r_rm 0x03
  149. #define ADD_rm_r 0x01
  150. #define ADDSD_x_xm 0x58
  151. #define ADC (/* BINARY */ 2 << 3)
  152. #define ADC_EAX_i32 0x15
  153. #define ADC_r_rm 0x13
  154. #define ADC_rm_r 0x11
  155. #define AND (/* BINARY */ 4 << 3)
  156. #define AND_EAX_i32 0x25
  157. #define AND_r_rm 0x23
  158. #define AND_rm_r 0x21
  159. #define ANDPD_x_xm 0x54
  160. #define BSR_r_rm (/* GROUP_0F */ 0xbd)
  161. #define CALL_i32 0xe8
  162. #define CALL_rm (/* GROUP_FF */ 2 << 3)
  163. #define CDQ 0x99
  164. #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
  165. #define CMP (/* BINARY */ 7 << 3)
  166. #define CMP_EAX_i32 0x3d
  167. #define CMP_r_rm 0x3b
  168. #define CMP_rm_r 0x39
  169. #define CVTPD2PS_x_xm 0x5a
  170. #define CVTSI2SD_x_rm 0x2a
  171. #define CVTTSD2SI_r_xm 0x2c
  172. #define DIV (/* GROUP_F7 */ 6 << 3)
  173. #define DIVSD_x_xm 0x5e
  174. #define FSTPS 0xd9
  175. #define FSTPD 0xdd
  176. #define INT3 0xcc
  177. #define IDIV (/* GROUP_F7 */ 7 << 3)
  178. #define IMUL (/* GROUP_F7 */ 5 << 3)
  179. #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
  180. #define IMUL_r_rm_i8 0x6b
  181. #define IMUL_r_rm_i32 0x69
  182. #define JE_i8 0x74
  183. #define JNE_i8 0x75
  184. #define JMP_i8 0xeb
  185. #define JMP_i32 0xe9
  186. #define JMP_rm (/* GROUP_FF */ 4 << 3)
  187. #define LEA_r_m 0x8d
  188. #define MOV_r_rm 0x8b
  189. #define MOV_r_i32 0xb8
  190. #define MOV_rm_r 0x89
  191. #define MOV_rm_i32 0xc7
  192. #define MOV_rm8_i8 0xc6
  193. #define MOV_rm8_r8 0x88
  194. #define MOVSD_x_xm 0x10
  195. #define MOVSD_xm_x 0x11
  196. #define MOVSXD_r_rm 0x63
  197. #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
  198. #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
  199. #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
  200. #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
  201. #define MUL (/* GROUP_F7 */ 4 << 3)
  202. #define MULSD_x_xm 0x59
  203. #define NEG_rm (/* GROUP_F7 */ 3 << 3)
  204. #define NOP 0x90
  205. #define NOT_rm (/* GROUP_F7 */ 2 << 3)
  206. #define OR (/* BINARY */ 1 << 3)
  207. #define OR_r_rm 0x0b
  208. #define OR_EAX_i32 0x0d
  209. #define OR_rm_r 0x09
  210. #define OR_rm8_r8 0x08
  211. #define POP_r 0x58
  212. #define POP_rm 0x8f
  213. #define POPF 0x9d
  214. #define PREFETCH 0x18
  215. #define PUSH_i32 0x68
  216. #define PUSH_r 0x50
  217. #define PUSH_rm (/* GROUP_FF */ 6 << 3)
  218. #define PUSHF 0x9c
  219. #define RET_near 0xc3
  220. #define RET_i16 0xc2
  221. #define SBB (/* BINARY */ 3 << 3)
  222. #define SBB_EAX_i32 0x1d
  223. #define SBB_r_rm 0x1b
  224. #define SBB_rm_r 0x19
  225. #define SAR (/* SHIFT */ 7 << 3)
  226. #define SHL (/* SHIFT */ 4 << 3)
  227. #define SHR (/* SHIFT */ 5 << 3)
  228. #define SUB (/* BINARY */ 5 << 3)
  229. #define SUB_EAX_i32 0x2d
  230. #define SUB_r_rm 0x2b
  231. #define SUB_rm_r 0x29
  232. #define SUBSD_x_xm 0x5c
  233. #define TEST_EAX_i32 0xa9
  234. #define TEST_rm_r 0x85
  235. #define UCOMISD_x_xm 0x2e
  236. #define UNPCKLPD_x_xm 0x14
  237. #define XCHG_EAX_r 0x90
  238. #define XCHG_r_rm 0x87
  239. #define XOR (/* BINARY */ 6 << 3)
  240. #define XOR_EAX_i32 0x35
  241. #define XOR_r_rm 0x33
  242. #define XOR_rm_r 0x31
  243. #define XORPD_x_xm 0x57
  244. #define GROUP_0F 0x0f
  245. #define GROUP_F7 0xf7
  246. #define GROUP_FF 0xff
  247. #define GROUP_BINARY_81 0x81
  248. #define GROUP_BINARY_83 0x83
  249. #define GROUP_SHIFT_1 0xd1
  250. #define GROUP_SHIFT_N 0xc1
  251. #define GROUP_SHIFT_CL 0xd3
  252. #define MOD_REG 0xc0
  253. #define MOD_DISP8 0x40
  254. #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
  255. #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
  256. #define POP_REG(r) (*inst++ = (POP_r + (r)))
  257. #define RET() (*inst++ = (RET_near))
  258. #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
  259. /* r32, r/m32 */
  260. #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
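/* Example use of the macro above (sketch): MOV_RM(0x3, 0, 1) emits the bytes
   8b c1, i.e. "mov eax, ecx" - mod 0b11 selects register-direct addressing,
   and for the 0x8b form reg is the destination and rm the source. */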
  261. /* Multithreading does not affect these static variables, since they store
  262. built-in CPU features. Therefore they can be overwritten by different threads
  263. if they detect the CPU features at the same time. */
  264. #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  265. static sljit_s32 cpu_has_sse2 = -1;
  266. #endif
  267. static sljit_s32 cpu_has_cmov = -1;
  268. #ifdef _WIN32_WCE
  269. #include <cmnintrin.h>
  270. #elif defined(_MSC_VER) && _MSC_VER >= 1400
  271. #include <intrin.h>
  272. #endif
  273. /******************************************************/
  274. /* Unaligned-store functions */
  275. /******************************************************/
  276. static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
  277. {
  278. SLJIT_MEMCPY(addr, &value, sizeof(value));
  279. }
  280. static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
  281. {
  282. SLJIT_MEMCPY(addr, &value, sizeof(value));
  283. }
  284. static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
  285. {
  286. SLJIT_MEMCPY(addr, &value, sizeof(value));
  287. }
  288. /******************************************************/
  289. /* Utility functions */
  290. /******************************************************/
  291. static void get_cpu_features(void)
  292. {
  293. sljit_u32 features;
  294. #if defined(_MSC_VER) && _MSC_VER >= 1400
  295. int CPUInfo[4];
  296. __cpuid(CPUInfo, 1);
  297. features = (sljit_u32)CPUInfo[3];
  298. #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
  299. /* AT&T syntax. */
  300. __asm__ (
  301. "movl $0x1, %%eax\n"
  302. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  303. /* On x86-32, there is no red zone, so this
  304. should work (no need for a local variable). */
  305. "push %%ebx\n"
  306. #endif
  307. "cpuid\n"
  308. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  309. "pop %%ebx\n"
  310. #endif
  311. "movl %%edx, %0\n"
  312. : "=g" (features)
  313. :
  314. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  315. : "%eax", "%ecx", "%edx"
  316. #else
  317. : "%rax", "%rbx", "%rcx", "%rdx"
  318. #endif
  319. );
  320. #else /* _MSC_VER && _MSC_VER >= 1400 */
  321. /* Intel syntax. */
  322. __asm {
  323. mov eax, 1
  324. cpuid
  325. mov features, edx
  326. }
  327. #endif /* _MSC_VER && _MSC_VER >= 1400 */
  328. #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  329. cpu_has_sse2 = (features >> 26) & 0x1;
  330. #endif
  331. cpu_has_cmov = (features >> 15) & 0x1;
  332. }
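/* For reference: CPUID leaf 1 reports feature flags in EDX, and the two bits
   tested above are bit 26 (SSE2) and bit 15 (CMOV); those are the only flags
   this helper extracts, the rest of EDX is ignored. */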
  333. static sljit_u8 get_jump_code(sljit_s32 type)
  334. {
  335. switch (type) {
  336. case SLJIT_EQUAL:
  337. case SLJIT_EQUAL_F64:
  338. return 0x84 /* je */;
  339. case SLJIT_NOT_EQUAL:
  340. case SLJIT_NOT_EQUAL_F64:
  341. return 0x85 /* jne */;
  342. case SLJIT_LESS:
  343. case SLJIT_LESS_F64:
  344. return 0x82 /* jc */;
  345. case SLJIT_GREATER_EQUAL:
  346. case SLJIT_GREATER_EQUAL_F64:
  347. return 0x83 /* jae */;
  348. case SLJIT_GREATER:
  349. case SLJIT_GREATER_F64:
  350. return 0x87 /* jnbe */;
  351. case SLJIT_LESS_EQUAL:
  352. case SLJIT_LESS_EQUAL_F64:
  353. return 0x86 /* jbe */;
  354. case SLJIT_SIG_LESS:
  355. return 0x8c /* jl */;
  356. case SLJIT_SIG_GREATER_EQUAL:
  357. return 0x8d /* jnl */;
  358. case SLJIT_SIG_GREATER:
  359. return 0x8f /* jnle */;
  360. case SLJIT_SIG_LESS_EQUAL:
  361. return 0x8e /* jle */;
  362. case SLJIT_OVERFLOW:
  363. case SLJIT_MUL_OVERFLOW:
  364. return 0x80 /* jo */;
  365. case SLJIT_NOT_OVERFLOW:
  366. case SLJIT_MUL_NOT_OVERFLOW:
  367. return 0x81 /* jno */;
  368. case SLJIT_UNORDERED_F64:
  369. return 0x8a /* jp */;
  370. case SLJIT_ORDERED_F64:
  371. return 0x8b /* jpo */;
  372. }
  373. return 0;
  374. }
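/* These values are the second opcode byte of the two-byte "0f 8x" Jcc rel32
   forms. The matching one-byte Jcc rel8 opcode is the same value minus 0x10
   (e.g. je: 0f 84 long form, 74 short form), which is how
   generate_near_jump_code below derives the short encoding. */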
  375. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  376. static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
  377. #else
  378. static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
  379. static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
  380. #endif
  381. static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
  382. {
  383. sljit_s32 type = jump->flags >> TYPE_SHIFT;
  384. sljit_s32 short_jump;
  385. sljit_uw label_addr;
  386. if (jump->flags & JUMP_LABEL)
  387. label_addr = (sljit_uw)(code + jump->u.label->size);
  388. else
  389. label_addr = jump->u.target - executable_offset;
  390. short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
  391. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  392. if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
  393. return generate_far_jump_code(jump, code_ptr);
  394. #endif
  395. if (type == SLJIT_JUMP) {
  396. if (short_jump)
  397. *code_ptr++ = JMP_i8;
  398. else
  399. *code_ptr++ = JMP_i32;
  400. jump->addr++;
  401. }
  402. else if (type >= SLJIT_FAST_CALL) {
  403. short_jump = 0;
  404. *code_ptr++ = CALL_i32;
  405. jump->addr++;
  406. }
  407. else if (short_jump) {
  408. *code_ptr++ = get_jump_code(type) - 0x10;
  409. jump->addr++;
  410. }
  411. else {
  412. *code_ptr++ = GROUP_0F;
  413. *code_ptr++ = get_jump_code(type);
  414. jump->addr += 2;
  415. }
  416. if (short_jump) {
  417. jump->flags |= PATCH_MB;
  418. code_ptr += sizeof(sljit_s8);
  419. } else {
  420. jump->flags |= PATCH_MW;
  421. code_ptr += sizeof(sljit_s32);
  422. }
  423. return code_ptr;
  424. }
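/* Size accounting example for the function above (informal): a short
   conditional jump takes 2 bytes (opcode + rel8) and a near one 6 bytes
   (0f, opcode, rel32). jump->addr is advanced past the opcode bytes so that
   it points at the displacement field, and the PATCH_MB / PATCH_MW flag tells
   the patch loop in sljit_generate_code whether to store an 8-bit or a
   32-bit displacement there. */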
  425. SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
  426. {
  427. struct sljit_memory_fragment *buf;
  428. sljit_u8 *code;
  429. sljit_u8 *code_ptr;
  430. sljit_u8 *buf_ptr;
  431. sljit_u8 *buf_end;
  432. sljit_u8 len;
  433. sljit_sw executable_offset;
  434. sljit_sw jump_addr;
  435. struct sljit_label *label;
  436. struct sljit_jump *jump;
  437. struct sljit_const *const_;
  438. struct sljit_put_label *put_label;
  439. CHECK_ERROR_PTR();
  440. CHECK_PTR(check_sljit_generate_code(compiler));
  441. reverse_buf(compiler);
  442. /* Second code generation pass. */
  443. code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
  444. PTR_FAIL_WITH_EXEC_IF(code);
  445. buf = compiler->buf;
  446. code_ptr = code;
  447. label = compiler->labels;
  448. jump = compiler->jumps;
  449. const_ = compiler->consts;
  450. put_label = compiler->put_labels;
  451. executable_offset = SLJIT_EXEC_OFFSET(code);
  452. do {
  453. buf_ptr = buf->memory;
  454. buf_end = buf_ptr + buf->used_size;
  455. do {
  456. len = *buf_ptr++;
  457. if (len > 0) {
  458. /* The code is already generated. */
  459. SLJIT_MEMCPY(code_ptr, buf_ptr, len);
  460. code_ptr += len;
  461. buf_ptr += len;
  462. }
  463. else {
  464. switch (*buf_ptr) {
  465. case 0:
  466. label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
  467. label->size = code_ptr - code;
  468. label = label->next;
  469. break;
  470. case 1:
  471. jump->addr = (sljit_uw)code_ptr;
  472. if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
  473. code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
  474. else {
  475. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  476. code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
  477. #else
  478. code_ptr = generate_far_jump_code(jump, code_ptr);
  479. #endif
  480. }
  481. jump = jump->next;
  482. break;
  483. case 2:
  484. const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
  485. const_ = const_->next;
  486. break;
  487. default:
  488. SLJIT_ASSERT(*buf_ptr == 3);
  489. SLJIT_ASSERT(put_label->label);
  490. put_label->addr = (sljit_uw)code_ptr;
  491. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  492. code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
  493. #endif
  494. put_label = put_label->next;
  495. break;
  496. }
  497. buf_ptr++;
  498. }
  499. } while (buf_ptr < buf_end);
  500. SLJIT_ASSERT(buf_ptr == buf_end);
  501. buf = buf->next;
  502. } while (buf);
  503. SLJIT_ASSERT(!label);
  504. SLJIT_ASSERT(!jump);
  505. SLJIT_ASSERT(!const_);
  506. SLJIT_ASSERT(!put_label);
  507. SLJIT_ASSERT(code_ptr <= code + compiler->size);
  508. jump = compiler->jumps;
  509. while (jump) {
  510. jump_addr = jump->addr + executable_offset;
  511. if (jump->flags & PATCH_MB) {
  512. SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
  513. *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
  514. } else if (jump->flags & PATCH_MW) {
  515. if (jump->flags & JUMP_LABEL) {
  516. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  517. sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
  518. #else
  519. SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  520. sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
  521. #endif
  522. }
  523. else {
  524. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  525. sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
  526. #else
  527. SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
  528. sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
  529. #endif
  530. }
  531. }
  532. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  533. else if (jump->flags & PATCH_MD)
  534. sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
  535. #endif
  536. jump = jump->next;
  537. }
  538. put_label = compiler->put_labels;
  539. while (put_label) {
  540. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  541. sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
  542. #else
  543. if (put_label->flags & PATCH_MD) {
  544. SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
  545. sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
  546. }
  547. else {
  548. SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
  549. sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
  550. }
  551. #endif
  552. put_label = put_label->next;
  553. }
  554. compiler->error = SLJIT_ERR_COMPILED;
  555. compiler->executable_offset = executable_offset;
  556. compiler->executable_size = code_ptr - code;
  557. return (void*)(code + executable_offset);
  558. }
  559. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
  560. {
  561. switch (feature_type) {
  562. case SLJIT_HAS_FPU:
  563. #ifdef SLJIT_IS_FPU_AVAILABLE
  564. return SLJIT_IS_FPU_AVAILABLE;
  565. #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  566. if (cpu_has_sse2 == -1)
  567. get_cpu_features();
  568. return cpu_has_sse2;
  569. #else /* SLJIT_DETECT_SSE2 */
  570. return 1;
  571. #endif /* SLJIT_DETECT_SSE2 */
  572. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  573. case SLJIT_HAS_VIRTUAL_REGISTERS:
  574. return 1;
  575. #endif
  576. case SLJIT_HAS_CLZ:
  577. case SLJIT_HAS_CMOV:
  578. if (cpu_has_cmov == -1)
  579. get_cpu_features();
  580. return cpu_has_cmov;
  581. case SLJIT_HAS_SSE2:
  582. #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
  583. if (cpu_has_sse2 == -1)
  584. get_cpu_features();
  585. return cpu_has_sse2;
  586. #else
  587. return 1;
  588. #endif
  589. default:
  590. return 0;
  591. }
  592. }
  593. /* --------------------------------------------------------------------- */
  594. /* Operators */
  595. /* --------------------------------------------------------------------- */
  596. #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
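/* Example: BINARY_OPCODE(ADD) evaluates to 0x05030100, packing
   ADD_EAX_i32 (0x05), ADD_r_rm (0x03), ADD_rm_r (0x01) and the /0 group code
   (0x00) into a single sljit_u32; emit_cum_binary and emit_non_cum_binary
   unpack the four bytes again by shifting. */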
  597. static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
  598. sljit_u32 op_types,
  599. sljit_s32 dst, sljit_sw dstw,
  600. sljit_s32 src1, sljit_sw src1w,
  601. sljit_s32 src2, sljit_sw src2w);
  602. static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
  603. sljit_u32 op_types,
  604. sljit_s32 dst, sljit_sw dstw,
  605. sljit_s32 src1, sljit_sw src1w,
  606. sljit_s32 src2, sljit_sw src2w);
  607. static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  608. sljit_s32 dst, sljit_sw dstw,
  609. sljit_s32 src, sljit_sw srcw);
  610. #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
  611. FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  612. static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
  613. sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
  614. static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
  615. sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
  616. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  617. #include "sljitNativeX86_32.c"
  618. #else
  619. #include "sljitNativeX86_64.c"
  620. #endif
  621. static sljit_s32 emit_mov(struct sljit_compiler *compiler,
  622. sljit_s32 dst, sljit_sw dstw,
  623. sljit_s32 src, sljit_sw srcw)
  624. {
  625. sljit_u8* inst;
  626. SLJIT_ASSERT(dst != SLJIT_UNUSED);
  627. if (FAST_IS_REG(src)) {
  628. inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
  629. FAIL_IF(!inst);
  630. *inst = MOV_rm_r;
  631. return SLJIT_SUCCESS;
  632. }
  633. if (src & SLJIT_IMM) {
  634. if (FAST_IS_REG(dst)) {
  635. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  636. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  637. #else
  638. if (!compiler->mode32) {
  639. if (NOT_HALFWORD(srcw))
  640. return emit_load_imm64(compiler, dst, srcw);
  641. }
  642. else
  643. return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
  644. #endif
  645. }
  646. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  647. if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
  648. /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
  649. an immediate directly into memory, so TMP_REG1 can be used. */
  650. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
  651. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  652. FAIL_IF(!inst);
  653. *inst = MOV_rm_r;
  654. return SLJIT_SUCCESS;
  655. }
  656. #endif
  657. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
  658. FAIL_IF(!inst);
  659. *inst = MOV_rm_i32;
  660. return SLJIT_SUCCESS;
  661. }
  662. if (FAST_IS_REG(dst)) {
  663. inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
  664. FAIL_IF(!inst);
  665. *inst = MOV_r_rm;
  666. return SLJIT_SUCCESS;
  667. }
  668. /* Memory-to-memory move. Only the SLJIT_MOV operation copies
  669. data from memory to memory, so TMP_REG1 can be used. */
  670. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
  671. FAIL_IF(!inst);
  672. *inst = MOV_r_rm;
  673. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  674. FAIL_IF(!inst);
  675. *inst = MOV_rm_r;
  676. return SLJIT_SUCCESS;
  677. }
  678. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
  679. {
  680. sljit_u8 *inst;
  681. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  682. sljit_s32 size;
  683. #endif
  684. CHECK_ERROR();
  685. CHECK(check_sljit_emit_op0(compiler, op));
  686. switch (GET_OPCODE(op)) {
  687. case SLJIT_BREAKPOINT:
  688. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  689. FAIL_IF(!inst);
  690. INC_SIZE(1);
  691. *inst = INT3;
  692. break;
  693. case SLJIT_NOP:
  694. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  695. FAIL_IF(!inst);
  696. INC_SIZE(1);
  697. *inst = NOP;
  698. break;
  699. case SLJIT_LMUL_UW:
  700. case SLJIT_LMUL_SW:
  701. case SLJIT_DIVMOD_UW:
  702. case SLJIT_DIVMOD_SW:
  703. case SLJIT_DIV_UW:
  704. case SLJIT_DIV_SW:
  705. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  706. #ifdef _WIN64
  707. SLJIT_ASSERT(
  708. reg_map[SLJIT_R0] == 0
  709. && reg_map[SLJIT_R1] == 2
  710. && reg_map[TMP_REG1] > 7);
  711. #else
  712. SLJIT_ASSERT(
  713. reg_map[SLJIT_R0] == 0
  714. && reg_map[SLJIT_R1] < 7
  715. && reg_map[TMP_REG1] == 2);
  716. #endif
  717. compiler->mode32 = op & SLJIT_I32_OP;
  718. #endif
  719. SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
  720. op = GET_OPCODE(op);
  721. if ((op | 0x2) == SLJIT_DIV_UW) {
  722. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  723. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  724. inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
  725. #else
  726. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  727. #endif
  728. FAIL_IF(!inst);
  729. *inst = XOR_r_rm;
  730. }
  731. if ((op | 0x2) == SLJIT_DIV_SW) {
  732. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
  733. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
  734. #endif
  735. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  736. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  737. FAIL_IF(!inst);
  738. INC_SIZE(1);
  739. *inst = CDQ;
  740. #else
  741. if (compiler->mode32) {
  742. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  743. FAIL_IF(!inst);
  744. INC_SIZE(1);
  745. *inst = CDQ;
  746. } else {
  747. inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  748. FAIL_IF(!inst);
  749. INC_SIZE(2);
  750. *inst++ = REX_W;
  751. *inst = CDQ;
  752. }
  753. #endif
  754. }
  755. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  756. inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
  757. FAIL_IF(!inst);
  758. INC_SIZE(2);
  759. *inst++ = GROUP_F7;
  760. *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
  761. #else
  762. #ifdef _WIN64
  763. size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
  764. #else
  765. size = (!compiler->mode32) ? 3 : 2;
  766. #endif
  767. inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  768. FAIL_IF(!inst);
  769. INC_SIZE(size);
  770. #ifdef _WIN64
  771. if (!compiler->mode32)
  772. *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
  773. else if (op >= SLJIT_DIVMOD_UW)
  774. *inst++ = REX_B;
  775. *inst++ = GROUP_F7;
  776. *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
  777. #else
  778. if (!compiler->mode32)
  779. *inst++ = REX_W;
  780. *inst++ = GROUP_F7;
  781. *inst = MOD_REG | reg_map[SLJIT_R1];
  782. #endif
  783. #endif
  784. switch (op) {
  785. case SLJIT_LMUL_UW:
  786. *inst |= MUL;
  787. break;
  788. case SLJIT_LMUL_SW:
  789. *inst |= IMUL;
  790. break;
  791. case SLJIT_DIVMOD_UW:
  792. case SLJIT_DIV_UW:
  793. *inst |= DIV;
  794. break;
  795. case SLJIT_DIVMOD_SW:
  796. case SLJIT_DIV_SW:
  797. *inst |= IDIV;
  798. break;
  799. }
  800. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
  801. if (op <= SLJIT_DIVMOD_SW)
  802. EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  803. #else
  804. if (op >= SLJIT_DIV_UW)
  805. EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
  806. #endif
  807. break;
  808. }
  809. return SLJIT_SUCCESS;
  810. }
  811. #define ENCODE_PREFIX(prefix) \
  812. do { \
  813. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
  814. FAIL_IF(!inst); \
  815. INC_SIZE(1); \
  816. *inst = (prefix); \
  817. } while (0)
  818. static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
  819. sljit_s32 dst, sljit_sw dstw,
  820. sljit_s32 src, sljit_sw srcw)
  821. {
  822. sljit_u8* inst;
  823. sljit_s32 dst_r;
  824. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  825. sljit_s32 work_r;
  826. #endif
  827. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  828. compiler->mode32 = 0;
  829. #endif
  830. if (src & SLJIT_IMM) {
  831. if (FAST_IS_REG(dst)) {
  832. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  833. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  834. #else
  835. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  836. FAIL_IF(!inst);
  837. *inst = MOV_rm_i32;
  838. return SLJIT_SUCCESS;
  839. #endif
  840. }
  841. inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
  842. FAIL_IF(!inst);
  843. *inst = MOV_rm8_i8;
  844. return SLJIT_SUCCESS;
  845. }
  846. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  847. if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
  848. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  849. if (reg_map[src] >= 4) {
  850. SLJIT_ASSERT(dst_r == TMP_REG1);
  851. EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
  852. } else
  853. dst_r = src;
  854. #else
  855. dst_r = src;
  856. #endif
  857. }
  858. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  859. else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
  860. /* src, dst are registers. */
  861. SLJIT_ASSERT(SLOW_IS_REG(dst));
  862. if (reg_map[dst] < 4) {
  863. if (dst != src)
  864. EMIT_MOV(compiler, dst, 0, src, 0);
  865. inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
  866. FAIL_IF(!inst);
  867. *inst++ = GROUP_0F;
  868. *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  869. }
  870. else {
  871. if (dst != src)
  872. EMIT_MOV(compiler, dst, 0, src, 0);
  873. if (sign) {
  874. /* shl reg, 24 */
  875. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  876. FAIL_IF(!inst);
  877. *inst |= SHL;
  878. /* sar reg, 24 */
  879. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
  880. FAIL_IF(!inst);
  881. *inst |= SAR;
  882. }
  883. else {
  884. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
  885. FAIL_IF(!inst);
  886. *(inst + 1) |= AND;
  887. }
  888. }
  889. return SLJIT_SUCCESS;
  890. }
  891. #endif
  892. else {
  893. /* src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
  894. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  895. FAIL_IF(!inst);
  896. *inst++ = GROUP_0F;
  897. *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
  898. }
  899. if (dst & SLJIT_MEM) {
  900. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  901. if (dst_r == TMP_REG1) {
  902. /* Find an unused register whose reg_map value is < 4. */
  903. if ((dst & REG_MASK) == SLJIT_R0) {
  904. if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
  905. work_r = SLJIT_R2;
  906. else
  907. work_r = SLJIT_R1;
  908. }
  909. else {
  910. if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
  911. work_r = SLJIT_R0;
  912. else if ((dst & REG_MASK) == SLJIT_R1)
  913. work_r = SLJIT_R2;
  914. else
  915. work_r = SLJIT_R1;
  916. }
  917. if (work_r == SLJIT_R0) {
  918. ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
  919. }
  920. else {
  921. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  922. FAIL_IF(!inst);
  923. *inst = XCHG_r_rm;
  924. }
  925. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
  926. FAIL_IF(!inst);
  927. *inst = MOV_rm8_r8;
  928. if (work_r == SLJIT_R0) {
  929. ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
  930. }
  931. else {
  932. inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
  933. FAIL_IF(!inst);
  934. *inst = XCHG_r_rm;
  935. }
  936. }
  937. else {
  938. inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
  939. FAIL_IF(!inst);
  940. *inst = MOV_rm8_r8;
  941. }
  942. #else
  943. inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
  944. FAIL_IF(!inst);
  945. *inst = MOV_rm8_r8;
  946. #endif
  947. }
  948. return SLJIT_SUCCESS;
  949. }
  950. static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
  951. sljit_s32 src, sljit_sw srcw)
  952. {
  953. sljit_u8* inst;
  954. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  955. compiler->mode32 = 1;
  956. #endif
  957. inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
  958. FAIL_IF(!inst);
  959. *inst++ = GROUP_0F;
  960. *inst++ = PREFETCH;
  961. if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
  962. *inst |= (3 << 3);
  963. else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
  964. *inst |= (2 << 3);
  965. else
  966. *inst |= (1 << 3);
  967. return SLJIT_SUCCESS;
  968. }
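/* Hint selection above, for reference: the instruction is 0f 18 /r, where the
   /r value picks the prefetch hint (1 = prefetcht0, 2 = prefetcht1,
   3 = prefetcht2). Byte-sized moves therefore get the weakest locality hint
   and word-sized moves the strongest; this mapping is a heuristic of this
   emitter, not an architectural requirement. */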
  969. static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
  970. sljit_s32 dst, sljit_sw dstw,
  971. sljit_s32 src, sljit_sw srcw)
  972. {
  973. sljit_u8* inst;
  974. sljit_s32 dst_r;
  975. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  976. compiler->mode32 = 0;
  977. #endif
  978. if (src & SLJIT_IMM) {
  979. if (FAST_IS_REG(dst)) {
  980. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  981. return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
  982. #else
  983. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
  984. FAIL_IF(!inst);
  985. *inst = MOV_rm_i32;
  986. return SLJIT_SUCCESS;
  987. #endif
  988. }
  989. inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
  990. FAIL_IF(!inst);
  991. *inst = MOV_rm_i32;
  992. return SLJIT_SUCCESS;
  993. }
  994. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  995. if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
  996. dst_r = src;
  997. else {
  998. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  999. FAIL_IF(!inst);
  1000. *inst++ = GROUP_0F;
  1001. *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
  1002. }
  1003. if (dst & SLJIT_MEM) {
  1004. inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
  1005. FAIL_IF(!inst);
  1006. *inst = MOV_rm_r;
  1007. }
  1008. return SLJIT_SUCCESS;
  1009. }
  1010. static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
  1011. sljit_s32 dst, sljit_sw dstw,
  1012. sljit_s32 src, sljit_sw srcw)
  1013. {
  1014. sljit_u8* inst;
  1015. if (dst == src && dstw == srcw) {
  1016. /* Same input and output */
  1017. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
  1018. FAIL_IF(!inst);
  1019. *inst++ = GROUP_F7;
  1020. *inst |= opcode;
  1021. return SLJIT_SUCCESS;
  1022. }
  1023. if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
  1024. dst = TMP_REG1;
  1025. if (FAST_IS_REG(dst)) {
  1026. EMIT_MOV(compiler, dst, 0, src, srcw);
  1027. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
  1028. FAIL_IF(!inst);
  1029. *inst++ = GROUP_F7;
  1030. *inst |= opcode;
  1031. return SLJIT_SUCCESS;
  1032. }
  1033. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  1034. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  1035. FAIL_IF(!inst);
  1036. *inst++ = GROUP_F7;
  1037. *inst |= opcode;
  1038. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1039. return SLJIT_SUCCESS;
  1040. }
  1041. static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
  1042. sljit_s32 dst, sljit_sw dstw,
  1043. sljit_s32 src, sljit_sw srcw)
  1044. {
  1045. sljit_u8* inst;
  1046. if (dst == SLJIT_UNUSED)
  1047. dst = TMP_REG1;
  1048. if (FAST_IS_REG(dst)) {
  1049. EMIT_MOV(compiler, dst, 0, src, srcw);
  1050. inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
  1051. FAIL_IF(!inst);
  1052. *inst++ = GROUP_F7;
  1053. *inst |= NOT_rm;
  1054. inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
  1055. FAIL_IF(!inst);
  1056. *inst = OR_r_rm;
  1057. return SLJIT_SUCCESS;
  1058. }
  1059. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  1060. inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
  1061. FAIL_IF(!inst);
  1062. *inst++ = GROUP_F7;
  1063. *inst |= NOT_rm;
  1064. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
  1065. FAIL_IF(!inst);
  1066. *inst = OR_r_rm;
  1067. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1068. return SLJIT_SUCCESS;
  1069. }
  1070. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1071. static const sljit_sw emit_clz_arg = 32 + 31;
  1072. #endif
  1073. static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
  1074. sljit_s32 dst, sljit_sw dstw,
  1075. sljit_s32 src, sljit_sw srcw)
  1076. {
  1077. sljit_u8* inst;
  1078. sljit_s32 dst_r;
  1079. SLJIT_UNUSED_ARG(op_flags);
  1080. if (cpu_has_cmov == -1)
  1081. get_cpu_features();
  1082. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  1083. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
  1084. FAIL_IF(!inst);
  1085. *inst++ = GROUP_0F;
  1086. *inst = BSR_r_rm;
  1087. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1088. if (cpu_has_cmov) {
  1089. if (dst_r != TMP_REG1) {
  1090. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
  1091. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
  1092. }
  1093. else
  1094. inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
  1095. FAIL_IF(!inst);
  1096. *inst++ = GROUP_0F;
  1097. *inst = CMOVE_r_rm;
  1098. }
  1099. else
  1100. FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
  1101. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
  1102. #else
  1103. if (cpu_has_cmov) {
  1104. EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
  1105. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
  1106. FAIL_IF(!inst);
  1107. *inst++ = GROUP_0F;
  1108. *inst = CMOVE_r_rm;
  1109. }
  1110. else
  1111. FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
  1112. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
  1113. #endif
  1114. FAIL_IF(!inst);
  1115. *(inst + 1) |= XOR;
  1116. if (dst & SLJIT_MEM)
  1117. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1118. return SLJIT_SUCCESS;
  1119. }
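/* Arithmetic behind emit_clz (sketch): BSR returns the index of the highest
   set bit, and for a 32-bit value CLZ == 31 - index, which equals index ^ 31
   for any index in 0..31 - that is the final XOR above. BSR leaves its
   destination undefined when the source is 0, so in that case the constant
   32 + 31 (64 + 63 in 64-bit mode) is moved into the result via CMOVE or the
   generic cmov fallback; after the XOR this yields 32 (or 64), the CLZ of
   zero. */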
  1120. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
  1121. sljit_s32 dst, sljit_sw dstw,
  1122. sljit_s32 src, sljit_sw srcw)
  1123. {
  1124. sljit_s32 op_flags = GET_ALL_FLAGS(op);
  1125. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1126. sljit_s32 dst_is_ereg = 0;
  1127. #endif
  1128. CHECK_ERROR();
  1129. CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
  1130. ADJUST_LOCAL_OFFSET(dst, dstw);
  1131. ADJUST_LOCAL_OFFSET(src, srcw);
  1132. CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
  1133. CHECK_EXTRA_REGS(src, srcw, (void)0);
  1134. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1135. compiler->mode32 = op_flags & SLJIT_I32_OP;
  1136. #endif
  1137. if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
  1138. if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
  1139. return emit_prefetch(compiler, op, src, srcw);
  1140. return SLJIT_SUCCESS;
  1141. }
  1142. op = GET_OPCODE(op);
  1143. if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
  1144. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1145. compiler->mode32 = 0;
  1146. #endif
  1147. if (FAST_IS_REG(src) && src == dst) {
  1148. if (!TYPE_CAST_NEEDED(op))
  1149. return SLJIT_SUCCESS;
  1150. }
  1151. if (op_flags & SLJIT_I32_OP) {
  1152. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1153. if (src & SLJIT_MEM) {
  1154. if (op == SLJIT_MOV_S32)
  1155. op = SLJIT_MOV_U32;
  1156. }
  1157. else if (src & SLJIT_IMM) {
  1158. if (op == SLJIT_MOV_U32)
  1159. op = SLJIT_MOV_S32;
  1160. }
  1161. #endif
  1162. }
  1163. if (src & SLJIT_IMM) {
  1164. switch (op) {
  1165. case SLJIT_MOV_U8:
  1166. srcw = (sljit_u8)srcw;
  1167. break;
  1168. case SLJIT_MOV_S8:
  1169. srcw = (sljit_s8)srcw;
  1170. break;
  1171. case SLJIT_MOV_U16:
  1172. srcw = (sljit_u16)srcw;
  1173. break;
  1174. case SLJIT_MOV_S16:
  1175. srcw = (sljit_s16)srcw;
  1176. break;
  1177. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1178. case SLJIT_MOV_U32:
  1179. srcw = (sljit_u32)srcw;
  1180. break;
  1181. case SLJIT_MOV_S32:
  1182. srcw = (sljit_s32)srcw;
  1183. break;
  1184. #endif
  1185. }
  1186. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1187. if (SLJIT_UNLIKELY(dst_is_ereg))
  1188. return emit_mov(compiler, dst, dstw, src, srcw);
  1189. #endif
  1190. }
  1191. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1192. if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
  1193. SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
  1194. dst = TMP_REG1;
  1195. }
  1196. #endif
  1197. switch (op) {
  1198. case SLJIT_MOV:
  1199. case SLJIT_MOV_P:
  1200. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1201. case SLJIT_MOV_U32:
  1202. case SLJIT_MOV_S32:
  1203. #endif
  1204. FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
  1205. break;
  1206. case SLJIT_MOV_U8:
  1207. FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
  1208. break;
  1209. case SLJIT_MOV_S8:
  1210. FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
  1211. break;
  1212. case SLJIT_MOV_U16:
  1213. FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
  1214. break;
  1215. case SLJIT_MOV_S16:
  1216. FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
  1217. break;
  1218. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1219. case SLJIT_MOV_U32:
  1220. FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
  1221. break;
  1222. case SLJIT_MOV_S32:
  1223. FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
  1224. break;
  1225. #endif
  1226. }
  1227. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1228. if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
  1229. return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
  1230. #endif
  1231. return SLJIT_SUCCESS;
  1232. }
  1233. switch (op) {
  1234. case SLJIT_NOT:
  1235. if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
  1236. return emit_not_with_flags(compiler, dst, dstw, src, srcw);
  1237. return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
  1238. case SLJIT_NEG:
  1239. return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
  1240. case SLJIT_CLZ:
  1241. return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
  1242. }
  1243. return SLJIT_SUCCESS;
  1244. }
  1245. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1246. #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
  1247. if (IS_HALFWORD(immw) || compiler->mode32) { \
  1248. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
  1249. FAIL_IF(!inst); \
  1250. *(inst + 1) |= (op_imm); \
  1251. } \
  1252. else { \
  1253. FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
  1254. inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
  1255. FAIL_IF(!inst); \
  1256. *inst = (op_mr); \
  1257. }
  1258. #define BINARY_EAX_IMM(op_eax_imm, immw) \
  1259. FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
  1260. #else
  1261. #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
  1262. inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
  1263. FAIL_IF(!inst); \
  1264. *(inst + 1) |= (op_imm);
  1265. #define BINARY_EAX_IMM(op_eax_imm, immw) \
  1266. FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
  1267. #endif
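/* Emits a commutative binary operation (ADD, ADC, AND, OR and XOR are
   dispatched here from sljit_emit_op2). The opcode bytes are packed into
   op_types: bits 31-24 hold the "op eax, imm32" opcode, bits 23-16 the
   "op reg, r/m" opcode, bits 15-8 the "op r/m, reg" opcode, and bits 7-0
   the opcode extension used for the immediate form. */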
  1268. static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
  1269. sljit_u32 op_types,
  1270. sljit_s32 dst, sljit_sw dstw,
  1271. sljit_s32 src1, sljit_sw src1w,
  1272. sljit_s32 src2, sljit_sw src2w)
  1273. {
  1274. sljit_u8* inst;
  1275. sljit_u8 op_eax_imm = (op_types >> 24);
  1276. sljit_u8 op_rm = (op_types >> 16) & 0xff;
  1277. sljit_u8 op_mr = (op_types >> 8) & 0xff;
  1278. sljit_u8 op_imm = op_types & 0xff;
  1279. if (dst == SLJIT_UNUSED) {
  1280. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1281. if (src2 & SLJIT_IMM) {
  1282. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1283. }
  1284. else {
  1285. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1286. FAIL_IF(!inst);
  1287. *inst = op_rm;
  1288. }
  1289. return SLJIT_SUCCESS;
  1290. }
  1291. if (dst == src1 && dstw == src1w) {
  1292. if (src2 & SLJIT_IMM) {
  1293. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1294. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1295. #else
  1296. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
  1297. #endif
  1298. BINARY_EAX_IMM(op_eax_imm, src2w);
  1299. }
  1300. else {
  1301. BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
  1302. }
  1303. }
  1304. else if (FAST_IS_REG(dst)) {
  1305. inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
  1306. FAIL_IF(!inst);
  1307. *inst = op_rm;
  1308. }
  1309. else if (FAST_IS_REG(src2)) {
  1310. /* Special exception for sljit_emit_op_flags. */
  1311. inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
  1312. FAIL_IF(!inst);
  1313. *inst = op_mr;
  1314. }
  1315. else {
  1316. EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
  1317. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1318. FAIL_IF(!inst);
  1319. *inst = op_mr;
  1320. }
  1321. return SLJIT_SUCCESS;
  1322. }
  1323. /* Only for cumulative operations. */
  1324. if (dst == src2 && dstw == src2w) {
  1325. if (src1 & SLJIT_IMM) {
  1326. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1327. if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1328. #else
  1329. if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
  1330. #endif
  1331. BINARY_EAX_IMM(op_eax_imm, src1w);
  1332. }
  1333. else {
  1334. BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
  1335. }
  1336. }
  1337. else if (FAST_IS_REG(dst)) {
  1338. inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
  1339. FAIL_IF(!inst);
  1340. *inst = op_rm;
  1341. }
  1342. else if (FAST_IS_REG(src1)) {
  1343. inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
  1344. FAIL_IF(!inst);
  1345. *inst = op_mr;
  1346. }
  1347. else {
  1348. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1349. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1350. FAIL_IF(!inst);
  1351. *inst = op_mr;
  1352. }
  1353. return SLJIT_SUCCESS;
  1354. }
  1355. /* General version. */
  1356. if (FAST_IS_REG(dst)) {
  1357. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1358. if (src2 & SLJIT_IMM) {
  1359. BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
  1360. }
  1361. else {
  1362. inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
  1363. FAIL_IF(!inst);
  1364. *inst = op_rm;
  1365. }
  1366. }
  1367. else {
1368. /* This version requires fewer memory writes. */
  1369. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1370. if (src2 & SLJIT_IMM) {
  1371. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1372. }
  1373. else {
  1374. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1375. FAIL_IF(!inst);
  1376. *inst = op_rm;
  1377. }
  1378. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1379. }
  1380. return SLJIT_SUCCESS;
  1381. }
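/* Same as emit_cum_binary, but for non-commutative operations (SUB and
   SBB): the operands cannot be swapped, so the dst == src2 shortcut used
   above for cumulative operations is omitted. */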
  1382. static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
  1383. sljit_u32 op_types,
  1384. sljit_s32 dst, sljit_sw dstw,
  1385. sljit_s32 src1, sljit_sw src1w,
  1386. sljit_s32 src2, sljit_sw src2w)
  1387. {
  1388. sljit_u8* inst;
  1389. sljit_u8 op_eax_imm = (op_types >> 24);
  1390. sljit_u8 op_rm = (op_types >> 16) & 0xff;
  1391. sljit_u8 op_mr = (op_types >> 8) & 0xff;
  1392. sljit_u8 op_imm = op_types & 0xff;
  1393. if (dst == SLJIT_UNUSED) {
  1394. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1395. if (src2 & SLJIT_IMM) {
  1396. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1397. }
  1398. else {
  1399. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1400. FAIL_IF(!inst);
  1401. *inst = op_rm;
  1402. }
  1403. return SLJIT_SUCCESS;
  1404. }
  1405. if (dst == src1 && dstw == src1w) {
  1406. if (src2 & SLJIT_IMM) {
  1407. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1408. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1409. #else
  1410. if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
  1411. #endif
  1412. BINARY_EAX_IMM(op_eax_imm, src2w);
  1413. }
  1414. else {
  1415. BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
  1416. }
  1417. }
  1418. else if (FAST_IS_REG(dst)) {
  1419. inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
  1420. FAIL_IF(!inst);
  1421. *inst = op_rm;
  1422. }
  1423. else if (FAST_IS_REG(src2)) {
  1424. inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
  1425. FAIL_IF(!inst);
  1426. *inst = op_mr;
  1427. }
  1428. else {
  1429. EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
  1430. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
  1431. FAIL_IF(!inst);
  1432. *inst = op_mr;
  1433. }
  1434. return SLJIT_SUCCESS;
  1435. }
  1436. /* General version. */
  1437. if (FAST_IS_REG(dst) && dst != src2) {
  1438. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1439. if (src2 & SLJIT_IMM) {
  1440. BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
  1441. }
  1442. else {
  1443. inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
  1444. FAIL_IF(!inst);
  1445. *inst = op_rm;
  1446. }
  1447. }
  1448. else {
1449. /* This version requires fewer memory writes. */
  1450. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1451. if (src2 & SLJIT_IMM) {
  1452. BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
  1453. }
  1454. else {
  1455. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1456. FAIL_IF(!inst);
  1457. *inst = op_rm;
  1458. }
  1459. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1460. }
  1461. return SLJIT_SUCCESS;
  1462. }
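/* Signed multiplication. Uses the two operand form imul reg, r/m (0F AF)
   when one operand is already in the destination register, and the
   immediate forms imul reg, r/m, imm8 (6B) / imm32 (69) otherwise; the
   result is moved to memory at the end when the destination is not a
   register. */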
  1463. static sljit_s32 emit_mul(struct sljit_compiler *compiler,
  1464. sljit_s32 dst, sljit_sw dstw,
  1465. sljit_s32 src1, sljit_sw src1w,
  1466. sljit_s32 src2, sljit_sw src2w)
  1467. {
  1468. sljit_u8* inst;
  1469. sljit_s32 dst_r;
  1470. dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
  1471. /* Register destination. */
  1472. if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
  1473. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
  1474. FAIL_IF(!inst);
  1475. *inst++ = GROUP_0F;
  1476. *inst = IMUL_r_rm;
  1477. }
  1478. else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
  1479. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
  1480. FAIL_IF(!inst);
  1481. *inst++ = GROUP_0F;
  1482. *inst = IMUL_r_rm;
  1483. }
  1484. else if (src1 & SLJIT_IMM) {
  1485. if (src2 & SLJIT_IMM) {
  1486. EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
  1487. src2 = dst_r;
  1488. src2w = 0;
  1489. }
  1490. if (src1w <= 127 && src1w >= -128) {
  1491. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1492. FAIL_IF(!inst);
  1493. *inst = IMUL_r_rm_i8;
  1494. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  1495. FAIL_IF(!inst);
  1496. INC_SIZE(1);
  1497. *inst = (sljit_s8)src1w;
  1498. }
  1499. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1500. else {
  1501. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1502. FAIL_IF(!inst);
  1503. *inst = IMUL_r_rm_i32;
  1504. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1505. FAIL_IF(!inst);
  1506. INC_SIZE(4);
  1507. sljit_unaligned_store_sw(inst, src1w);
  1508. }
  1509. #else
  1510. else if (IS_HALFWORD(src1w)) {
  1511. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
  1512. FAIL_IF(!inst);
  1513. *inst = IMUL_r_rm_i32;
  1514. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1515. FAIL_IF(!inst);
  1516. INC_SIZE(4);
  1517. sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
  1518. }
  1519. else {
  1520. if (dst_r != src2)
  1521. EMIT_MOV(compiler, dst_r, 0, src2, src2w);
  1522. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
  1523. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
  1524. FAIL_IF(!inst);
  1525. *inst++ = GROUP_0F;
  1526. *inst = IMUL_r_rm;
  1527. }
  1528. #endif
  1529. }
  1530. else if (src2 & SLJIT_IMM) {
  1531. /* Note: src1 is NOT immediate. */
  1532. if (src2w <= 127 && src2w >= -128) {
  1533. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1534. FAIL_IF(!inst);
  1535. *inst = IMUL_r_rm_i8;
  1536. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
  1537. FAIL_IF(!inst);
  1538. INC_SIZE(1);
  1539. *inst = (sljit_s8)src2w;
  1540. }
  1541. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1542. else {
  1543. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1544. FAIL_IF(!inst);
  1545. *inst = IMUL_r_rm_i32;
  1546. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1547. FAIL_IF(!inst);
  1548. INC_SIZE(4);
  1549. sljit_unaligned_store_sw(inst, src2w);
  1550. }
  1551. #else
  1552. else if (IS_HALFWORD(src2w)) {
  1553. inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
  1554. FAIL_IF(!inst);
  1555. *inst = IMUL_r_rm_i32;
  1556. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
  1557. FAIL_IF(!inst);
  1558. INC_SIZE(4);
  1559. sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
  1560. }
  1561. else {
  1562. if (dst_r != src1)
  1563. EMIT_MOV(compiler, dst_r, 0, src1, src1w);
  1564. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
  1565. inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
  1566. FAIL_IF(!inst);
  1567. *inst++ = GROUP_0F;
  1568. *inst = IMUL_r_rm;
  1569. }
  1570. #endif
  1571. }
  1572. else {
  1573. /* Neither argument is immediate. */
  1574. if (ADDRESSING_DEPENDS_ON(src2, dst_r))
  1575. dst_r = TMP_REG1;
  1576. EMIT_MOV(compiler, dst_r, 0, src1, src1w);
  1577. inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
  1578. FAIL_IF(!inst);
  1579. *inst++ = GROUP_0F;
  1580. *inst = IMUL_r_rm;
  1581. }
  1582. if (dst & SLJIT_MEM)
  1583. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1584. return SLJIT_SUCCESS;
  1585. }
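/* Attempts to perform an addition with a single lea (register + register
   or register + 32 bit immediate). Returns SLJIT_ERR_UNSUPPORTED when the
   operand combination cannot be encoded this way, and the caller falls
   back to a normal add. */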
  1586. static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
  1587. sljit_s32 dst, sljit_sw dstw,
  1588. sljit_s32 src1, sljit_sw src1w,
  1589. sljit_s32 src2, sljit_sw src2w)
  1590. {
  1591. sljit_u8* inst;
  1592. sljit_s32 dst_r, done = 0;
1593. /* These cases are better left to be handled the normal way. */
  1594. if (dst == src1 && dstw == src1w)
  1595. return SLJIT_ERR_UNSUPPORTED;
  1596. if (dst == src2 && dstw == src2w)
  1597. return SLJIT_ERR_UNSUPPORTED;
  1598. dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  1599. if (FAST_IS_REG(src1)) {
  1600. if (FAST_IS_REG(src2)) {
  1601. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
  1602. FAIL_IF(!inst);
  1603. *inst = LEA_r_m;
  1604. done = 1;
  1605. }
  1606. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1607. if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1608. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
  1609. #else
  1610. if (src2 & SLJIT_IMM) {
  1611. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
  1612. #endif
  1613. FAIL_IF(!inst);
  1614. *inst = LEA_r_m;
  1615. done = 1;
  1616. }
  1617. }
  1618. else if (FAST_IS_REG(src2)) {
  1619. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1620. if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1621. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
  1622. #else
  1623. if (src1 & SLJIT_IMM) {
  1624. inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
  1625. #endif
  1626. FAIL_IF(!inst);
  1627. *inst = LEA_r_m;
  1628. done = 1;
  1629. }
  1630. }
  1631. if (done) {
  1632. if (dst_r == TMP_REG1)
  1633. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  1634. return SLJIT_SUCCESS;
  1635. }
  1636. return SLJIT_ERR_UNSUPPORTED;
  1637. }
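/* Emits a cmp instruction; used when only the flags of a subtraction are
   needed (SLJIT_SUB with an unused destination). */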
  1638. static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
  1639. sljit_s32 src1, sljit_sw src1w,
  1640. sljit_s32 src2, sljit_sw src2w)
  1641. {
  1642. sljit_u8* inst;
  1643. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1644. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1645. #else
  1646. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
  1647. #endif
  1648. BINARY_EAX_IMM(CMP_EAX_i32, src2w);
  1649. return SLJIT_SUCCESS;
  1650. }
  1651. if (FAST_IS_REG(src1)) {
  1652. if (src2 & SLJIT_IMM) {
  1653. BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
  1654. }
  1655. else {
  1656. inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
  1657. FAIL_IF(!inst);
  1658. *inst = CMP_r_rm;
  1659. }
  1660. return SLJIT_SUCCESS;
  1661. }
  1662. if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
  1663. inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
  1664. FAIL_IF(!inst);
  1665. *inst = CMP_rm_r;
  1666. return SLJIT_SUCCESS;
  1667. }
  1668. if (src2 & SLJIT_IMM) {
  1669. if (src1 & SLJIT_IMM) {
  1670. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1671. src1 = TMP_REG1;
  1672. src1w = 0;
  1673. }
  1674. BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
  1675. }
  1676. else {
  1677. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1678. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1679. FAIL_IF(!inst);
  1680. *inst = CMP_r_rm;
  1681. }
  1682. return SLJIT_SUCCESS;
  1683. }
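/* Emits a test instruction; used when only the flags of a bitwise and are
   needed (SLJIT_AND with an unused destination). */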
  1684. static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
  1685. sljit_s32 src1, sljit_sw src1w,
  1686. sljit_s32 src2, sljit_sw src2w)
  1687. {
  1688. sljit_u8* inst;
  1689. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1690. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
  1691. #else
  1692. if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
  1693. #endif
  1694. BINARY_EAX_IMM(TEST_EAX_i32, src2w);
  1695. return SLJIT_SUCCESS;
  1696. }
  1697. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1698. if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
  1699. #else
  1700. if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
  1701. #endif
  1702. BINARY_EAX_IMM(TEST_EAX_i32, src1w);
  1703. return SLJIT_SUCCESS;
  1704. }
  1705. if (!(src1 & SLJIT_IMM)) {
  1706. if (src2 & SLJIT_IMM) {
  1707. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1708. if (IS_HALFWORD(src2w) || compiler->mode32) {
  1709. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
  1710. FAIL_IF(!inst);
  1711. *inst = GROUP_F7;
  1712. }
  1713. else {
  1714. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
  1715. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
  1716. FAIL_IF(!inst);
  1717. *inst = TEST_rm_r;
  1718. }
  1719. #else
  1720. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
  1721. FAIL_IF(!inst);
  1722. *inst = GROUP_F7;
  1723. #endif
  1724. return SLJIT_SUCCESS;
  1725. }
  1726. else if (FAST_IS_REG(src1)) {
  1727. inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
  1728. FAIL_IF(!inst);
  1729. *inst = TEST_rm_r;
  1730. return SLJIT_SUCCESS;
  1731. }
  1732. }
  1733. if (!(src2 & SLJIT_IMM)) {
  1734. if (src1 & SLJIT_IMM) {
  1735. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1736. if (IS_HALFWORD(src1w) || compiler->mode32) {
  1737. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
  1738. FAIL_IF(!inst);
  1739. *inst = GROUP_F7;
  1740. }
  1741. else {
  1742. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
  1743. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1744. FAIL_IF(!inst);
  1745. *inst = TEST_rm_r;
  1746. }
  1747. #else
  1748. inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
  1749. FAIL_IF(!inst);
  1750. *inst = GROUP_F7;
  1751. #endif
  1752. return SLJIT_SUCCESS;
  1753. }
  1754. else if (FAST_IS_REG(src2)) {
  1755. inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
  1756. FAIL_IF(!inst);
  1757. *inst = TEST_rm_r;
  1758. return SLJIT_SUCCESS;
  1759. }
  1760. }
  1761. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1762. if (src2 & SLJIT_IMM) {
  1763. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1764. if (IS_HALFWORD(src2w) || compiler->mode32) {
  1765. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
  1766. FAIL_IF(!inst);
  1767. *inst = GROUP_F7;
  1768. }
  1769. else {
  1770. FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
  1771. inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
  1772. FAIL_IF(!inst);
  1773. *inst = TEST_rm_r;
  1774. }
  1775. #else
  1776. inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
  1777. FAIL_IF(!inst);
  1778. *inst = GROUP_F7;
  1779. #endif
  1780. }
  1781. else {
  1782. inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
  1783. FAIL_IF(!inst);
  1784. *inst = TEST_rm_r;
  1785. }
  1786. return SLJIT_SUCCESS;
  1787. }
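/* Shifts. A variable shift count must be in cl, so when src2 is neither
   an immediate nor SLJIT_PREF_SHIFT_REG the current value of ecx is saved
   to a temporary location, the count is moved into ecx, and ecx is
   restored afterwards. */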
  1788. static sljit_s32 emit_shift(struct sljit_compiler *compiler,
  1789. sljit_u8 mode,
  1790. sljit_s32 dst, sljit_sw dstw,
  1791. sljit_s32 src1, sljit_sw src1w,
  1792. sljit_s32 src2, sljit_sw src2w)
  1793. {
  1794. sljit_u8* inst;
  1795. if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
  1796. if (dst == src1 && dstw == src1w) {
  1797. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
  1798. FAIL_IF(!inst);
  1799. *inst |= mode;
  1800. return SLJIT_SUCCESS;
  1801. }
  1802. if (dst == SLJIT_UNUSED) {
  1803. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1804. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
  1805. FAIL_IF(!inst);
  1806. *inst |= mode;
  1807. return SLJIT_SUCCESS;
  1808. }
  1809. if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
  1810. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1811. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1812. FAIL_IF(!inst);
  1813. *inst |= mode;
  1814. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1815. return SLJIT_SUCCESS;
  1816. }
  1817. if (FAST_IS_REG(dst)) {
  1818. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1819. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
  1820. FAIL_IF(!inst);
  1821. *inst |= mode;
  1822. return SLJIT_SUCCESS;
  1823. }
  1824. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1825. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
  1826. FAIL_IF(!inst);
  1827. *inst |= mode;
  1828. EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
  1829. return SLJIT_SUCCESS;
  1830. }
  1831. if (dst == SLJIT_PREF_SHIFT_REG) {
  1832. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1833. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1834. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1835. FAIL_IF(!inst);
  1836. *inst |= mode;
  1837. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1838. }
  1839. else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
  1840. if (src1 != dst)
  1841. EMIT_MOV(compiler, dst, 0, src1, src1w);
  1842. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
  1843. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1844. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
  1845. FAIL_IF(!inst);
  1846. *inst |= mode;
  1847. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1848. }
  1849. else {
  1850. /* This case is complex since ecx itself may be used for
  1851. addressing, and this case must be supported as well. */
  1852. EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
  1853. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1854. EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
  1855. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1856. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1857. FAIL_IF(!inst);
  1858. *inst |= mode;
  1859. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
  1860. #else
  1861. EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
  1862. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
  1863. inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
  1864. FAIL_IF(!inst);
  1865. *inst |= mode;
  1866. EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
  1867. #endif
  1868. if (dst != SLJIT_UNUSED)
  1869. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  1870. }
  1871. return SLJIT_SUCCESS;
  1872. }
  1873. static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
  1874. sljit_u8 mode, sljit_s32 set_flags,
  1875. sljit_s32 dst, sljit_sw dstw,
  1876. sljit_s32 src1, sljit_sw src1w,
  1877. sljit_s32 src2, sljit_sw src2w)
  1878. {
  1879. /* The CPU does not set flags if the shift count is 0. */
  1880. if (src2 & SLJIT_IMM) {
  1881. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1882. if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
  1883. return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
  1884. #else
  1885. if ((src2w & 0x1f) != 0)
  1886. return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
  1887. #endif
  1888. if (!set_flags)
  1889. return emit_mov(compiler, dst, dstw, src1, src1w);
  1890. /* OR dst, src, 0 */
  1891. return emit_cum_binary(compiler, BINARY_OPCODE(OR),
  1892. dst, dstw, src1, src1w, SLJIT_IMM, 0);
  1893. }
  1894. if (!set_flags)
  1895. return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
  1896. if (!FAST_IS_REG(dst))
  1897. FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
  1898. FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
  1899. if (FAST_IS_REG(dst))
  1900. return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
  1901. return SLJIT_SUCCESS;
  1902. }
  1903. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
  1904. sljit_s32 dst, sljit_sw dstw,
  1905. sljit_s32 src1, sljit_sw src1w,
  1906. sljit_s32 src2, sljit_sw src2w)
  1907. {
  1908. CHECK_ERROR();
  1909. CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  1910. ADJUST_LOCAL_OFFSET(dst, dstw);
  1911. ADJUST_LOCAL_OFFSET(src1, src1w);
  1912. ADJUST_LOCAL_OFFSET(src2, src2w);
  1913. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  1914. CHECK_EXTRA_REGS(src1, src1w, (void)0);
  1915. CHECK_EXTRA_REGS(src2, src2w, (void)0);
  1916. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  1917. compiler->mode32 = op & SLJIT_I32_OP;
  1918. #endif
  1919. if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
  1920. return SLJIT_SUCCESS;
  1921. switch (GET_OPCODE(op)) {
  1922. case SLJIT_ADD:
  1923. if (!HAS_FLAGS(op)) {
  1924. if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
  1925. return compiler->error;
  1926. }
  1927. return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
  1928. dst, dstw, src1, src1w, src2, src2w);
  1929. case SLJIT_ADDC:
  1930. return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
  1931. dst, dstw, src1, src1w, src2, src2w);
  1932. case SLJIT_SUB:
  1933. if (!HAS_FLAGS(op)) {
  1934. if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
  1935. return compiler->error;
  1936. }
  1937. if (dst == SLJIT_UNUSED)
  1938. return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
  1939. return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
  1940. dst, dstw, src1, src1w, src2, src2w);
  1941. case SLJIT_SUBC:
  1942. return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
  1943. dst, dstw, src1, src1w, src2, src2w);
  1944. case SLJIT_MUL:
  1945. return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
  1946. case SLJIT_AND:
  1947. if (dst == SLJIT_UNUSED)
  1948. return emit_test_binary(compiler, src1, src1w, src2, src2w);
  1949. return emit_cum_binary(compiler, BINARY_OPCODE(AND),
  1950. dst, dstw, src1, src1w, src2, src2w);
  1951. case SLJIT_OR:
  1952. return emit_cum_binary(compiler, BINARY_OPCODE(OR),
  1953. dst, dstw, src1, src1w, src2, src2w);
  1954. case SLJIT_XOR:
  1955. return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
  1956. dst, dstw, src1, src1w, src2, src2w);
  1957. case SLJIT_SHL:
  1958. return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
  1959. dst, dstw, src1, src1w, src2, src2w);
  1960. case SLJIT_LSHR:
  1961. return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
  1962. dst, dstw, src1, src1w, src2, src2w);
  1963. case SLJIT_ASHR:
  1964. return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
  1965. dst, dstw, src1, src1w, src2, src2w);
  1966. }
  1967. return SLJIT_SUCCESS;
  1968. }
  1969. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
  1970. {
  1971. CHECK_REG_INDEX(check_sljit_get_register_index(reg));
  1972. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1973. if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
  1974. return -1;
  1975. #endif
  1976. return reg_map[reg];
  1977. }
  1978. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
  1979. {
  1980. CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
  1981. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  1982. return reg;
  1983. #else
  1984. return freg_map[reg];
  1985. #endif
  1986. }
  1987. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
  1988. void *instruction, sljit_s32 size)
  1989. {
  1990. sljit_u8 *inst;
  1991. CHECK_ERROR();
  1992. CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
  1993. inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
  1994. FAIL_IF(!inst);
  1995. INC_SIZE(size);
  1996. SLJIT_MEMCPY(inst, instruction, size);
  1997. return SLJIT_SUCCESS;
  1998. }
  1999. /* --------------------------------------------------------------------- */
  2000. /* Floating point operators */
  2001. /* --------------------------------------------------------------------- */
2002. /* 3 words of alignment padding + 4 constants of 16 bytes each. */
  2003. static sljit_s32 sse2_data[3 + (4 * 4)];
  2004. static sljit_s32 *sse2_buffer;
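/* init_compiler() aligns sse2_buffer to a 16 byte boundary inside
   sse2_data and fills in the constants used by SLJIT_NEG_F64 and
   SLJIT_ABS_F64: a sign bit mask and an absolute value mask, one pair for
   single and one pair for double precision. */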
  2005. static void init_compiler(void)
  2006. {
  2007. /* Align to 16 bytes. */
  2008. sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2009. /* Single precision constants (each constant is 16 bytes long). */
  2010. sse2_buffer[0] = 0x80000000;
  2011. sse2_buffer[4] = 0x7fffffff;
2012. /* Double precision constants (each constant is 16 bytes long). */
  2013. sse2_buffer[8] = 0;
  2014. sse2_buffer[9] = 0x80000000;
  2015. sse2_buffer[12] = 0xffffffff;
  2016. sse2_buffer[13] = 0x7fffffff;
  2017. }
  2018. static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
  2019. sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
  2020. {
  2021. sljit_u8 *inst;
  2022. inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
  2023. FAIL_IF(!inst);
  2024. *inst++ = GROUP_0F;
  2025. *inst = opcode;
  2026. return SLJIT_SUCCESS;
  2027. }
  2028. static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
  2029. sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
  2030. {
  2031. sljit_u8 *inst;
  2032. inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
  2033. FAIL_IF(!inst);
  2034. *inst++ = GROUP_0F;
  2035. *inst = opcode;
  2036. return SLJIT_SUCCESS;
  2037. }
  2038. static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
  2039. sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
  2040. {
  2041. return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
  2042. }
  2043. static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
  2044. sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
  2045. {
  2046. return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
  2047. }
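/* Float to integer conversion uses the truncating cvttsd2si / cvttss2si
   instructions; the value is produced in a general purpose register and
   stored with a plain mov when the destination is memory. */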
  2048. static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
  2049. sljit_s32 dst, sljit_sw dstw,
  2050. sljit_s32 src, sljit_sw srcw)
  2051. {
  2052. sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
  2053. sljit_u8 *inst;
  2054. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2055. if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
  2056. compiler->mode32 = 0;
  2057. #endif
  2058. inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
  2059. FAIL_IF(!inst);
  2060. *inst++ = GROUP_0F;
  2061. *inst = CVTTSD2SI_r_xm;
  2062. if (dst & SLJIT_MEM)
  2063. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  2064. return SLJIT_SUCCESS;
  2065. }
  2066. static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
  2067. sljit_s32 dst, sljit_sw dstw,
  2068. sljit_s32 src, sljit_sw srcw)
  2069. {
  2070. sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
  2071. sljit_u8 *inst;
  2072. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2073. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
  2074. compiler->mode32 = 0;
  2075. #endif
  2076. if (src & SLJIT_IMM) {
  2077. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2078. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
  2079. srcw = (sljit_s32)srcw;
  2080. #endif
  2081. EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
  2082. src = TMP_REG1;
  2083. srcw = 0;
  2084. }
  2085. inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
  2086. FAIL_IF(!inst);
  2087. *inst++ = GROUP_0F;
  2088. *inst = CVTSI2SD_x_rm;
  2089. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2090. compiler->mode32 = 1;
  2091. #endif
  2092. if (dst_r == TMP_FREG)
  2093. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2094. return SLJIT_SUCCESS;
  2095. }
  2096. static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
  2097. sljit_s32 src1, sljit_sw src1w,
  2098. sljit_s32 src2, sljit_sw src2w)
  2099. {
  2100. if (!FAST_IS_REG(src1)) {
  2101. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2102. src1 = TMP_FREG;
  2103. }
  2104. return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
  2105. }
  2106. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
  2107. sljit_s32 dst, sljit_sw dstw,
  2108. sljit_s32 src, sljit_sw srcw)
  2109. {
  2110. sljit_s32 dst_r;
  2111. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2112. compiler->mode32 = 1;
  2113. #endif
  2114. CHECK_ERROR();
  2115. SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
  2116. if (GET_OPCODE(op) == SLJIT_MOV_F64) {
  2117. if (FAST_IS_REG(dst))
  2118. return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
  2119. if (FAST_IS_REG(src))
  2120. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
  2121. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
  2122. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2123. }
  2124. if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
  2125. dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
  2126. if (FAST_IS_REG(src)) {
2127. /* We overwrite the high bits of the source. From SLJIT's point of view,
2128. this is not an issue.
2129. Note: with SSE3, MOVDDUP and MOVSLDUP could also be used. */
  2130. FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
  2131. }
  2132. else {
  2133. FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
  2134. src = TMP_FREG;
  2135. }
  2136. FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
  2137. if (dst_r == TMP_FREG)
  2138. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2139. return SLJIT_SUCCESS;
  2140. }
  2141. if (FAST_IS_REG(dst)) {
  2142. dst_r = dst;
  2143. if (dst != src)
  2144. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
  2145. }
  2146. else {
  2147. dst_r = TMP_FREG;
  2148. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
  2149. }
  2150. switch (GET_OPCODE(op)) {
  2151. case SLJIT_NEG_F64:
  2152. FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
  2153. break;
  2154. case SLJIT_ABS_F64:
  2155. FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
  2156. break;
  2157. }
  2158. if (dst_r == TMP_FREG)
  2159. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2160. return SLJIT_SUCCESS;
  2161. }
  2162. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
  2163. sljit_s32 dst, sljit_sw dstw,
  2164. sljit_s32 src1, sljit_sw src1w,
  2165. sljit_s32 src2, sljit_sw src2w)
  2166. {
  2167. sljit_s32 dst_r;
  2168. CHECK_ERROR();
  2169. CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
  2170. ADJUST_LOCAL_OFFSET(dst, dstw);
  2171. ADJUST_LOCAL_OFFSET(src1, src1w);
  2172. ADJUST_LOCAL_OFFSET(src2, src2w);
  2173. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2174. compiler->mode32 = 1;
  2175. #endif
  2176. if (FAST_IS_REG(dst)) {
  2177. dst_r = dst;
  2178. if (dst == src1)
  2179. ; /* Do nothing here. */
  2180. else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
  2181. /* Swap arguments. */
  2182. src2 = src1;
  2183. src2w = src1w;
  2184. }
  2185. else if (dst != src2)
  2186. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
  2187. else {
  2188. dst_r = TMP_FREG;
  2189. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2190. }
  2191. }
  2192. else {
  2193. dst_r = TMP_FREG;
  2194. FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
  2195. }
  2196. switch (GET_OPCODE(op)) {
  2197. case SLJIT_ADD_F64:
  2198. FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2199. break;
  2200. case SLJIT_SUB_F64:
  2201. FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2202. break;
  2203. case SLJIT_MUL_F64:
  2204. FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2205. break;
  2206. case SLJIT_DIV_F64:
  2207. FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
  2208. break;
  2209. }
  2210. if (dst_r == TMP_FREG)
  2211. return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
  2212. return SLJIT_SUCCESS;
  2213. }
  2214. /* --------------------------------------------------------------------- */
  2215. /* Conditional instructions */
  2216. /* --------------------------------------------------------------------- */
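/* Note: each label, jump, constant and put_label below also emits a two
   byte marker into the instruction buffer (a zero byte followed by 0, 1, 2
   or 3). These markers are not machine code; they are recognized and
   resolved when the final code is generated. */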
  2217. SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
  2218. {
  2219. sljit_u8 *inst;
  2220. struct sljit_label *label;
  2221. CHECK_ERROR_PTR();
  2222. CHECK_PTR(check_sljit_emit_label(compiler));
  2223. if (compiler->last_label && compiler->last_label->size == compiler->size)
  2224. return compiler->last_label;
  2225. label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
  2226. PTR_FAIL_IF(!label);
  2227. set_label(label, compiler);
  2228. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2229. PTR_FAIL_IF(!inst);
  2230. *inst++ = 0;
  2231. *inst++ = 0;
  2232. return label;
  2233. }
  2234. SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
  2235. {
  2236. sljit_u8 *inst;
  2237. struct sljit_jump *jump;
  2238. CHECK_ERROR_PTR();
  2239. CHECK_PTR(check_sljit_emit_jump(compiler, type));
  2240. jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
  2241. PTR_FAIL_IF_NULL(jump);
  2242. set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
  2243. type &= 0xff;
  2244. /* Worst case size. */
  2245. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2246. compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
  2247. #else
  2248. compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
  2249. #endif
  2250. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2251. PTR_FAIL_IF_NULL(inst);
  2252. *inst++ = 0;
  2253. *inst++ = 1;
  2254. return jump;
  2255. }
  2256. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
  2257. {
  2258. sljit_u8 *inst;
  2259. struct sljit_jump *jump;
  2260. CHECK_ERROR();
  2261. CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
  2262. ADJUST_LOCAL_OFFSET(src, srcw);
  2263. CHECK_EXTRA_REGS(src, srcw, (void)0);
  2264. if (src == SLJIT_IMM) {
  2265. jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
  2266. FAIL_IF_NULL(jump);
  2267. set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
  2268. jump->u.target = srcw;
  2269. /* Worst case size. */
  2270. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2271. compiler->size += 5;
  2272. #else
  2273. compiler->size += 10 + 3;
  2274. #endif
  2275. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2276. FAIL_IF_NULL(inst);
  2277. *inst++ = 0;
  2278. *inst++ = 1;
  2279. }
  2280. else {
  2281. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2282. /* REX_W is not necessary (src is not immediate). */
  2283. compiler->mode32 = 1;
  2284. #endif
  2285. inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
  2286. FAIL_IF(!inst);
  2287. *inst++ = GROUP_FF;
  2288. *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
  2289. }
  2290. return SLJIT_SUCCESS;
  2291. }
  2292. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
  2293. sljit_s32 dst, sljit_sw dstw,
  2294. sljit_s32 type)
  2295. {
  2296. sljit_u8 *inst;
  2297. sljit_u8 cond_set = 0;
  2298. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2299. sljit_s32 reg;
  2300. #endif
  2301. /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
  2302. sljit_s32 dst_save = dst;
  2303. sljit_sw dstw_save = dstw;
  2304. CHECK_ERROR();
  2305. CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
  2306. ADJUST_LOCAL_OFFSET(dst, dstw);
  2307. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2308. type &= 0xff;
  2309. /* setcc = jcc + 0x10. */
  2310. cond_set = get_jump_code(type) + 0x10;
  2311. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2312. if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
  2313. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
  2314. FAIL_IF(!inst);
  2315. INC_SIZE(4 + 3);
  2316. /* Set low register to conditional flag. */
  2317. *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
  2318. *inst++ = GROUP_0F;
  2319. *inst++ = cond_set;
  2320. *inst++ = MOD_REG | reg_lmap[TMP_REG1];
  2321. *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
  2322. *inst++ = OR_rm8_r8;
  2323. *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
  2324. return SLJIT_SUCCESS;
  2325. }
  2326. reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
  2327. inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
  2328. FAIL_IF(!inst);
  2329. INC_SIZE(4 + 4);
  2330. /* Set low register to conditional flag. */
  2331. *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
  2332. *inst++ = GROUP_0F;
  2333. *inst++ = cond_set;
  2334. *inst++ = MOD_REG | reg_lmap[reg];
  2335. *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
  2336. /* The movzx instruction does not affect flags. */
  2337. *inst++ = GROUP_0F;
  2338. *inst++ = MOVZX_r_rm8;
  2339. *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
  2340. if (reg != TMP_REG1)
  2341. return SLJIT_SUCCESS;
  2342. if (GET_OPCODE(op) < SLJIT_ADD) {
  2343. compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
  2344. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  2345. }
  2346. #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  2347. || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
  2348. compiler->skip_checks = 1;
  2349. #endif
  2350. return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
  2351. #else
  2352. /* The SLJIT_CONFIG_X86_32 code path starts here. */
  2353. if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
  2354. if (reg_map[dst] <= 4) {
  2355. /* Low byte is accessible. */
  2356. inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
  2357. FAIL_IF(!inst);
  2358. INC_SIZE(3 + 3);
  2359. /* Set low byte to conditional flag. */
  2360. *inst++ = GROUP_0F;
  2361. *inst++ = cond_set;
  2362. *inst++ = MOD_REG | reg_map[dst];
  2363. *inst++ = GROUP_0F;
  2364. *inst++ = MOVZX_r_rm8;
  2365. *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
  2366. return SLJIT_SUCCESS;
  2367. }
  2368. /* Low byte is not accessible. */
  2369. if (cpu_has_cmov == -1)
  2370. get_cpu_features();
  2371. if (cpu_has_cmov) {
  2372. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2373. /* An xor reg, reg operation would overwrite the flags. */
  2374. EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
  2375. inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
  2376. FAIL_IF(!inst);
  2377. INC_SIZE(3);
  2378. *inst++ = GROUP_0F;
  2379. /* cmovcc = setcc - 0x50. */
  2380. *inst++ = cond_set - 0x50;
  2381. *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
  2382. return SLJIT_SUCCESS;
  2383. }
  2384. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
  2385. FAIL_IF(!inst);
  2386. INC_SIZE(1 + 3 + 3 + 1);
  2387. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2388. /* Set al to conditional flag. */
  2389. *inst++ = GROUP_0F;
  2390. *inst++ = cond_set;
  2391. *inst++ = MOD_REG | 0 /* eax */;
  2392. *inst++ = GROUP_0F;
  2393. *inst++ = MOVZX_r_rm8;
  2394. *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
  2395. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2396. return SLJIT_SUCCESS;
  2397. }
  2398. if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
  2399. SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
  2400. if (dst != SLJIT_R0) {
  2401. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
  2402. FAIL_IF(!inst);
  2403. INC_SIZE(1 + 3 + 2 + 1);
  2404. /* Set low register to conditional flag. */
  2405. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2406. *inst++ = GROUP_0F;
  2407. *inst++ = cond_set;
  2408. *inst++ = MOD_REG | 0 /* eax */;
  2409. *inst++ = OR_rm8_r8;
  2410. *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
  2411. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2412. }
  2413. else {
  2414. inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
  2415. FAIL_IF(!inst);
  2416. INC_SIZE(2 + 3 + 2 + 2);
  2417. /* Set low register to conditional flag. */
  2418. *inst++ = XCHG_r_rm;
  2419. *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
  2420. *inst++ = GROUP_0F;
  2421. *inst++ = cond_set;
  2422. *inst++ = MOD_REG | 1 /* ecx */;
  2423. *inst++ = OR_rm8_r8;
  2424. *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
  2425. *inst++ = XCHG_r_rm;
  2426. *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
  2427. }
  2428. return SLJIT_SUCCESS;
  2429. }
  2430. /* Set TMP_REG1 to the bit. */
  2431. inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
  2432. FAIL_IF(!inst);
  2433. INC_SIZE(1 + 3 + 3 + 1);
  2434. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2435. /* Set al to conditional flag. */
  2436. *inst++ = GROUP_0F;
  2437. *inst++ = cond_set;
  2438. *inst++ = MOD_REG | 0 /* eax */;
  2439. *inst++ = GROUP_0F;
  2440. *inst++ = MOVZX_r_rm8;
  2441. *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
  2442. *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
  2443. if (GET_OPCODE(op) < SLJIT_ADD)
  2444. return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
  2445. #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
  2446. || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
  2447. compiler->skip_checks = 1;
  2448. #endif
  2449. return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
  2450. #endif /* SLJIT_CONFIG_X86_64 */
  2451. }
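/* Conditional move. Uses the cmovcc instruction when the CPU supports it;
   otherwise (and for destination registers that have no machine register
   on x86-32) it falls back to sljit_emit_cmov_generic. */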
  2452. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
  2453. sljit_s32 dst_reg,
  2454. sljit_s32 src, sljit_sw srcw)
  2455. {
  2456. sljit_u8* inst;
  2457. CHECK_ERROR();
  2458. CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
  2459. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2460. dst_reg &= ~SLJIT_I32_OP;
  2461. if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
  2462. return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
  2463. #else
  2464. if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
  2465. return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
  2466. #endif
  2467. /* ADJUST_LOCAL_OFFSET is not needed. */
  2468. CHECK_EXTRA_REGS(src, srcw, (void)0);
  2469. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2470. compiler->mode32 = dst_reg & SLJIT_I32_OP;
  2471. dst_reg &= ~SLJIT_I32_OP;
  2472. #endif
  2473. if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
  2474. EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
  2475. src = TMP_REG1;
  2476. srcw = 0;
  2477. }
  2478. inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
  2479. FAIL_IF(!inst);
  2480. *inst++ = GROUP_0F;
  2481. *inst = get_jump_code(type & 0xff) - 0x40;
  2482. return SLJIT_SUCCESS;
  2483. }
  2484. SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
  2485. {
  2486. CHECK_ERROR();
  2487. CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
  2488. ADJUST_LOCAL_OFFSET(dst, dstw);
  2489. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2490. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2491. compiler->mode32 = 0;
  2492. #endif
  2493. ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
  2494. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2495. if (NOT_HALFWORD(offset)) {
  2496. FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
  2497. #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
  2498. SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
  2499. return compiler->error;
  2500. #else
  2501. return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
  2502. #endif
  2503. }
  2504. #endif
  2505. if (offset != 0)
  2506. return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
  2507. return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
  2508. }
  2509. SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
  2510. {
  2511. sljit_u8 *inst;
  2512. struct sljit_const *const_;
  2513. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2514. sljit_s32 reg;
  2515. #endif
  2516. CHECK_ERROR_PTR();
  2517. CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
  2518. ADJUST_LOCAL_OFFSET(dst, dstw);
  2519. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2520. const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
  2521. PTR_FAIL_IF(!const_);
  2522. set_const(const_, compiler);
  2523. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2524. compiler->mode32 = 0;
  2525. reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
  2526. if (emit_load_imm64(compiler, reg, init_value))
  2527. return NULL;
  2528. #else
  2529. if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
  2530. return NULL;
  2531. #endif
  2532. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2533. PTR_FAIL_IF(!inst);
  2534. *inst++ = 0;
  2535. *inst++ = 2;
  2536. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2537. if (dst & SLJIT_MEM)
  2538. if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
  2539. return NULL;
  2540. #endif
  2541. return const_;
  2542. }
  2543. SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
  2544. {
  2545. struct sljit_put_label *put_label;
  2546. sljit_u8 *inst;
  2547. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2548. sljit_s32 reg;
  2549. sljit_uw start_size;
  2550. #endif
  2551. CHECK_ERROR_PTR();
  2552. CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
  2553. ADJUST_LOCAL_OFFSET(dst, dstw);
  2554. CHECK_EXTRA_REGS(dst, dstw, (void)0);
  2555. put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
  2556. PTR_FAIL_IF(!put_label);
  2557. set_put_label(put_label, compiler, 0);
  2558. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2559. compiler->mode32 = 0;
  2560. reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
  2561. if (emit_load_imm64(compiler, reg, 0))
  2562. return NULL;
  2563. #else
  2564. if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
  2565. return NULL;
  2566. #endif
  2567. #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
  2568. if (dst & SLJIT_MEM) {
  2569. start_size = compiler->size;
  2570. if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
  2571. return NULL;
  2572. put_label->flags = compiler->size - start_size;
  2573. }
  2574. #endif
  2575. inst = (sljit_u8*)ensure_buf(compiler, 2);
  2576. PTR_FAIL_IF(!inst);
  2577. *inst++ = 0;
  2578. *inst++ = 3;
  2579. return put_label;
  2580. }
  2581. SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
  2582. {
  2583. SLJIT_UNUSED_ARG(executable_offset);
  2584. #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  2585. sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
  2586. #else
  2587. sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
  2588. #endif
  2589. }
  2590. SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
  2591. {
  2592. SLJIT_UNUSED_ARG(executable_offset);
  2593. sljit_unaligned_store_sw((void*)addr, new_constant);
  2594. }