/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = MOV_r_i32 + (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
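
/* A hedged sketch of the encoding produced above, assuming SLJIT_R0 maps to
   rax (reg_map[SLJIT_R0] == 0, as asserted in sljit_emit_enter below):

     emit_load_imm64(compiler, SLJIT_R0, 0x123456789);

   emits the 10 byte movabs form

     48 B8 89 67 45 23 01 00 00 00     mov rax, 0x123456789

   where REX_W (0x48) selects the 64 bit operand size, MOV_r_i32 (0xB8) plus
   the low three register bits selects the destination, and REX_B is OR-ed
   into the prefix for r8-r15. */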

static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_s32 type = jump->flags >> TYPE_SHIFT;
	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

	/* The relative jump below is specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		*code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
	}

	*code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];

	return code_ptr;
}
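
/* Shape of the far jump emitted above (a sketch; the assert guarantees
   TMP_REG2 is one of r8-r15, hence the REX_B prefixes):

     jcc_inv   skip          ; conditional jumps only, skip = 6+3 or 10+3
     mov       tmp2, target  ; REX.B B8+r imm32, or REX.WB B8+r imm64
     jmp/call  tmp2          ; REX.B FF /4 (jmp) or FF /2 (call)

   The 6 or 10 byte mov plus the 3 byte indirect jump account for the
   displacement constants used for the inverted conditional jump. */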

static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	if (max_label > HALFWORD_MAX) {
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags == 0) {
		/* Destination is register. */
		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);

		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

		if ((code_ptr[0] & 0x07) != 0) {
			code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x08);
			code_ptr += 2 + sizeof(sljit_s32);
		}
		else {
			code_ptr[0] = code_ptr[1];
			code_ptr += 1 + sizeof(sljit_s32);
		}

		put_label->addr = (sljit_uw)code_ptr;
		return code_ptr;
	}

	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
		code_ptr += 2 + sizeof(sljit_uw);
		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	}

	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);

	code_ptr[0] = (sljit_u8)(code_ptr[0] & ~0x4);
	code_ptr[1] = MOV_rm_i32;
	code_ptr[2] = (sljit_u8)(code_ptr[2] & ~(0x7 << 3));

	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
	put_label->addr = (sljit_uw)code_ptr;
	put_label->flags = 0;
	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args, i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	compiler->mode32 = 0;

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	args = get_arg_count(arg_types);

	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		if (args > 0) {
			inst[0] = REX_W;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
			inst += 3;
		}
		if (args > 1) {
			inst[0] = REX_W | REX_R;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
			inst += 3;
		}
		if (args > 2) {
			inst[0] = REX_W | REX_R;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		if (args > 0) {
			inst[0] = REX_W;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
			inst += 3;
		}
		if (args > 1) {
			inst[0] = REX_W;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
			inst += 3;
		}
		if (args > 2) {
			inst[0] = REX_W | REX_B;
			inst[1] = MOV_r_rm;
			inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);

			SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
			FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
				TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_s8) -19;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif

	if (local_size > 0) {
		FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}
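
/* Rough shape of the prologue generated above on a SysV target with three
   word arguments (a sketch, not exact register numbers):

     push  <saved regs>          ; S0..Sn and saved scratches, REX_B as needed
     mov   s0, rdi               ; incoming arguments
     mov   s1, rsi
     mov   s2, rdx
     sub   rsp, local_size       ; keeps rsp 16 byte aligned with the pushes

   On _WIN64 the arguments arrive in rcx/rdx/r8 instead, the sub is preceded
   by a stack probe that touches every 4096 byte page of the new area, and a
   movaps spills xmm6 into the reserved slot at [rsp + 0x20]. */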

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
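
/* The epilogue mirrors sljit_emit_enter: the add uses the sign extended imm8
   form (GROUP_BINARY_83, /0) when local_size fits in a signed byte and the
   imm32 form (GROUP_BINARY_81) otherwise, then the saved registers are popped
   in reverse push order before the final ret. */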

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, imm);
	return SLJIT_SUCCESS;
}
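
/* Hypothetical use of the helper above (the real call sites live outside
   this excerpt): emit_do_imm32(compiler, REX_B, MOV_r_i32 | reg_lmap[reg], imm)
   would emit "41 B8+r imm32", a 32 bit immediate load into one of r8-r15,
   which the hardware zero extends to 64 bits. */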

static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG2);
				else
					b |= TMP_REG2;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	}
	else if (freg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = freg_lmap[a] << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : freg_lmap[b]);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
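
/* The buffer assembled above follows the standard x86-64 layout:

     [66/F2/F3 prefix] [REX] ["size" opcode bytes] [ModRM] [SIB] [disp8/32] [imm]

   The opcode bytes are left blank (except for the implicit group opcodes of
   the binary/shift immediate forms), and the returned pointer addresses the
   first of them (one past it for shift forms), so callers patch the opcode
   in afterwards, e.g.:

     inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
     FAIL_IF(!inst);
     *inst = MOV_r_rm;
*/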

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#ifndef _WIN64

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);

	compiler->mode32 = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_DEF_MASK) < SLJIT_ARG_TYPE_F32)
			word_arg_count++;
		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		*src_ptr = TMP_REG2;
	}
	else if (src == SLJIT_R2 && word_arg_count >= SLJIT_R2)
		*src_ptr = TMP_REG1;

	if (word_arg_count >= 3)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}
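
/* SysV note: the scratch registers already overlap the argument registers
   (the assert pins R1 to rsi and TMP_REG1 to rdx, and the final move implies
   R2 is rdi), so at most two fixups are needed: the third word argument is
   staged from R2 into rdx, and R0 is copied into rdi last. The odd looking
   "word_arg_count >= SLJIT_R2" relies on SLJIT_R2 having the numeric
   value 3. */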

#else

static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr, sljit_sw srcw)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);

	compiler->mode32 = 0;
	arg_types >>= SLJIT_DEF_SHIFT;

	while (arg_types) {
		types = (types << SLJIT_DEF_SHIFT) | (arg_types & SLJIT_DEF_MASK);

		switch (arg_types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
		case SLJIT_ARG_TYPE_F64:
			arg_count++;
			float_arg_count++;

			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_DEF_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		*src_ptr = TMP_REG2;
	}

	while (types) {
		switch (types & SLJIT_DEF_MASK) {
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F64:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_DEF_SHIFT;
	}

	return SLJIT_SUCCESS;
}
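
/* Win64 note: word_arg_regs maps argument position to rcx, rdx, r8 and r9
   via SLJIT_R3, SLJIT_R1, SLJIT_R2 and TMP_REG1 (see the assert above), and
   float arguments are fixed up with emit_sse2_load whenever a mixed argument
   list shifts them out of their positional xmm register. */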

#endif

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	FAIL_IF(call_with_args(compiler, arg_types, &src, srcw));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Extend input                                                         */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}
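
/* Note the asymmetry above: sign extension needs an explicit MOVSXD (63 /r),
   while the zero extending path simply emits a 32 bit mov in mode32, relying
   on the architectural rule that every 32 bit register write clears the
   upper half of the destination. */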