/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved.
 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* TileGX architecture. */
/* Contributed by Tilera Corporation. */

#include "sljitNativeTILEGX-encoder.c"

#define SIMM_8BIT_MAX (0x7f)
#define SIMM_8BIT_MIN (-0x80)
#define SIMM_16BIT_MAX (0x7fff)
#define SIMM_16BIT_MIN (-0x8000)
#define SIMM_17BIT_MAX (0xffff)
#define SIMM_17BIT_MIN (-0x10000)
#define SIMM_32BIT_MAX (0x7fffffff)
#define SIMM_32BIT_MIN (-0x7fffffff - 1)
#define SIMM_48BIT_MAX (0x7fffffff0000L)
#define SIMM_48BIT_MIN (-0x800000000000L)
#define IMM16(imm) ((imm) & 0xffff)

#define UIMM_16BIT_MAX (0xffff)

#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
#define ADDR_TMP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define PIC_ADDR_REG TMP_REG2
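
/* Mapping of SLJIT registers to physical TileGX registers (derivable from
   the table below): entry 0 is the zero register (r63), entries 1-5 are the
   scratch registers (r0-r4), entries 6-10 are the saved registers (r30-r34),
   entry 11 is the stack pointer (r54), and the last four entries are the
   temporaries defined above (TMP_REG1, TMP_REG2, TMP_REG3, ADDR_TMP). */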
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = {
	63, 0, 1, 2, 3, 4, 30, 31, 32, 33, 34, 54, 5, 16, 6, 7
};

#define SLJIT_LOCALS_REG_mapped 54
#define TMP_REG1_mapped 5
#define TMP_REG2_mapped 16
#define TMP_REG3_mapped 6
#define ADDR_TMP_mapped 7

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG 8
/* And carry flag as well. */
#define ULESS_FLAG 9
#define UGREATER_FLAG 10
#define LESS_FLAG 11
#define GREATER_FLAG 12
#define OVERFLOW_FLAG 13

#define ZERO 63
#define RA 55
#define TMP_EREG1 14
#define TMP_EREG2 15

#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
#define DOUBLE_DATA 0x10

/* Separates integer and floating point registers */
#define GPR_REG 0xf

#define MEM_MASK 0x1f

#define WRITE_BACK 0x00020
#define ARG_TEST 0x00040
#define ALT_KEEP_CACHE 0x00080
#define CUMULATIVE_OP 0x00100
#define LOGICAL_OP 0x00200
#define IMM_OP 0x00400
#define SRC2_IMM 0x00800

#define UNUSED_DEST 0x01000
#define REG_DEST 0x02000
#define REG1_SOURCE 0x04000
#define REG2_SOURCE 0x08000
#define SLOW_SRC1 0x10000
#define SLOW_SRC2 0x20000
#define SLOW_DEST 0x40000

/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */
#define CHECK_FLAGS(list) (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list))))

SLJIT_API_FUNC_ATTRIBUTE const char *sljit_get_platform_name(void)
{
	return "TileGX" SLJIT_CPUINFO;
}

/* Length of an instruction word */
typedef sljit_uw sljit_ins;

struct jit_instr {
	const struct tilegx_opcode* opcode;
	tilegx_pipeline pipe;
	unsigned long input_registers;
	unsigned long output_registers;
	int operand_value[4];
	int line;
};

/* Opcode Helper Macros */
#define TILEGX_X_MODE 0

#define X_MODE create_Mode(TILEGX_X_MODE)

#define FNOP_X0 \
	create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(FNOP_UNARY_OPCODE_X0)

#define FNOP_X1 \
	create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(FNOP_UNARY_OPCODE_X1)

#define NOP \
	create_Mode(TILEGX_X_MODE) | FNOP_X0 | FNOP_X1

#define ANOP_X0 \
	create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(NOP_UNARY_OPCODE_X0)

#define BPT create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(ILL_UNARY_OPCODE_X1) | \
	create_Dest_X1(0x1C) | create_SrcA_X1(0x25) | ANOP_X0

#define ADD_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(ADD_RRR_0_OPCODE_X1) | FNOP_X0

#define ADDI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ADDI_IMM8_OPCODE_X1) | FNOP_X0

#define SUB_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SUB_RRR_0_OPCODE_X1) | FNOP_X0

#define NOR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(NOR_RRR_0_OPCODE_X1) | FNOP_X0

#define OR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(OR_RRR_0_OPCODE_X1) | FNOP_X0

#define AND_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(AND_RRR_0_OPCODE_X1) | FNOP_X0

#define XOR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(XOR_RRR_0_OPCODE_X1) | FNOP_X0

#define CMOVNEZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(CMOVNEZ_RRR_0_OPCODE_X0) | FNOP_X1

#define CMOVEQZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(CMOVEQZ_RRR_0_OPCODE_X0) | FNOP_X1

#define ADDLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(ADDLI_OPCODE_X1) | FNOP_X0

#define V4INT_L_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(V4INT_L_RRR_0_OPCODE_X1) | FNOP_X0

#define BFEXTU_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
	create_BFOpcodeExtension_X0(BFEXTU_BF_OPCODE_X0) | FNOP_X1

#define BFEXTS_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(BF_OPCODE_X0) | \
	create_BFOpcodeExtension_X0(BFEXTS_BF_OPCODE_X0) | FNOP_X1

#define SHL16INSLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHL16INSLI_OPCODE_X1) | FNOP_X0

#define ST_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(ST_RRR_0_OPCODE_X1) | create_Dest_X1(0x0) | FNOP_X0

#define LD_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(LD_UNARY_OPCODE_X1) | FNOP_X0

#define JR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(JR_UNARY_OPCODE_X1) | FNOP_X0

#define JALR_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(UNARY_RRR_0_OPCODE_X1) | \
	create_UnaryOpcodeExtension_X1(JALR_UNARY_OPCODE_X1) | FNOP_X0

#define CLZ_X0 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X0(RRR_0_OPCODE_X0) | \
	create_RRROpcodeExtension_X0(UNARY_RRR_0_OPCODE_X0) | \
	create_UnaryOpcodeExtension_X0(CNTLZ_UNARY_OPCODE_X0) | FNOP_X1

#define CMPLTUI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(CMPLTUI_IMM8_OPCODE_X1) | FNOP_X0

#define CMPLTU_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(CMPLTU_RRR_0_OPCODE_X1) | FNOP_X0

#define CMPLTS_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(CMPLTS_RRR_0_OPCODE_X1) | FNOP_X0

#define XORI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(XORI_IMM8_OPCODE_X1) | FNOP_X0

#define ORI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ORI_IMM8_OPCODE_X1) | FNOP_X0

#define ANDI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(IMM8_OPCODE_X1) | \
	create_Imm8OpcodeExtension_X1(ANDI_IMM8_OPCODE_X1) | FNOP_X0

#define SHLI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHLI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHL_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHL_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRSI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHRSI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRS_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHRS_RRR_0_OPCODE_X1) | FNOP_X0

#define SHRUI_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(SHIFT_OPCODE_X1) | \
	create_ShiftOpcodeExtension_X1(SHRUI_SHIFT_OPCODE_X1) | FNOP_X0

#define SHRU_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(RRR_0_OPCODE_X1) | \
	create_RRROpcodeExtension_X1(SHRU_RRR_0_OPCODE_X1) | FNOP_X0

#define BEQZ_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
	create_BrType_X1(BEQZ_BRANCH_OPCODE_X1) | FNOP_X0

#define BNEZ_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(BRANCH_OPCODE_X1) | \
	create_BrType_X1(BNEZ_BRANCH_OPCODE_X1) | FNOP_X0

#define J_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
	create_JumpOpcodeExtension_X1(J_JUMP_OPCODE_X1) | FNOP_X0

#define JAL_X1 \
	create_Mode(TILEGX_X_MODE) | create_Opcode_X1(JUMP_OPCODE_X1) | \
	create_JumpOpcodeExtension_X1(JAL_JUMP_OPCODE_X1) | FNOP_X0

#define DEST_X0(x) create_Dest_X0(x)
#define SRCA_X0(x) create_SrcA_X0(x)
#define SRCB_X0(x) create_SrcB_X0(x)
#define DEST_X1(x) create_Dest_X1(x)
#define SRCA_X1(x) create_SrcA_X1(x)
#define SRCB_X1(x) create_SrcB_X1(x)
#define IMM16_X1(x) create_Imm16_X1(x)
#define IMM8_X1(x) create_Imm8_X1(x)
#define BFSTART_X0(x) create_BFStart_X0(x)
#define BFEND_X0(x) create_BFEnd_X0(x)
#define SHIFTIMM_X1(x) create_ShAmt_X1(x)
#define JOFF_X1(x) create_JumpOff_X1(x)
#define BOFF_X1(x) create_BrOff_X1(x)
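
/* The table below is indexed by the low bits of the access flags: bit 0
   selects load vs. store (LOAD_DATA), bits 1-2 select the operand size
   (BYTE_DATA / HALF_DATA / INT_DATA) and bit 3 selects sign extension
   (SIGNED_DATA), giving the 16-entry unsigned/signed x word/byte/half/int
   x store/load matrix spelled out in the per-entry comments. */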
static const tilegx_mnemonic data_transfer_insts[16] = {
	/* u w s */ TILEGX_OPC_ST /* st */,
	/* u w l */ TILEGX_OPC_LD /* ld */,
	/* u b s */ TILEGX_OPC_ST1 /* st1 */,
	/* u b l */ TILEGX_OPC_LD1U /* ld1u */,
	/* u h s */ TILEGX_OPC_ST2 /* st2 */,
	/* u h l */ TILEGX_OPC_LD2U /* ld2u */,
	/* u i s */ TILEGX_OPC_ST4 /* st4 */,
	/* u i l */ TILEGX_OPC_LD4U /* ld4u */,
	/* s w s */ TILEGX_OPC_ST /* st */,
	/* s w l */ TILEGX_OPC_LD /* ld */,
	/* s b s */ TILEGX_OPC_ST1 /* st1 */,
	/* s b l */ TILEGX_OPC_LD1S /* ld1s */,
	/* s h s */ TILEGX_OPC_ST2 /* st2 */,
	/* s h l */ TILEGX_OPC_LD2S /* ld2s */,
	/* s i s */ TILEGX_OPC_ST4 /* st4 */,
	/* s i l */ TILEGX_OPC_LD4S /* ld4s */,
};

#ifdef TILEGX_JIT_DEBUG
static sljit_s32 push_inst_debug(struct sljit_compiler *compiler, sljit_ins ins, int line)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;

	printf("|%04d|S0|:\t\t", line);
	print_insn_tilegx(ptr);
	return SLJIT_SUCCESS;
}

static sljit_s32 push_inst_nodebug(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}

#define push_inst(a, b) push_inst_debug(a, b, __LINE__)
#else
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
#endif

#define BUNDLE_FORMAT_MASK(p0, p1, p2) \
	((p0) | ((p1) << 8) | ((p2) << 16))

#define BUNDLE_FORMAT(p0, p1, p2) \
	{ \
		{ \
			(tilegx_pipeline)(p0), \
			(tilegx_pipeline)(p1), \
			(tilegx_pipeline)(p2) \
		}, \
		BUNDLE_FORMAT_MASK(1 << (p0), 1 << (p1), (1 << (p2))) \
	}
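
/* Each 8-bit field of the mask describes one issue slot. For example,
   BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE)
   yields pipe_mask = (1 << X0) | ((1 << X1) << 8) | ((1 << NO_PIPELINE) << 16);
   compute_format() accepts a format when every slot's pipe bit is also
   present in the corresponding instruction's set of legal pipes. */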

#define NO_PIPELINE TILEGX_NUM_PIPELINE_ENCODINGS

#define tilegx_is_x_pipeline(p) ((int)(p) <= (int)TILEGX_PIPELINE_X1)

#define PI(encoding) \
	push_inst(compiler, encoding)

#define PB3(opcode, dst, srca, srcb) \
	push_3_buffer(compiler, opcode, dst, srca, srcb, __LINE__)

#define PB2(opcode, dst, src) \
	push_2_buffer(compiler, opcode, dst, src, __LINE__)

#define JR(reg) \
	push_jr_buffer(compiler, TILEGX_OPC_JR, reg, __LINE__)

#define ADD(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_ADD, dst, srca, srcb, __LINE__)

#define SUB(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SUB, dst, srca, srcb, __LINE__)

#define MUL(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_MULX, dst, srca, srcb, __LINE__)

#define NOR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_NOR, dst, srca, srcb, __LINE__)

#define OR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_OR, dst, srca, srcb, __LINE__)

#define XOR(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_XOR, dst, srca, srcb, __LINE__)

#define AND(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_AND, dst, srca, srcb, __LINE__)

#define CLZ(dst, src) \
	push_2_buffer(compiler, TILEGX_OPC_CLZ, dst, src, __LINE__)

#define SHLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SHLI, dst, srca, srcb, __LINE__)

#define SHRUI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_SHRUI, dst, srca, imm, __LINE__)

#define XORI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_XORI, dst, srca, imm, __LINE__)

#define ORI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_ORI, dst, srca, imm, __LINE__)

#define CMPLTU(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTU, dst, srca, srcb, __LINE__)

#define CMPLTS(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTS, dst, srca, srcb, __LINE__)

#define CMPLTUI(dst, srca, imm) \
	push_3_buffer(compiler, TILEGX_OPC_CMPLTUI, dst, srca, imm, __LINE__)

#define CMOVNEZ(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMOVNEZ, dst, srca, srcb, __LINE__)

#define CMOVEQZ(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_CMOVEQZ, dst, srca, srcb, __LINE__)

#define ADDLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_ADDLI, dst, srca, srcb, __LINE__)

#define SHL16INSLI(dst, srca, srcb) \
	push_3_buffer(compiler, TILEGX_OPC_SHL16INSLI, dst, srca, srcb, __LINE__)

#define LD_ADD(dst, addr, adjust) \
	push_3_buffer(compiler, TILEGX_OPC_LD_ADD, dst, addr, adjust, __LINE__)

#define ST_ADD(src, addr, adjust) \
	push_3_buffer(compiler, TILEGX_OPC_ST_ADD, src, addr, adjust, __LINE__)

#define LD(dst, addr) \
	push_2_buffer(compiler, TILEGX_OPC_LD, dst, addr, __LINE__)

#define BFEXTU(dst, src, start, end) \
	push_4_buffer(compiler, TILEGX_OPC_BFEXTU, dst, src, start, end, __LINE__)

#define BFEXTS(dst, src, start, end) \
	push_4_buffer(compiler, TILEGX_OPC_BFEXTS, dst, src, start, end, __LINE__)

#define ADD_SOLO(dest, srca, srcb) \
	push_inst(compiler, ADD_X1 | DEST_X1(dest) | SRCA_X1(srca) | SRCB_X1(srcb))

#define ADDI_SOLO(dest, srca, imm) \
	push_inst(compiler, ADDI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM8_X1(imm))

#define ADDLI_SOLO(dest, srca, imm) \
	push_inst(compiler, ADDLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define SHL16INSLI_SOLO(dest, srca, imm) \
	push_inst(compiler, SHL16INSLI_X1 | DEST_X1(dest) | SRCA_X1(srca) | IMM16_X1(imm))

#define JALR_SOLO(reg) \
	push_inst(compiler, JALR_X1 | SRCA_X1(reg))

#define JR_SOLO(reg) \
	push_inst(compiler, JR_X1 | SRCA_X1(reg))

struct Format {
	/* Mapping of bundle issue slot to assigned pipe. */
	tilegx_pipeline pipe[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];

	/* Mask of pipes used by this bundle. */
	unsigned int pipe_mask;
};

const struct Format formats[] =
{
	/* In Y format we must always have something in Y2, since it has
	 * no fnop; this conveys that Y2 must always be used. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, NO_PIPELINE),

	/* Y format has three instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y2),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y0, TILEGX_PIPELINE_Y1),
	BUNDLE_FORMAT(TILEGX_PIPELINE_Y2, TILEGX_PIPELINE_Y1, TILEGX_PIPELINE_Y0),

	/* X format has only two instructions. */
	BUNDLE_FORMAT(TILEGX_PIPELINE_X0, TILEGX_PIPELINE_X1, NO_PIPELINE),
	BUNDLE_FORMAT(TILEGX_PIPELINE_X1, TILEGX_PIPELINE_X0, NO_PIPELINE)
};

struct jit_instr inst_buf[TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE];
unsigned long inst_buf_index;
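
/* Note: the bundling buffer above is global state shared by every compiler
   instance, so this port is not reentrant across threads. */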

tilegx_pipeline get_any_valid_pipe(const struct tilegx_opcode* opcode)
{
	/* FIXME: tile: we could pregenerate this. */
	int pipe;

	for (pipe = 0; ((opcode->pipes & (1 << pipe)) == 0 && pipe < TILEGX_NUM_PIPELINE_ENCODINGS); pipe++)
		;

	return (tilegx_pipeline)(pipe);
}

void insert_nop(tilegx_mnemonic opc, int line)
{
	const struct tilegx_opcode* opcode = NULL;

	memmove(&inst_buf[1], &inst_buf[0], inst_buf_index * sizeof inst_buf[0]);

	opcode = &tilegx_opcodes[opc];
	inst_buf[0].opcode = opcode;
	inst_buf[0].pipe = get_any_valid_pipe(opcode);
	inst_buf[0].input_registers = 0;
	inst_buf[0].output_registers = 0;
	inst_buf[0].line = line;
	++inst_buf_index;
}

const struct Format* compute_format()
{
	unsigned int compatible_pipes = BUNDLE_FORMAT_MASK(
		inst_buf[0].opcode->pipes,
		inst_buf[1].opcode->pipes,
		(inst_buf_index == 3 ? inst_buf[2].opcode->pipes : (1 << NO_PIPELINE)));

	const struct Format* match = NULL;
	const struct Format *b = NULL;

	unsigned int i;
	for (i = 0; i < sizeof formats / sizeof formats[0]; i++) {
		b = &formats[i];
		if ((b->pipe_mask & compatible_pipes) == b->pipe_mask) {
			match = b;
			break;
		}
	}

	return match;
}

sljit_s32 assign_pipes()
{
	unsigned long output_registers = 0;
	unsigned int i = 0;

	if (inst_buf_index == 1) {
		tilegx_mnemonic opc = inst_buf[0].opcode->can_bundle
					? TILEGX_OPC_FNOP : TILEGX_OPC_NOP;
		insert_nop(opc, __LINE__);
	}

	const struct Format* match = compute_format();

	if (match == NULL)
		return -1;

	for (i = 0; i < inst_buf_index; i++) {
		if ((i > 0) && ((inst_buf[i].input_registers & output_registers) != 0))
			return -1;

		if ((i > 0) && ((inst_buf[i].output_registers & output_registers) != 0))
			return -1;

		/* Don't include Rzero in the match set, to avoid triggering
		   needlessly on 'prefetch' instrs. */
		output_registers |= inst_buf[i].output_registers & 0xFFFFFFFFFFFFFFL;
		inst_buf[i].pipe = match->pipe[i];
	}

	/* If only 2 instrs, and in Y-mode, insert a nop. */
	if (inst_buf_index == 2 && !tilegx_is_x_pipeline(match->pipe[0])) {
		insert_nop(TILEGX_OPC_FNOP, __LINE__);

		/* Select the yet unassigned pipe. */
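		/* Y0 + Y1 + Y2 is a compile-time constant, so subtracting the
		   two pipes already taken leaves exactly the remaining one. */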
		tilegx_pipeline pipe = (tilegx_pipeline)(((TILEGX_PIPELINE_Y0
					+ TILEGX_PIPELINE_Y1 + TILEGX_PIPELINE_Y2)
					- (inst_buf[1].pipe + inst_buf[2].pipe)));

		inst_buf[0].pipe = pipe;
	}

	return 0;
}

tilegx_bundle_bits get_bundle_bit(struct jit_instr *inst)
{
	int i, val;
	const struct tilegx_opcode* opcode = inst->opcode;
	tilegx_bundle_bits bits = opcode->fixed_bit_values[inst->pipe];

	const struct tilegx_operand* operand = NULL;
	for (i = 0; i < opcode->num_operands; i++) {
		operand = &tilegx_operands[opcode->operands[inst->pipe][i]];
		val = inst->operand_value[i];
		bits |= operand->insert(val);
	}

	return bits;
}

static sljit_s32 update_buffer(struct sljit_compiler *compiler)
{
	int i;
	int orig_index = inst_buf_index;
	struct jit_instr inst0 = inst_buf[0];
	struct jit_instr inst1 = inst_buf[1];
	struct jit_instr inst2 = inst_buf[2];
	tilegx_bundle_bits bits = 0;

	/* If the bundle is valid as is, perform the encoding and return 1. */
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}

#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M0|:\t");
		else
			printf("|M0|:\t\t");

		print_insn_tilegx(&bits);
#endif

		inst_buf_index = 0;

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	}

	/* If the bundle is invalid, split it in two. First encode the first two
	   (or possibly 1) instructions, and then the last, separately. Note that
	   assign_pipes may have re-ordered the instrs (by inserting no-ops in
	   lower slots) so we need to reset them. */
	inst_buf_index = orig_index - 1;
	inst_buf[0] = inst0;
	inst_buf[1] = inst1;
	inst_buf[2] = inst2;
	if (assign_pipes() == 0) {
		for (i = 0; i < inst_buf_index; i++) {
			bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
			printf("|%04d", inst_buf[i].line);
#endif
		}

#ifdef TILEGX_JIT_DEBUG
		if (inst_buf_index == 3)
			printf("|M1|:\t");
		else
			printf("|M1|:\t\t");

		print_insn_tilegx(&bits);
#endif

		if ((orig_index - 1) == 2) {
			inst_buf[0] = inst2;
			inst_buf_index = 1;
		} else if ((orig_index - 1) == 1) {
			inst_buf[0] = inst1;
			inst_buf_index = 1;
		} else
			SLJIT_UNREACHABLE();

#ifdef TILEGX_JIT_DEBUG
		return push_inst_nodebug(compiler, bits);
#else
		return push_inst(compiler, bits);
#endif
	} else {
		/* We had 3 instrs of which the first 2 can't live in the same bundle.
		   Split those two. Note that we don't try to then combine the second
		   and third instr into a single bundle. First instruction: */
		inst_buf_index = 1;
		inst_buf[0] = inst0;
		inst_buf[1] = inst1;
		inst_buf[2] = inst2;
		if (assign_pipes() == 0) {
			for (i = 0; i < inst_buf_index; i++) {
				bits |= get_bundle_bit(inst_buf + i);
#ifdef TILEGX_JIT_DEBUG
				printf("|%04d", inst_buf[i].line);
#endif
			}

#ifdef TILEGX_JIT_DEBUG
			if (inst_buf_index == 3)
				printf("|M2|:\t");
			else
				printf("|M2|:\t\t");

			print_insn_tilegx(&bits);
#endif

			inst_buf[0] = inst1;
			inst_buf[1] = inst2;
			inst_buf_index = orig_index - 1;

#ifdef TILEGX_JIT_DEBUG
			return push_inst_nodebug(compiler, bits);
#else
			return push_inst(compiler, bits);
#endif
		} else
			SLJIT_UNREACHABLE();
	}

	SLJIT_UNREACHABLE();
}

static sljit_s32 flush_buffer(struct sljit_compiler *compiler)
{
	while (inst_buf_index != 0) {
		FAIL_IF(update_buffer(compiler));
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 push_4_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int op3, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].operand_value[2] = op2;
	inst_buf[inst_buf_index].operand_value[3] = op3;
	inst_buf[inst_buf_index].input_registers = 1L << op1;
	inst_buf[inst_buf_index].output_registers = 1L << op0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int op2, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].operand_value[2] = op2;
	inst_buf[inst_buf_index].line = line;

	switch (opc) {
	case TILEGX_OPC_ST_ADD:
		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	case TILEGX_OPC_LD_ADD:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = (1L << op0) | (1L << op1);
		break;
	case TILEGX_OPC_ADD:
	case TILEGX_OPC_AND:
	case TILEGX_OPC_SUB:
	case TILEGX_OPC_MULX:
	case TILEGX_OPC_OR:
	case TILEGX_OPC_XOR:
	case TILEGX_OPC_NOR:
	case TILEGX_OPC_SHL:
	case TILEGX_OPC_SHRU:
	case TILEGX_OPC_SHRS:
	case TILEGX_OPC_CMPLTU:
	case TILEGX_OPC_CMPLTS:
	case TILEGX_OPC_CMOVEQZ:
	case TILEGX_OPC_CMOVNEZ:
		inst_buf[inst_buf_index].input_registers = (1L << op1) | (1L << op2);
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	case TILEGX_OPC_ADDLI:
	case TILEGX_OPC_XORI:
	case TILEGX_OPC_ORI:
	case TILEGX_OPC_SHLI:
	case TILEGX_OPC_SHRUI:
	case TILEGX_OPC_SHRSI:
	case TILEGX_OPC_SHL16INSLI:
	case TILEGX_OPC_CMPLTUI:
	case TILEGX_OPC_CMPLTSI:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	default:
		printf("unrecognized opc: %s\n", opcode->name);
		SLJIT_UNREACHABLE();
	}

	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int op1, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].operand_value[1] = op1;
	inst_buf[inst_buf_index].line = line;

	switch (opc) {
	case TILEGX_OPC_BEQZ:
	case TILEGX_OPC_BNEZ:
		inst_buf[inst_buf_index].input_registers = 1L << op0;
		break;
	case TILEGX_OPC_ST:
	case TILEGX_OPC_ST1:
	case TILEGX_OPC_ST2:
	case TILEGX_OPC_ST4:
		inst_buf[inst_buf_index].input_registers = (1L << op0) | (1L << op1);
		inst_buf[inst_buf_index].output_registers = 0;
		break;
	case TILEGX_OPC_CLZ:
	case TILEGX_OPC_LD:
	case TILEGX_OPC_LD1U:
	case TILEGX_OPC_LD1S:
	case TILEGX_OPC_LD2U:
	case TILEGX_OPC_LD2S:
	case TILEGX_OPC_LD4U:
	case TILEGX_OPC_LD4S:
		inst_buf[inst_buf_index].input_registers = 1L << op1;
		inst_buf[inst_buf_index].output_registers = 1L << op0;
		break;
	default:
		printf("unrecognized opc: %s\n", opcode->name);
		SLJIT_UNREACHABLE();
	}

	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_0_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].input_registers = 0;
	inst_buf[inst_buf_index].output_registers = 0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return SLJIT_SUCCESS;
}

static sljit_s32 push_jr_buffer(struct sljit_compiler *compiler, tilegx_mnemonic opc, int op0, int line)
{
	if (inst_buf_index == TILEGX_MAX_INSTRUCTIONS_PER_BUNDLE)
		FAIL_IF(update_buffer(compiler));

	const struct tilegx_opcode* opcode = &tilegx_opcodes[opc];
	inst_buf[inst_buf_index].opcode = opcode;
	inst_buf[inst_buf_index].pipe = get_any_valid_pipe(opcode);
	inst_buf[inst_buf_index].operand_value[0] = op0;
	inst_buf[inst_buf_index].input_registers = 1L << op0;
	inst_buf[inst_buf_index].output_registers = 0;
	inst_buf[inst_buf_index].line = line;
	inst_buf_index++;

	return flush_buffer(compiler);
}

static SLJIT_INLINE sljit_ins * detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_ins *inst;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return code_ptr;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

	inst = (sljit_ins *)jump->addr;
	if (jump->flags & IS_COND)
		inst--;

	diff = ((sljit_sw) target_addr - (sljit_sw) inst) >> 3;
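	/* Branch offsets are counted in 8-byte instruction bundles, hence the
	   shift by 3 above and the SIMM_17BIT range check below. */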
	if (diff <= SIMM_17BIT_MAX && diff >= SIMM_17BIT_MIN) {
		jump->flags |= PATCH_B;

		if (!(jump->flags & IS_COND)) {
			if (jump->flags & IS_JAL) {
				jump->flags &= ~(PATCH_B);
				jump->flags |= PATCH_J;
				inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			} else {
				inst[0] = BEQZ_X1 | SRCA_X1(ZERO);

#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(inst);
#endif
			}

			return inst;
		}
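
		/* The target is in range of the conditional branch itself: flip
		   the sense of its compare (e.g. BEQZ <-> BNEZ) so it jumps
		   straight to the target, and drop the long-jump bundle that
		   follows it. */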
		inst[0] = inst[0] ^ (0x7L << 55);

#ifdef TILEGX_JIT_DEBUG
		printf("[runtime relocate]%04d:\t", __LINE__);
		print_insn_tilegx(inst);
#endif
		jump->addr -= sizeof(sljit_ins);
		return inst;
	}

	if (jump->flags & IS_COND) {
		if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
			jump->flags |= PATCH_J;
			inst[0] = (inst[0] & ~(BOFF_X1(-1))) | BOFF_X1(2);
			inst[1] = J_X1;
			return inst + 1;
		}

		return code_ptr;
	}

	if ((target_addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL)) {
		jump->flags |= PATCH_J;

		if (jump->flags & IS_JAL) {
			inst[0] = JAL_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif
		} else {
			inst[0] = J_X1;

#ifdef TILEGX_JIT_DEBUG
			printf("[runtime relocate]%04d:\t", __LINE__);
			print_insn_tilegx(inst);
#endif
		}

		return inst;
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void * sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	code = (sljit_ins *)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);

	buf = compiler->buf;
	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	do {
		buf_ptr = (sljit_ins *)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 3);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);

			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw) code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}

			if (jump && jump->addr == word_count) {
				if (jump->flags & IS_JAL)
					jump->addr = (sljit_uw)(code_ptr - 4);
				else
					jump->addr = (sljit_uw)(code_ptr - 3);

				code_ptr = detect_jump_type(jump, code_ptr, code);
				jump = jump->next;
			}

			if (const_ && const_->addr == word_count) {
				/* Just recording the address. */
				const_->addr = (sljit_uw) code_ptr;
				const_ = const_->next;
			}

			code_ptr++;
			word_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	if (label && label->size == word_count) {
		label->addr = (sljit_uw) code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins *)jump->addr;

			if (jump->flags & PATCH_B) {
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				SLJIT_ASSERT((sljit_sw) addr <= SIMM_17BIT_MAX && (sljit_sw) addr >= SIMM_17BIT_MIN);
				buf_ptr[0] = (buf_ptr[0] & ~(BOFF_X1(-1))) | BOFF_X1(addr);
#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			if (jump->flags & PATCH_J) {
				SLJIT_ASSERT((addr & ~0x3FFFFFFFL) == ((jump->addr + sizeof(sljit_ins)) & ~0x3FFFFFFFL));
				addr = (sljit_sw)(addr - (jump->addr)) >> 3;
				buf_ptr[0] = (buf_ptr[0] & ~(JOFF_X1(-1))) | JOFF_X1(addr);
#ifdef TILEGX_JIT_DEBUG
				printf("[runtime relocate]%04d:\t", __LINE__);
				print_insn_tilegx(buf_ptr);
#endif
				break;
			}

			SLJIT_ASSERT(!(jump->flags & IS_JAL));

			/* Set the fields of immediate loads. */
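			/* The three bundles form an ADDLI followed by two SHL16INSLIs;
			   each carries 16 bits of the address in its X1 Imm16 field,
			   which occupies bits 43..58 of the bundle (the 0xFFFFL << 43
			   mask below). */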
			buf_ptr[0] = (buf_ptr[0] & ~(0xFFFFL << 43)) | (((addr >> 32) & 0xFFFFL) << 43);
			buf_ptr[1] = (buf_ptr[1] & ~(0xFFFFL << 43)) | (((addr >> 16) & 0xFFFFL) << 43);
			buf_ptr[2] = (buf_ptr[2] & ~(0xFFFFL << 43)) | ((addr & 0xFFFFL) << 43);
		} while (0);

		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}

static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm)
{
	if (imm <= SIMM_16BIT_MAX && imm >= SIMM_16BIT_MIN)
		return ADDLI(dst_ar, ZERO, imm);

	if (imm <= SIMM_32BIT_MAX && imm >= SIMM_32BIT_MIN) {
		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 16));
		return SHL16INSLI(dst_ar, dst_ar, imm);
	}

	if (imm <= SIMM_48BIT_MAX && imm >= SIMM_48BIT_MIN) {
		FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
		FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
		return SHL16INSLI(dst_ar, dst_ar, imm);
	}

	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 48));
	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 32));
	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
	return SHL16INSLI(dst_ar, dst_ar, imm);
}
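
/* A worked example: loading imm = 0x123456789abc (a 48-bit value) emits
   ADDLI dst, zero, 0x1234; SHL16INSLI dst, dst, 0x5678;
   SHL16INSLI dst, dst, 0x9abc. SHL16INSLI shifts dst left by 16 and
   inserts the low 16 bits of its immediate, so the constant is built up
   16 bits at a time, most significant chunk first. */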

static sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
{
	/* Should *not* be optimized as load_immediate, as pcre relocation
	   mechanism will match this fixed 3-instruction pattern. */
	if (flush) {
		FAIL_IF(ADDLI_SOLO(dst_ar, ZERO, imm >> 32));
		FAIL_IF(SHL16INSLI_SOLO(dst_ar, dst_ar, imm >> 16));
		return SHL16INSLI_SOLO(dst_ar, dst_ar, imm);
	}

	FAIL_IF(ADDLI(dst_ar, ZERO, imm >> 32));
	FAIL_IF(SHL16INSLI(dst_ar, dst_ar, imm >> 16));
	return SHL16INSLI(dst_ar, dst_ar, imm);
}

static sljit_s32 emit_const_64(struct sljit_compiler *compiler, sljit_s32 dst_ar, sljit_sw imm, int flush)
{
	/* Should *not* be optimized as load_immediate, as pcre relocation
	   mechanism will match this fixed 4-instruction pattern. */
	if (flush) {
		FAIL_IF(ADDLI_SOLO(reg_map[dst_ar], ZERO, imm >> 48));
		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
		FAIL_IF(SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
		return SHL16INSLI_SOLO(reg_map[dst_ar], reg_map[dst_ar], imm);
	}

	FAIL_IF(ADDLI(reg_map[dst_ar], ZERO, imm >> 48));
	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 32));
	FAIL_IF(SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm >> 16));
	return SHL16INSLI(reg_map[dst_ar], reg_map[dst_ar], imm);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_ins base;
	sljit_s32 i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	local_size = (local_size + 7) & ~7;
	compiler->local_size = local_size;

	if (local_size <= SIMM_16BIT_MAX) {
		/* Frequent case. */
		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, -local_size));
		base = SLJIT_LOCALS_REG_mapped;
	} else {
		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
		FAIL_IF(ADD(TMP_REG2_mapped, SLJIT_LOCALS_REG_mapped, ZERO));
		FAIL_IF(SUB(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
		base = TMP_REG2_mapped;
		local_size = 0;
	}

	/* Save the return address. */
	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
	FAIL_IF(ST_ADD(ADDR_TMP_mapped, RA, -8));

	/* Save the S registers. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
	}

	/* Save the R registers that need to be reserved. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		FAIL_IF(ST_ADD(ADDR_TMP_mapped, reg_map[i], -8));
	}

	/* Move the arguments to S registers. */
	for (i = 0; i < args; i++) {
		FAIL_IF(ADD(reg_map[SLJIT_S0 - i], i, ZERO));
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = (local_size + 7) & ~7;

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 local_size;
	sljit_ins base;
	sljit_s32 i, tmp;
	sljit_s32 saveds;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	local_size = compiler->local_size;

	if (local_size <= SIMM_16BIT_MAX)
		base = SLJIT_LOCALS_REG_mapped;
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, local_size));
		FAIL_IF(ADD(TMP_REG1_mapped, SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped));
		base = TMP_REG1_mapped;
		local_size = 0;
	}

	/* Restore the return address. */
	FAIL_IF(ADDLI(ADDR_TMP_mapped, base, local_size - 8));
	FAIL_IF(LD_ADD(RA, ADDR_TMP_mapped, -8));

	/* Restore the S registers. */
	saveds = compiler->saveds;
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
	}

	/* Restore the R registers that need to be reserved. */
	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		FAIL_IF(LD_ADD(reg_map[i], ADDR_TMP_mapped, -8));
	}

	if (compiler->local_size <= SIMM_16BIT_MAX)
		FAIL_IF(ADDLI(SLJIT_LOCALS_REG_mapped, SLJIT_LOCALS_REG_mapped, compiler->local_size));
	else
		FAIL_IF(ADD(SLJIT_LOCALS_REG_mapped, TMP_REG1_mapped, ZERO));

	return JR(RA);
}

/* reg_ar is an absolute register! */

/* Can perform an operation using at most 1 instruction. */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if ((!(flags & WRITE_BACK) || !(arg & REG_MASK))
			&& !(arg & OFFS_REG_MASK) && argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
		/* Works for both absolute and relative addresses. */
		if (SLJIT_UNLIKELY(flags & ARG_TEST))
			return 1;

		FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[arg & REG_MASK], argw));

		if (flags & LOAD_DATA)
			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
		else
			FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

		return -1;
	}

	return 0;
}

/* See getput_arg below.
   Note: can_cache is called only for binary operators. Those
   operators always use word arguments without write back. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));

	/* Simple operation except for updates. */
	if (arg & OFFS_REG_MASK) {
		argw &= 0x3;
		next_argw &= 0x3;
		if (argw && argw == next_argw
				&& (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)))
			return 1;
		return 0;
	}

	if (arg == next_arg) {
		if (((next_argw - argw) <= SIMM_16BIT_MAX
				&& (next_argw - argw) >= SIMM_16BIT_MIN))
			return 1;

		return 0;
	}

	return 0;
}
  1089. /* Emit the necessary instructions. See can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
    sljit_s32 tmp_ar, base;

    SLJIT_ASSERT(arg & SLJIT_MEM);
    if (!(next_arg & SLJIT_MEM)) {
        next_arg = 0;
        next_argw = 0;
    }

    if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
        tmp_ar = reg_ar;
    else
        tmp_ar = TMP_REG1_mapped;

    base = arg & REG_MASK;

    if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
        argw &= 0x3;

        if ((flags & WRITE_BACK) && reg_ar == reg_map[base]) {
            SLJIT_ASSERT(!(flags & LOAD_DATA) && reg_map[TMP_REG1] != reg_ar);
            FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
            reg_ar = TMP_REG1_mapped;
        }

        /* Using the cache. */
        if (argw == compiler->cache_argw) {
            if (!(flags & WRITE_BACK)) {
                if (arg == compiler->cache_arg) {
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
                }

                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
                    if (arg == next_arg && argw == (next_argw & 0x3)) {
                        compiler->cache_arg = arg;
                        compiler->cache_argw = argw;
                        FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], TMP_REG3_mapped));
                        if (flags & LOAD_DATA)
                            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
                        else
                            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
                    }

                    FAIL_IF(ADD(tmp_ar, reg_map[base], TMP_REG3_mapped));
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
                }
            } else {
                if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
                    FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
                    if (flags & LOAD_DATA)
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
                    else
                        return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
                }
            }
        }

        if (SLJIT_UNLIKELY(argw)) {
            compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
            compiler->cache_argw = argw;
            FAIL_IF(SHLI(TMP_REG3_mapped, reg_map[OFFS_REG(arg)], argw));
        }

        if (!(flags & WRITE_BACK)) {
            if (arg == next_arg && argw == (next_argw & 0x3)) {
                compiler->cache_arg = arg;
                compiler->cache_argw = argw;
                FAIL_IF(ADD(TMP_REG3_mapped, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));
                tmp_ar = TMP_REG3_mapped;
            } else
                FAIL_IF(ADD(tmp_ar, reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

            if (flags & LOAD_DATA)
                return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
            else
                return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
        }

        FAIL_IF(ADD(reg_map[base], reg_map[base], reg_map[!argw ? OFFS_REG(arg) : TMP_REG3]));

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
    }

    if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) {
        /* Update only applies if a base register exists. */
        if (reg_ar == reg_map[base]) {
            SLJIT_ASSERT(!(flags & LOAD_DATA) && TMP_REG1_mapped != reg_ar);

            if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
                FAIL_IF(ADDLI(ADDR_TMP_mapped, reg_map[base], argw));
                if (flags & LOAD_DATA)
                    FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, ADDR_TMP_mapped));
                else
                    FAIL_IF(PB2(data_transfer_insts[flags & MEM_MASK], ADDR_TMP_mapped, reg_ar));

                if (argw)
                    return ADDLI(reg_map[base], reg_map[base], argw);

                return SLJIT_SUCCESS;
            }

            FAIL_IF(ADD(TMP_REG1_mapped, reg_ar, ZERO));
            reg_ar = TMP_REG1_mapped;
        }

        if (argw <= SIMM_16BIT_MAX && argw >= SIMM_16BIT_MIN) {
            if (argw)
                FAIL_IF(ADDLI(reg_map[base], reg_map[base], argw));
        } else {
            if (compiler->cache_arg == SLJIT_MEM
                    && argw - compiler->cache_argw <= SIMM_16BIT_MAX
                    && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
                if (argw != compiler->cache_argw) {
                    /* Adjust the cached address by the immediate delta. */
                    FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
                    compiler->cache_argw = argw;
                }

                FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
            } else {
                compiler->cache_arg = SLJIT_MEM;
                compiler->cache_argw = argw;
                FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
                FAIL_IF(ADD(reg_map[base], reg_map[base], TMP_REG3_mapped));
            }
        }

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, reg_map[base]);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_map[base], reg_ar);
    }

    if (compiler->cache_arg == arg
            && argw - compiler->cache_argw <= SIMM_16BIT_MAX
            && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
        if (argw != compiler->cache_argw) {
            FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
            compiler->cache_argw = argw;
        }

        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    if (compiler->cache_arg == SLJIT_MEM
            && argw - compiler->cache_argw <= SIMM_16BIT_MAX
            && argw - compiler->cache_argw >= SIMM_16BIT_MIN) {
        if (argw != compiler->cache_argw)
            FAIL_IF(ADDLI(TMP_REG3_mapped, TMP_REG3_mapped, argw - compiler->cache_argw));
    } else {
        compiler->cache_arg = SLJIT_MEM;
        FAIL_IF(load_immediate(compiler, TMP_REG3_mapped, argw));
    }

    compiler->cache_argw = argw;

    if (!base) {
        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    if (arg == next_arg
            && next_argw - argw <= SIMM_16BIT_MAX
            && next_argw - argw >= SIMM_16BIT_MIN) {
        compiler->cache_arg = arg;
        FAIL_IF(ADD(TMP_REG3_mapped, TMP_REG3_mapped, reg_map[base]));
        if (flags & LOAD_DATA)
            return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, TMP_REG3_mapped);
        else
            return PB2(data_transfer_insts[flags & MEM_MASK], TMP_REG3_mapped, reg_ar);
    }

    FAIL_IF(ADD(tmp_ar, TMP_REG3_mapped, reg_map[base]));

    if (flags & LOAD_DATA)
        return PB2(data_transfer_insts[flags & MEM_MASK], reg_ar, tmp_ar);
    else
        return PB2(data_transfer_insts[flags & MEM_MASK], tmp_ar, reg_ar);
}

static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{
    if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
        return compiler->error;

    compiler->cache_arg = 0;
    compiler->cache_argw = 0;
    return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
}
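
/* Variant of emit_op_mem that keeps the current cache contents and passes
   the next argument pair along, letting getput_arg decide whether caching
   the address in TMP_REG3 pays off for the following access. */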
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
{
    if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
        return compiler->error;
    return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
    ADJUST_LOCAL_OFFSET(dst, dstw);

    /* For UNUSED dst. Uncommon, but possible. */
    if (dst == SLJIT_UNUSED)
        return SLJIT_SUCCESS;

    if (FAST_IS_REG(dst))
        return ADD(reg_map[dst], RA, ZERO);

    /* Memory. */
    return emit_op_mem(compiler, WORD_DATA, RA, dst, dstw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
    ADJUST_LOCAL_OFFSET(src, srcw);

    if (FAST_IS_REG(src))
        FAIL_IF(ADD(RA, reg_map[src], ZERO));
    else if (src & SLJIT_MEM)
        FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RA, src, srcw));
    else if (src & SLJIT_IMM)
        FAIL_IF(load_immediate(compiler, RA, srcw));

    return JR(RA);
}
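
/* emit_single_op: operands have already been moved into registers (or
   validated as immediates) by emit_op below; this routine only selects and
   emits the TILE-Gx instruction sequence for one opcode, including any
   requested flag computations. */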
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
{
    sljit_s32 overflow_ra = 0;

    switch (GET_OPCODE(op)) {
    case SLJIT_MOV:
    case SLJIT_MOV_P:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (dst != src2)
            return ADD(reg_map[dst], reg_map[src2], ZERO);
        return SLJIT_SUCCESS;

    case SLJIT_MOV_U32:
    case SLJIT_MOV_S32:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_S32)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 31);
            return BFEXTU(reg_map[dst], reg_map[src2], 0, 31);
        } else if (dst != src2) {
            SLJIT_ASSERT(src2 == 0);
            return ADD(reg_map[dst], reg_map[src2], ZERO);
        }
        return SLJIT_SUCCESS;

    case SLJIT_MOV_U8:
    case SLJIT_MOV_S8:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_S8)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 7);
            return BFEXTU(reg_map[dst], reg_map[src2], 0, 7);
        } else if (dst != src2) {
            SLJIT_ASSERT(src2 == 0);
            return ADD(reg_map[dst], reg_map[src2], ZERO);
        }
        return SLJIT_SUCCESS;

    case SLJIT_MOV_U16:
    case SLJIT_MOV_S16:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
            if (op == SLJIT_MOV_S16)
                return BFEXTS(reg_map[dst], reg_map[src2], 0, 15);
            return BFEXTU(reg_map[dst], reg_map[src2], 0, 15);
        } else if (dst != src2) {
            SLJIT_ASSERT(src2 == 0);
            return ADD(reg_map[dst], reg_map[src2], ZERO);
        }
        return SLJIT_SUCCESS;

    case SLJIT_NOT:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (op & SLJIT_SET_E)
            FAIL_IF(NOR(EQUAL_FLAG, reg_map[src2], reg_map[src2]));
        if (CHECK_FLAGS(SLJIT_SET_E))
            FAIL_IF(NOR(reg_map[dst], reg_map[src2], reg_map[src2]));
        return SLJIT_SUCCESS;

    case SLJIT_CLZ:
        SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
        if (op & SLJIT_SET_E)
            FAIL_IF(CLZ(EQUAL_FLAG, reg_map[src2]));
        if (CHECK_FLAGS(SLJIT_SET_E))
            FAIL_IF(CLZ(reg_map[dst], reg_map[src2]));
        return SLJIT_SUCCESS;

    case SLJIT_ADD:
        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));
                if (src2 < 0)
                    FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], src2));

            if (op & SLJIT_SET_C) {
                if (src2 >= 0)
                    FAIL_IF(ORI(ULESS_FLAG, reg_map[src1], src2));
                else {
                    FAIL_IF(ADDLI(ULESS_FLAG, ZERO, src2));
                    FAIL_IF(OR(ULESS_FLAG, reg_map[src1], ULESS_FLAG));
                }
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));

            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(OVERFLOW_FLAG, reg_map[dst], 63));
                if (src2 < 0)
                    FAIL_IF(XORI(OVERFLOW_FLAG, OVERFLOW_FLAG, 1));
            }
        } else {
            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
                FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else if (src2 != dst)
                    overflow_ra = reg_map[src2];
                else {
                    /* Rare occasion. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADD(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_C)
                FAIL_IF(OR(ULESS_FLAG, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
                FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
            }
        }

        /* a + b >= a | b (otherwise, the carry should be set to 1). */
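        /* Worked example: 0xffffffffffffffff + 1 wraps to 0, while
           (0xffffffffffffffff | 1) is still all ones, so the CMPLTU below
           sees dst < (src1 | src2) and sets ULESS_FLAG (the carry). */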
        if (op & SLJIT_SET_C)
            FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[dst], ULESS_FLAG));

        if (op & SLJIT_SET_O)
            return CMOVNEZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);

        return SLJIT_SUCCESS;

    case SLJIT_ADDC:
        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_C) {
                if (src2 >= 0)
                    FAIL_IF(ORI(TMP_EREG1, reg_map[src1], src2));
                else {
                    FAIL_IF(ADDLI(TMP_EREG1, ZERO, src2));
                    FAIL_IF(OR(TMP_EREG1, reg_map[src1], TMP_EREG1));
                }
            }

            FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], src2));
        } else {
            if (op & SLJIT_SET_C)
                FAIL_IF(OR(TMP_EREG1, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            FAIL_IF(ADD(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_C)
            FAIL_IF(CMPLTU(TMP_EREG1, reg_map[dst], TMP_EREG1));

        FAIL_IF(ADD(reg_map[dst], reg_map[dst], ULESS_FLAG));

        if (!(op & SLJIT_SET_C))
            return SLJIT_SUCCESS;

        /* Set TMP_EREG2 if (dst == 0) && (ULESS_FLAG == 1). */
        FAIL_IF(CMPLTUI(TMP_EREG2, reg_map[dst], 1));
        FAIL_IF(AND(TMP_EREG2, TMP_EREG2, ULESS_FLAG));
        /* Set carry flag. */
        return OR(ULESS_FLAG, TMP_EREG2, TMP_EREG1);

    case SLJIT_SUB:
        if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_16BIT_MIN)) {
            FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
            src2 = TMP_REG2;
            flags &= ~SRC2_IMM;
        }

        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_O) {
                FAIL_IF(SHRUI(TMP_EREG1, reg_map[src1], 63));

                if (src2 < 0)
                    FAIL_IF(XORI(TMP_EREG1, TMP_EREG1, 1));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else {
                    /* Rare occasion. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(ADDLI(EQUAL_FLAG, reg_map[src1], -src2));

            if (op & SLJIT_SET_C) {
                FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2));
                FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], ADDR_TMP_mapped));
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E))
                FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
        } else {
            if (op & SLJIT_SET_O) {
                FAIL_IF(XOR(TMP_EREG1, reg_map[src1], reg_map[src2]));
                FAIL_IF(SHRUI(TMP_EREG1, TMP_EREG1, 63));

                if (src1 != dst)
                    overflow_ra = reg_map[src1];
                else {
                    /* Rare occasion. */
                    FAIL_IF(ADD(TMP_EREG2, reg_map[src1], ZERO));
                    overflow_ra = TMP_EREG2;
                }
            }

            if (op & SLJIT_SET_E)
                FAIL_IF(SUB(EQUAL_FLAG, reg_map[src1], reg_map[src2]));

            if (op & (SLJIT_SET_U | SLJIT_SET_C))
                FAIL_IF(CMPLTU(ULESS_FLAG, reg_map[src1], reg_map[src2]));

            if (op & SLJIT_SET_U)
                FAIL_IF(CMPLTU(UGREATER_FLAG, reg_map[src2], reg_map[src1]));

            if (op & SLJIT_SET_S) {
                FAIL_IF(CMPLTS(LESS_FLAG, reg_map[src1], reg_map[src2]));
                FAIL_IF(CMPLTS(GREATER_FLAG, reg_map[src2], reg_map[src1]));
            }

            /* dst may be the same as src1 or src2. */
            if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))
                FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_O) {
            FAIL_IF(XOR(OVERFLOW_FLAG, reg_map[dst], overflow_ra));
            FAIL_IF(SHRUI(OVERFLOW_FLAG, OVERFLOW_FLAG, 63));
            return CMOVEQZ(OVERFLOW_FLAG, TMP_EREG1, ZERO);
        }

        return SLJIT_SUCCESS;

    case SLJIT_SUBC:
        if ((flags & SRC2_IMM) && src2 == SIMM_16BIT_MIN) {
            FAIL_IF(ADDLI(TMP_REG2_mapped, ZERO, src2));
            src2 = TMP_REG2;
            flags &= ~SRC2_IMM;
        }

        if (flags & SRC2_IMM) {
            if (op & SLJIT_SET_C) {
                FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, -src2));
                FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], ADDR_TMP_mapped));
            }

            /* dst may be the same as src1 or src2. */
            FAIL_IF(ADDLI(reg_map[dst], reg_map[src1], -src2));
        } else {
            if (op & SLJIT_SET_C)
                FAIL_IF(CMPLTU(TMP_EREG1, reg_map[src1], reg_map[src2]));

            /* dst may be the same as src1 or src2. */
            FAIL_IF(SUB(reg_map[dst], reg_map[src1], reg_map[src2]));
        }

        if (op & SLJIT_SET_C)
            FAIL_IF(CMOVEQZ(TMP_EREG1, reg_map[dst], ULESS_FLAG));

        FAIL_IF(SUB(reg_map[dst], reg_map[dst], ULESS_FLAG));

        if (op & SLJIT_SET_C)
            FAIL_IF(ADD(ULESS_FLAG, TMP_EREG1, ZERO));

        return SLJIT_SUCCESS;

    case SLJIT_MUL:
        if (flags & SRC2_IMM) {
            FAIL_IF(load_immediate(compiler, TMP_REG2_mapped, src2));
            src2 = TMP_REG2;
            flags &= ~SRC2_IMM;
        }

        FAIL_IF(MUL(reg_map[dst], reg_map[src1], reg_map[src2]));

        return SLJIT_SUCCESS;

#define EMIT_LOGICAL(op_imm, op_norm) \
    if (flags & SRC2_IMM) { \
        FAIL_IF(load_immediate(compiler, ADDR_TMP_mapped, src2)); \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                ADDR_TMP_mapped, __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                ADDR_TMP_mapped, __LINE__)); \
    } else { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                reg_map[src2], __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                reg_map[src2], __LINE__)); \
    }
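
/* Note: even in the immediate case EMIT_LOGICAL materializes src2 into
   ADDR_TMP and emits the register form (op_norm); the op_imm parameter is
   currently unused. */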

    case SLJIT_AND:
        EMIT_LOGICAL(TILEGX_OPC_ANDI, TILEGX_OPC_AND);
        return SLJIT_SUCCESS;

    case SLJIT_OR:
        EMIT_LOGICAL(TILEGX_OPC_ORI, TILEGX_OPC_OR);
        return SLJIT_SUCCESS;

    case SLJIT_XOR:
        EMIT_LOGICAL(TILEGX_OPC_XORI, TILEGX_OPC_XOR);
        return SLJIT_SUCCESS;

#define EMIT_SHIFT(op_imm, op_norm) \
    if (flags & SRC2_IMM) { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_imm, EQUAL_FLAG, reg_map[src1], \
                src2 & 0x3F, __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_imm, reg_map[dst], reg_map[src1], \
                src2 & 0x3F, __LINE__)); \
    } else { \
        if (op & SLJIT_SET_E) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, EQUAL_FLAG, reg_map[src1], \
                reg_map[src2], __LINE__)); \
        if (CHECK_FLAGS(SLJIT_SET_E)) \
            FAIL_IF(push_3_buffer( \
                compiler, op_norm, reg_map[dst], reg_map[src1], \
                reg_map[src2], __LINE__)); \
    }
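
/* Immediate shift counts are masked with 0x3F, matching the 64-bit register
   width; sljit_emit_op2 below applies the same pre-masking to immediates. */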

    case SLJIT_SHL:
        EMIT_SHIFT(TILEGX_OPC_SHLI, TILEGX_OPC_SHL);
        return SLJIT_SUCCESS;

    case SLJIT_LSHR:
        EMIT_SHIFT(TILEGX_OPC_SHRUI, TILEGX_OPC_SHRU);
        return SLJIT_SUCCESS;

    case SLJIT_ASHR:
        EMIT_SHIFT(TILEGX_OPC_SHRSI, TILEGX_OPC_SHRS);
        return SLJIT_SUCCESS;
    }

    SLJIT_UNREACHABLE();
    return SLJIT_SUCCESS;
}
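
/* emit_op: generic operand marshalling. It resolves dst/src1/src2 as
   register, immediate or memory, loads slow operands through getput_arg,
   hands plain registers to emit_single_op, and finally stores a memory
   destination if needed. */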
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
{
    /* arg1 goes to TMP_REG1 or src reg.
       arg2 goes to TMP_REG2, imm or src reg.
       TMP_REG3 can be used for caching.
       result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
    sljit_s32 dst_r = TMP_REG2;
    sljit_s32 src1_r;
    sljit_sw src2_r = 0;
    sljit_s32 sugg_src2_r = TMP_REG2;

    if (!(flags & ALT_KEEP_CACHE)) {
        compiler->cache_arg = 0;
        compiler->cache_argw = 0;
    }

    if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
            return SLJIT_SUCCESS;
        if (GET_FLAGS(op))
            flags |= UNUSED_DEST;
    } else if (FAST_IS_REG(dst)) {
        dst_r = dst;
        flags |= REG_DEST;
        if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
            sugg_src2_r = dst_r;
    } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1_mapped, dst, dstw))
        flags |= SLOW_DEST;

    if (flags & IMM_OP) {
        if ((src2 & SLJIT_IMM) && src2w) {
            if ((!(flags & LOGICAL_OP)
                    && (src2w <= SIMM_16BIT_MAX && src2w >= SIMM_16BIT_MIN))
                    || ((flags & LOGICAL_OP) && !(src2w & ~UIMM_16BIT_MAX))) {
                flags |= SRC2_IMM;
                src2_r = src2w;
            }
        }

        if (!(flags & SRC2_IMM) && (flags & CUMULATIVE_OP) && (src1 & SLJIT_IMM) && src1w) {
            if ((!(flags & LOGICAL_OP)
                    && (src1w <= SIMM_16BIT_MAX && src1w >= SIMM_16BIT_MIN))
                    || ((flags & LOGICAL_OP) && !(src1w & ~UIMM_16BIT_MAX))) {
                flags |= SRC2_IMM;
                src2_r = src1w;

                /* And swap arguments. */
                src1 = src2;
                src1w = src2w;
                src2 = SLJIT_IMM;
                /* src2w = src2_r unneeded. */
            }
        }
    }

    /* Source 1. */
    if (FAST_IS_REG(src1)) {
        src1_r = src1;
        flags |= REG1_SOURCE;
    } else if (src1 & SLJIT_IMM) {
        if (src1w) {
            FAIL_IF(load_immediate(compiler, TMP_REG1_mapped, src1w));
            src1_r = TMP_REG1;
        } else
            src1_r = 0;
    } else {
        if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w))
            FAIL_IF(compiler->error);
        else
            flags |= SLOW_SRC1;
        src1_r = TMP_REG1;
    }

    /* Source 2. */
    if (FAST_IS_REG(src2)) {
        src2_r = src2;
        flags |= REG2_SOURCE;
        if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
            dst_r = src2_r;
    } else if (src2 & SLJIT_IMM) {
        if (!(flags & SRC2_IMM)) {
            if (src2w) {
                FAIL_IF(load_immediate(compiler, reg_map[sugg_src2_r], src2w));
                src2_r = sugg_src2_r;
            } else {
                src2_r = 0;
                if ((op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) && (dst & SLJIT_MEM))
                    dst_r = 0;
            }
        }
    } else {
        if (getput_arg_fast(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w))
            FAIL_IF(compiler->error);
        else
            flags |= SLOW_SRC2;
        src2_r = sugg_src2_r;
    }

    if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
        SLJIT_ASSERT(src2_r == TMP_REG2);
        if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, src1, src1w));
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
        } else {
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, src2, src2w));
            FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2_mapped, src2, src2w, dst, dstw));
        }
    } else if (flags & SLOW_SRC1)
        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1_mapped, src1, src1w, dst, dstw));
    else if (flags & SLOW_SRC2)
        FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, reg_map[sugg_src2_r], src2, src2w, dst, dstw));

    FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

    if (dst & SLJIT_MEM) {
        if (!(flags & SLOW_DEST)) {
            getput_arg_fast(compiler, flags, reg_map[dst_r], dst, dstw);
            return compiler->error;
        }

        return getput_arg(compiler, flags, reg_map[dst_r], dst, dstw, 0, 0);
    }

    return SLJIT_SUCCESS;
}
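
/* sljit_emit_op_flags materializes the requested condition as a 0/1 value
   read from the dedicated flag registers (EQUAL_FLAG, ULESS_FLAG, ...); for
   op >= SLJIT_ADD that value is then combined with dst through a second
   emit_op pass. */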
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw, sljit_s32 type)
{
    sljit_s32 sugg_dst_ar, dst_ar;
    sljit_s32 flags = GET_ALL_FLAGS(op);
    sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;

    CHECK_ERROR();
    CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
    ADJUST_LOCAL_OFFSET(dst, dstw);

    op = GET_OPCODE(op);
    if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32)
        mem_type = INT_DATA | SIGNED_DATA;

    sugg_dst_ar = reg_map[(op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2];

    compiler->cache_arg = 0;
    compiler->cache_argw = 0;

    if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
        ADJUST_LOCAL_OFFSET(src, srcw);
        FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1_mapped, src, srcw, dst, dstw));
        src = TMP_REG1;
        srcw = 0;
    }

    switch (type & 0xff) {
    case SLJIT_EQUAL:
    case SLJIT_NOT_EQUAL:
        FAIL_IF(CMPLTUI(sugg_dst_ar, EQUAL_FLAG, 1));
        dst_ar = sugg_dst_ar;
        break;
    case SLJIT_LESS:
    case SLJIT_GREATER_EQUAL:
        dst_ar = ULESS_FLAG;
        break;
    case SLJIT_GREATER:
    case SLJIT_LESS_EQUAL:
        dst_ar = UGREATER_FLAG;
        break;
    case SLJIT_SIG_LESS:
    case SLJIT_SIG_GREATER_EQUAL:
        dst_ar = LESS_FLAG;
        break;
    case SLJIT_SIG_GREATER:
    case SLJIT_SIG_LESS_EQUAL:
        dst_ar = GREATER_FLAG;
        break;
    case SLJIT_OVERFLOW:
    case SLJIT_NOT_OVERFLOW:
        dst_ar = OVERFLOW_FLAG;
        break;
    case SLJIT_MUL_OVERFLOW:
    case SLJIT_MUL_NOT_OVERFLOW:
        FAIL_IF(CMPLTUI(sugg_dst_ar, OVERFLOW_FLAG, 1));
        dst_ar = sugg_dst_ar;
        type ^= 0x1; /* Flip type bit for the XORI below. */
        break;
    default:
        SLJIT_UNREACHABLE();
        dst_ar = sugg_dst_ar;
        break;
    }

    if (type & 0x1) {
        FAIL_IF(XORI(sugg_dst_ar, dst_ar, 1));
        dst_ar = sugg_dst_ar;
    }

    if (op >= SLJIT_ADD) {
        if (TMP_REG2_mapped != dst_ar)
            FAIL_IF(ADD(TMP_REG2_mapped, dst_ar, ZERO));
        return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0);
    }

    if (dst & SLJIT_MEM)
        return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw);

    if (sugg_dst_ar != dst_ar)
        return ADD(sugg_dst_ar, dst_ar, ZERO);

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_op0(compiler, op));

    op = GET_OPCODE(op);
    switch (op) {
    case SLJIT_NOP:
        return push_0_buffer(compiler, TILEGX_OPC_FNOP, __LINE__);

    case SLJIT_BREAKPOINT:
        return PI(BPT);

    case SLJIT_LMUL_UW:
    case SLJIT_LMUL_SW:
    case SLJIT_DIVMOD_UW:
    case SLJIT_DIVMOD_SW:
    case SLJIT_DIV_UW:
    case SLJIT_DIV_SW:
        SLJIT_UNREACHABLE();
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src, srcw);

    switch (GET_OPCODE(op)) {
    case SLJIT_MOV:
    case SLJIT_MOV_P:
        return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_U32:
        return emit_op(compiler, SLJIT_MOV_U32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_S32:
        return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOV_U8:
        return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);

    case SLJIT_MOV_S8:
        return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);

    case SLJIT_MOV_U16:
        return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);

    case SLJIT_MOV_S16:
        return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);

    case SLJIT_MOVU:
    case SLJIT_MOVU_P:
        return emit_op(compiler, SLJIT_MOV, WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_U32:
        return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_S32:
        return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_MOVU_U8:
        return emit_op(compiler, SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8) srcw : srcw);

    case SLJIT_MOVU_S8:
        return emit_op(compiler, SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8) srcw : srcw);

    case SLJIT_MOVU_U16:
        return emit_op(compiler, SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16) srcw : srcw);

    case SLJIT_MOVU_S16:
        return emit_op(compiler, SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16) srcw : srcw);

    case SLJIT_NOT:
        return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

    case SLJIT_NEG:
        return emit_op(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), IMM_OP, dst, dstw, SLJIT_IMM, 0, src, srcw);

    case SLJIT_CLZ:
        return emit_op(compiler, op, (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
    }

    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src1, src1w);
    ADJUST_LOCAL_OFFSET(src2, src2w);

    switch (GET_OPCODE(op)) {
    case SLJIT_ADD:
    case SLJIT_ADDC:
        return emit_op(compiler, op, CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_SUB:
    case SLJIT_SUBC:
        return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_MUL:
        return emit_op(compiler, op, CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_AND:
    case SLJIT_OR:
    case SLJIT_XOR:
        return emit_op(compiler, op, CUMULATIVE_OP | LOGICAL_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

    case SLJIT_SHL:
    case SLJIT_LSHR:
    case SLJIT_ASHR:
        if (src2 & SLJIT_IMM)
            src2w &= 0x3f;
        if (op & SLJIT_I32_OP)
            src2w &= 0x1f;

        return emit_op(compiler, op, IMM_OP, dst, dstw, src1, src1w, src2, src2w);
    }

    return SLJIT_SUCCESS;
}
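
/* Hypothetical usage example (not part of this file): emitting R0 = R0 + 4 as
       sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4);
   goes through the CUMULATIVE_OP | IMM_OP path above; since 4 fits in a signed
   16-bit immediate, emit_op sets SRC2_IMM and emit_single_op reduces the whole
   operation to a single ADDLI. */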

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label * sljit_emit_label(struct sljit_compiler *compiler)
{
    struct sljit_label *label;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_emit_label(compiler));

    if (compiler->last_label && compiler->last_label->size == compiler->size)
        return compiler->last_label;

    label = (struct sljit_label *)ensure_abuf(compiler, sizeof(struct sljit_label));
    PTR_FAIL_IF(!label);
    set_label(label, compiler);
    return label;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
    sljit_s32 src_r = TMP_REG2;
    struct sljit_jump *jump = NULL;

    flush_buffer(compiler);

    CHECK_ERROR();
    CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
    ADJUST_LOCAL_OFFSET(src, srcw);

    if (FAST_IS_REG(src)) {
        if (reg_map[src] != 0)
            src_r = src;
        else
            FAIL_IF(ADD_SOLO(TMP_REG2_mapped, reg_map[src], ZERO));
    }

    if (type >= SLJIT_CALL0) {
        SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
        if (src & (SLJIT_IMM | SLJIT_MEM)) {
            if (src & SLJIT_IMM)
                FAIL_IF(emit_const(compiler, reg_map[PIC_ADDR_REG], srcw, 1));
            else {
                SLJIT_ASSERT(src_r == TMP_REG2 && (src & SLJIT_MEM));
                FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
            }

            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));

            FAIL_IF(ADDI_SOLO(54, 54, -16));

            FAIL_IF(JALR_SOLO(reg_map[PIC_ADDR_REG]));

            return ADDI_SOLO(54, 54, 16);
        }

        /* Register input. */
        if (type >= SLJIT_CALL1)
            FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));

        FAIL_IF(ADD_SOLO(reg_map[PIC_ADDR_REG], reg_map[src_r], ZERO));

        FAIL_IF(ADDI_SOLO(54, 54, -16));

        FAIL_IF(JALR_SOLO(reg_map[src_r]));

        return ADDI_SOLO(54, 54, 16);
    }

    if (src & SLJIT_IMM) {
        jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
        FAIL_IF(!jump);
        set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_JAL : 0));
        jump->u.target = srcw;

        FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));

        if (type >= SLJIT_FAST_CALL) {
            FAIL_IF(ADD_SOLO(ZERO, ZERO, ZERO));
            jump->addr = compiler->size;
            FAIL_IF(JR_SOLO(reg_map[src_r]));
        } else {
            jump->addr = compiler->size;
            FAIL_IF(JR_SOLO(reg_map[src_r]));
        }

        return SLJIT_SUCCESS;
    } else if (src & SLJIT_MEM) {
        FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
        flush_buffer(compiler);
    }

    FAIL_IF(JR_SOLO(reg_map[src_r]));

    if (jump)
        jump->addr = compiler->size;

    return SLJIT_SUCCESS;
}

#define BR_Z(src) \
    inst = BEQZ_X1 | SRCA_X1(src); \
    flags = IS_COND;

#define BR_NZ(src) \
    inst = BNEZ_X1 | SRCA_X1(src); \
    flags = IS_COND;
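
/* The flag registers hold precomputed comparison results, so the branch
   sense is inverted relative to the jump type: SLJIT_EQUAL, for instance,
   tests EQUAL_FLAG with BNEZ, because the emitted conditional branch skips
   the unconditional jump sequence that follows when the condition fails. */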

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
    struct sljit_jump *jump;
    sljit_ins inst;
    sljit_s32 flags = 0;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_emit_jump(compiler, type));

    jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
    PTR_FAIL_IF(!jump);
    set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
    type &= 0xff;

    switch (type) {
    case SLJIT_EQUAL:
        BR_NZ(EQUAL_FLAG);
        break;
    case SLJIT_NOT_EQUAL:
        BR_Z(EQUAL_FLAG);
        break;
    case SLJIT_LESS:
        BR_Z(ULESS_FLAG);
        break;
    case SLJIT_GREATER_EQUAL:
        BR_NZ(ULESS_FLAG);
        break;
    case SLJIT_GREATER:
        BR_Z(UGREATER_FLAG);
        break;
    case SLJIT_LESS_EQUAL:
        BR_NZ(UGREATER_FLAG);
        break;
    case SLJIT_SIG_LESS:
        BR_Z(LESS_FLAG);
        break;
    case SLJIT_SIG_GREATER_EQUAL:
        BR_NZ(LESS_FLAG);
        break;
    case SLJIT_SIG_GREATER:
        BR_Z(GREATER_FLAG);
        break;
    case SLJIT_SIG_LESS_EQUAL:
        BR_NZ(GREATER_FLAG);
        break;
    case SLJIT_OVERFLOW:
    case SLJIT_MUL_OVERFLOW:
        BR_Z(OVERFLOW_FLAG);
        break;
    case SLJIT_NOT_OVERFLOW:
    case SLJIT_MUL_NOT_OVERFLOW:
        BR_NZ(OVERFLOW_FLAG);
        break;
    default:
        /* Not a conditional branch. */
        inst = 0;
        break;
    }

    jump->flags |= flags;

    if (inst) {
        inst = inst | ((type <= SLJIT_JUMP) ? BOFF_X1(5) : BOFF_X1(6));
        PTR_FAIL_IF(PI(inst));
    }

    PTR_FAIL_IF(emit_const(compiler, TMP_REG2_mapped, 0, 1));

    if (type <= SLJIT_JUMP) {
        jump->addr = compiler->size;
        PTR_FAIL_IF(JR_SOLO(TMP_REG2_mapped));
    } else {
        SLJIT_ASSERT(reg_map[PIC_ADDR_REG] == 16 && PIC_ADDR_REG == TMP_REG2);
        /* Cannot be optimized out if type is >= CALL0. */
        jump->flags |= IS_JAL | (type >= SLJIT_CALL0 ? SLJIT_REWRITABLE_JUMP : 0);
        PTR_FAIL_IF(ADD_SOLO(0, reg_map[SLJIT_R0], ZERO));
        jump->addr = compiler->size;
        PTR_FAIL_IF(JALR_SOLO(TMP_REG2_mapped));
    }

    return jump;
}

/* Floating point operations are not implemented for this target; the explicit
   return keeps the non-void signature well-defined. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw)
{
    SLJIT_UNREACHABLE();
    return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w)
{
    SLJIT_UNREACHABLE();
    return SLJIT_ERR_UNSUPPORTED;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
    struct sljit_const *const_;
    sljit_s32 reg;

    flush_buffer(compiler);

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
    ADJUST_LOCAL_OFFSET(dst, dstw);

    const_ = (struct sljit_const *)ensure_abuf(compiler, sizeof(struct sljit_const));
    PTR_FAIL_IF(!const_);
    set_const(const_, compiler);

    reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

    PTR_FAIL_IF(emit_const_64(compiler, reg, init_value, 1));

    if (dst & SLJIT_MEM)
        PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));

    return const_;
}
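
/* Both patchers below rewrite the 16-bit immediate field (bits 43..58 of
   each bundle, assuming the usual moveli/shl16insli chain emitted by
   emit_const): three bundles carry a 48-bit jump target, four bundles a
   full 64-bit constant. */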
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target)
{
    sljit_ins *inst = (sljit_ins *)addr;

    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43);
    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43);
    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43);
    SLJIT_CACHE_FLUSH(inst, inst + 3);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
    sljit_ins *inst = (sljit_ins *)addr;

    inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_constant >> 48) & 0xFFFFL) << 43);
    inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_constant >> 32) & 0xFFFFL) << 43);
    inst[2] = (inst[2] & ~(0xFFFFL << 43)) | (((new_constant >> 16) & 0xFFFFL) << 43);
    inst[3] = (inst[3] & ~(0xFFFFL << 43)) | ((new_constant & 0xFFFFL) << 43);
    SLJIT_CACHE_FLUSH(inst, inst + 4);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
    CHECK_REG_INDEX(check_sljit_get_register_index(reg));
    return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
    void *instruction, sljit_s32 size)
{
    CHECK_ERROR();
    CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

    return SLJIT_ERR_UNSUPPORTED;
}