  1. //=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This file implements the WebAssemblyTargetLowering class.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. #include "WebAssemblyISelLowering.h"
  14. #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
  15. #include "Utils/WebAssemblyTypeUtilities.h"
  16. #include "Utils/WebAssemblyUtilities.h"
  17. #include "WebAssemblyMachineFunctionInfo.h"
  18. #include "WebAssemblySubtarget.h"
  19. #include "WebAssemblyTargetMachine.h"
  20. #include "llvm/CodeGen/CallingConvLower.h"
  21. #include "llvm/CodeGen/MachineFrameInfo.h"
  22. #include "llvm/CodeGen/MachineFunctionPass.h"
  23. #include "llvm/CodeGen/MachineInstrBuilder.h"
  24. #include "llvm/CodeGen/MachineJumpTableInfo.h"
  25. #include "llvm/CodeGen/MachineModuleInfo.h"
  26. #include "llvm/CodeGen/MachineRegisterInfo.h"
  27. #include "llvm/CodeGen/SelectionDAG.h"
  28. #include "llvm/CodeGen/SelectionDAGNodes.h"
  29. #include "llvm/IR/DiagnosticInfo.h"
  30. #include "llvm/IR/DiagnosticPrinter.h"
  31. #include "llvm/IR/Function.h"
  32. #include "llvm/IR/Intrinsics.h"
  33. #include "llvm/IR/IntrinsicsWebAssembly.h"
  34. #include "llvm/Support/Debug.h"
  35. #include "llvm/Support/ErrorHandling.h"
  36. #include "llvm/Support/KnownBits.h"
  37. #include "llvm/Support/MathExtras.h"
  38. #include "llvm/Support/raw_ostream.h"
  39. #include "llvm/Target/TargetOptions.h"
  40. using namespace llvm;
  41. #define DEBUG_TYPE "wasm-lower"
  42. WebAssemblyTargetLowering::WebAssemblyTargetLowering(
  43. const TargetMachine &TM, const WebAssemblySubtarget &STI)
  44. : TargetLowering(TM), Subtarget(&STI) {
  45. auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
  46. // Booleans always contain 0 or 1.
  47. setBooleanContents(ZeroOrOneBooleanContent);
  48. // Except in SIMD vectors
  49. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  50. // We don't know the microarchitecture here, so just reduce register pressure.
  51. setSchedulingPreference(Sched::RegPressure);
  52. // Tell ISel that we have a stack pointer.
  53. setStackPointerRegisterToSaveRestore(
  54. Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
  55. // Set up the register classes.
  56. addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
  57. addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
  58. addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
  59. addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
  60. if (Subtarget->hasSIMD128()) {
  61. addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
  62. addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
  63. addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
  64. addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
  65. addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
  66. addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
  67. }
  68. if (Subtarget->hasReferenceTypes()) {
  69. addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
  70. addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
  71. }
  72. // Compute derived properties from the register classes.
  73. computeRegisterProperties(Subtarget->getRegisterInfo());
  74. // Transform loads and stores to pointers in address space 1 to loads and
  75. // stores to WebAssembly global variables, outside linear memory.
  76. for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
  77. setOperationAction(ISD::LOAD, T, Custom);
  78. setOperationAction(ISD::STORE, T, Custom);
  79. }
  80. if (Subtarget->hasSIMD128()) {
  81. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  82. MVT::v2f64}) {
  83. setOperationAction(ISD::LOAD, T, Custom);
  84. setOperationAction(ISD::STORE, T, Custom);
  85. }
  86. }
  87. if (Subtarget->hasReferenceTypes()) {
  88. // We need custom load and store lowering for externref, funcref, and
  89. // Other. The MVT::Other here represents tables of reference types.
  90. for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
  91. setOperationAction(ISD::LOAD, T, Custom);
  92. setOperationAction(ISD::STORE, T, Custom);
  93. }
  94. }
  95. setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
  96. setOperationAction(ISD::GlobalTLSAddress, MVTPtr, Custom);
  97. setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
  98. setOperationAction(ISD::JumpTable, MVTPtr, Custom);
  99. setOperationAction(ISD::BlockAddress, MVTPtr, Custom);
  100. setOperationAction(ISD::BRIND, MVT::Other, Custom);
  101. // Take the default expansion for va_arg, va_copy, and va_end. There is no
  102. // default action for va_start, so we do that custom.
  103. setOperationAction(ISD::VASTART, MVT::Other, Custom);
  104. setOperationAction(ISD::VAARG, MVT::Other, Expand);
  105. setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  106. setOperationAction(ISD::VAEND, MVT::Other, Expand);
  107. for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
  108. // Don't expand the floating-point types to constant pools.
  109. setOperationAction(ISD::ConstantFP, T, Legal);
  110. // Expand floating-point comparisons.
  111. for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
  112. ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
  113. setCondCodeAction(CC, T, Expand);
  114. // Expand floating-point library function operators.
  115. for (auto Op :
  116. {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
  117. setOperationAction(Op, T, Expand);
  118. // Note supported floating-point library function operators that otherwise
  119. // default to expand.
  120. for (auto Op :
  121. {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
  122. setOperationAction(Op, T, Legal);
  123. // Support minimum and maximum, which otherwise default to expand.
  124. setOperationAction(ISD::FMINIMUM, T, Legal);
  125. setOperationAction(ISD::FMAXIMUM, T, Legal);
  126. // WebAssembly currently has no builtin f16 support.
  127. setOperationAction(ISD::FP16_TO_FP, T, Expand);
  128. setOperationAction(ISD::FP_TO_FP16, T, Expand);
  129. setLoadExtAction(ISD::EXTLOAD, T, MVT::f16, Expand);
  130. setTruncStoreAction(T, MVT::f16, Expand);
  131. }
  132. // Expand unavailable integer operations.
  133. for (auto Op :
  134. {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
  135. ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
  136. ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
  137. for (auto T : {MVT::i32, MVT::i64})
  138. setOperationAction(Op, T, Expand);
  139. if (Subtarget->hasSIMD128())
  140. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
  141. setOperationAction(Op, T, Expand);
  142. }
  143. if (Subtarget->hasNontrappingFPToInt())
  144. for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
  145. for (auto T : {MVT::i32, MVT::i64})
  146. setOperationAction(Op, T, Custom);
  147. // SIMD-specific configuration
  148. if (Subtarget->hasSIMD128()) {
  149. // Hoist bitcasts out of shuffles
  150. setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  151. // Combine extends of extract_subvectors into widening ops
  152. setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
  153. // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
  154. // conversion ops
  155. setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
  156. ISD::EXTRACT_SUBVECTOR});
  157. // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
  158. // into conversion ops
  159. setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
  160. ISD::FP_ROUND, ISD::CONCAT_VECTORS});
  161. setTargetDAGCombine(ISD::TRUNCATE);
  162. // Support saturating add for i8x16 and i16x8
  163. for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
  164. for (auto T : {MVT::v16i8, MVT::v8i16})
  165. setOperationAction(Op, T, Legal);
  166. // Support integer abs
  167. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
  168. setOperationAction(ISD::ABS, T, Legal);
  169. // Custom lower BUILD_VECTORs to minimize number of replace_lanes
  170. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  171. MVT::v2f64})
  172. setOperationAction(ISD::BUILD_VECTOR, T, Custom);
  173. // We have custom shuffle lowering to expose the shuffle mask
  174. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  175. MVT::v2f64})
  176. setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom);
  177. // Support splatting
  178. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  179. MVT::v2f64})
  180. setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
  181. // Custom lowering since wasm shifts must have a scalar shift amount
  182. for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
  183. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
  184. setOperationAction(Op, T, Custom);
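// (Illustrative note: wasm's SIMD shifts, e.g. i32x4.shl, take a v128 operand
// plus a single i32 shift amount, so a v4i32 shift by a splatted amount can
// map directly onto one instruction; the custom lowering recovers that scalar
// amount.)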
  185. // Custom lower lane accesses to expand out variable indices
  186. for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
  187. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  188. MVT::v2f64})
  189. setOperationAction(Op, T, Custom);
  190. // There is no i8x16.mul instruction
  191. setOperationAction(ISD::MUL, MVT::v16i8, Expand);
  192. // There is no vector conditional select instruction
  193. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
  194. MVT::v2f64})
  195. setOperationAction(ISD::SELECT_CC, T, Expand);
  196. // Expand integer operations supported for scalars but not SIMD
  197. for (auto Op :
  198. {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
  199. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
  200. setOperationAction(Op, T, Expand);
  201. // But we do have integer min and max operations
  202. for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
  203. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
  204. setOperationAction(Op, T, Legal);
  205. // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
  206. setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
  207. setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
  208. setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
  209. // Custom lower bit counting operations for other types to scalarize them.
  210. for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
  211. for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
  212. setOperationAction(Op, T, Custom);
  213. // Expand float operations supported for scalars but not SIMD
  214. for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
  215. ISD::FEXP, ISD::FEXP2, ISD::FRINT})
  216. for (auto T : {MVT::v4f32, MVT::v2f64})
  217. setOperationAction(Op, T, Expand);
  218. // Unsigned comparison operations are unavailable for i64x2 vectors.
  219. for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
  220. setCondCodeAction(CC, MVT::v2i64, Custom);
  221. // 64x2 conversions are not in the spec
  222. for (auto Op :
  223. {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
  224. for (auto T : {MVT::v2i64, MVT::v2f64})
  225. setOperationAction(Op, T, Expand);
  226. // But saturating fp_to_int conversions are
  227. for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
  228. setOperationAction(Op, MVT::v4i32, Custom);
  229. }
  230. // As a special case, these operators use the type to mean the type to
  231. // sign-extend from.
  232. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  233. if (!Subtarget->hasSignExt()) {
  234. // Sign extends are legal only when extending a vector extract
  235. auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
  236. for (auto T : {MVT::i8, MVT::i16, MVT::i32})
  237. setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
  238. }
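// (Without the sign-ext feature there are no i32.extend8_s / i32.extend16_s
// instructions, but with SIMD an i8x16/i16x8.extract_lane_s already yields a
// sign-extended value, which is why the action above is Custom rather than
// Expand when SIMD is available.)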
  239. for (auto T : MVT::integer_fixedlen_vector_valuetypes())
  240. setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
  241. // Dynamic stack allocation: use the default expansion.
  242. setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  243. setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  244. setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
  245. setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
  246. setOperationAction(ISD::FrameIndex, MVT::i64, Custom);
  247. setOperationAction(ISD::CopyToReg, MVT::Other, Custom);
  248. // Expand these forms; we pattern-match the forms that we can handle in isel.
  249. for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
  250. for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
  251. setOperationAction(Op, T, Expand);
  252. // We have custom switch handling.
  253. setOperationAction(ISD::BR_JT, MVT::Other, Custom);
  254. // WebAssembly doesn't have:
  255. // - Floating-point extending loads.
  256. // - Floating-point truncating stores.
  257. // - i1 extending loads.
  258. // - truncating SIMD stores and most extending loads
  259. setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  260. setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  261. for (auto T : MVT::integer_valuetypes())
  262. for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
  263. setLoadExtAction(Ext, T, MVT::i1, Promote);
  264. if (Subtarget->hasSIMD128()) {
  265. for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
  266. MVT::v2f64}) {
  267. for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
  268. if (MVT(T) != MemT) {
  269. setTruncStoreAction(T, MemT, Expand);
  270. for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
  271. setLoadExtAction(Ext, T, MemT, Expand);
  272. }
  273. }
  274. }
  275. // But some vector extending loads are legal
  276. for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
  277. setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
  278. setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
  279. setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
  280. }
  281. setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
  282. }
  283. // Don't do anything clever with build_pairs
  284. setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  285. // Trap lowers to wasm unreachable
  286. setOperationAction(ISD::TRAP, MVT::Other, Legal);
  287. setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  288. // Exception handling intrinsics
  289. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  290. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  291. setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
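// Atomic operations wider than 64 bits are not supported directly and are
// expanded to __atomic_* libcalls by the AtomicExpand pass.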
  292. setMaxAtomicSizeInBitsSupported(64);
  293. // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
  294. // consistent with the f64 and f128 names.
  295. setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
  296. setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
  297. // Define the emscripten name for return address helper.
  298. // TODO: when implementing other Wasm backends, make this generic or only do
  299. // this on emscripten depending on what they end up doing.
  300. setLibcallName(RTLIB::RETURN_ADDRESS, "emscripten_return_address");
  301. // Always convert switches to br_tables unless there is only one case, which
  302. // is equivalent to a simple branch. This reduces code size for wasm, and we
  303. // defer possible jump table optimizations to the VM.
  304. setMinimumJumpTableEntries(2);
  305. }
  306. MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
  307. uint32_t AS) const {
  308. if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
  309. return MVT::externref;
  310. if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
  311. return MVT::funcref;
  312. return TargetLowering::getPointerTy(DL, AS);
  313. }
  314. MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
  315. uint32_t AS) const {
  316. if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
  317. return MVT::externref;
  318. if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
  319. return MVT::funcref;
  320. return TargetLowering::getPointerMemTy(DL, AS);
  321. }
  322. TargetLowering::AtomicExpansionKind
  323. WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  324. // We have wasm instructions for these
  325. switch (AI->getOperation()) {
  326. case AtomicRMWInst::Add:
  327. case AtomicRMWInst::Sub:
  328. case AtomicRMWInst::And:
  329. case AtomicRMWInst::Or:
  330. case AtomicRMWInst::Xor:
  331. case AtomicRMWInst::Xchg:
  332. return AtomicExpansionKind::None;
  333. default:
  334. break;
  335. }
  336. return AtomicExpansionKind::CmpXChg;
  337. }
  338. bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  339. // Implementation copied from X86TargetLowering.
  340. unsigned Opc = VecOp.getOpcode();
  341. // Assume target opcodes can't be scalarized.
  342. // TODO - do we have any exceptions?
  343. if (Opc >= ISD::BUILTIN_OP_END)
  344. return false;
  345. // If the vector op is not supported, try to convert to scalar.
  346. EVT VecVT = VecOp.getValueType();
  347. if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
  348. return true;
  349. // If the vector op is supported, but the scalar op is not, the transform may
  350. // not be worthwhile.
  351. EVT ScalarVT = VecVT.getScalarType();
  352. return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
  353. }
  354. FastISel *WebAssemblyTargetLowering::createFastISel(
  355. FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
  356. return WebAssembly::createFastISel(FuncInfo, LibInfo);
  357. }
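// Pick an integer type wide enough to hold any shift amount for VT: round the
// bit width up to a power of two (at least 8), but for types wider than 64
// bits fall back to i32, since those shifts are lowered to compiler-rt
// libcalls that take an i32 count.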
  358. MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
  359. EVT VT) const {
  360. unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  361. if (BitWidth > 1 && BitWidth < 8)
  362. BitWidth = 8;
  363. if (BitWidth > 64) {
  364. // The shift will be lowered to a libcall, and compiler-rt libcalls expect
  365. // the count to be an i32.
  366. BitWidth = 32;
  367. assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
  368. "32-bit shift counts ought to be enough for anyone");
  369. }
  370. MVT Result = MVT::getIntegerVT(BitWidth);
  371. assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
  372. "Unable to represent scalar shift amount type");
  373. return Result;
  374. }
  375. // Lower an fp-to-int conversion operator from the LLVM opcode, which has an
  376. // undefined result on invalid/overflow, to the WebAssembly opcode, which
  377. // traps on invalid/overflow.
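// The emitted code forms a small diamond; an illustrative sketch for the
// signed f32 -> i32 case (the unsigned case adds a >= 0.0 check and uses 0 as
// the substitute value):
//
//   Tmp0 = f32.abs(In)
//   Cmp  = f32.lt(Tmp0, 2^31)
//   br_if (i32.eqz Cmp) -> TrueMBB        ; out of range
//   FalseMBB: Out = i32.trunc_f32_s(In)
//   TrueMBB:  Out = i32.const INT32_MIN
//   DoneMBB:  Out = phi(FalseMBB, TrueMBB)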
  378. static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
  379. MachineBasicBlock *BB,
  380. const TargetInstrInfo &TII,
  381. bool IsUnsigned, bool Int64,
  382. bool Float64, unsigned LoweredOpcode) {
  383. MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  384. Register OutReg = MI.getOperand(0).getReg();
  385. Register InReg = MI.getOperand(1).getReg();
  386. unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
  387. unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
  388. unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
  389. unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
  390. unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
  391. unsigned Eqz = WebAssembly::EQZ_I32;
  392. unsigned And = WebAssembly::AND_I32;
  393. int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
  394. int64_t Substitute = IsUnsigned ? 0 : Limit;
  395. double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
  396. auto &Context = BB->getParent()->getFunction().getContext();
  397. Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
  398. const BasicBlock *LLVMBB = BB->getBasicBlock();
  399. MachineFunction *F = BB->getParent();
  400. MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
  401. MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
  402. MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
  403. MachineFunction::iterator It = ++BB->getIterator();
  404. F->insert(It, FalseMBB);
  405. F->insert(It, TrueMBB);
  406. F->insert(It, DoneMBB);
  407. // Transfer the remainder of BB and its successor edges to DoneMBB.
  408. DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
  409. DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
  410. BB->addSuccessor(TrueMBB);
  411. BB->addSuccessor(FalseMBB);
  412. TrueMBB->addSuccessor(DoneMBB);
  413. FalseMBB->addSuccessor(DoneMBB);
  414. unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
  415. Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  416. Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  417. CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  418. EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  419. FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  420. TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
  421. MI.eraseFromParent();
  422. // For signed numbers, we can do a single comparison to determine whether
  423. // fabs(x) is within range.
  424. if (IsUnsigned) {
  425. Tmp0 = InReg;
  426. } else {
  427. BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
  428. }
  429. BuildMI(BB, DL, TII.get(FConst), Tmp1)
  430. .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
  431. BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
  432. // For unsigned numbers, we have to do a separate comparison with zero.
  433. if (IsUnsigned) {
  434. Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
  435. Register SecondCmpReg =
  436. MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  437. Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  438. BuildMI(BB, DL, TII.get(FConst), Tmp1)
  439. .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
  440. BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
  441. BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
  442. CmpReg = AndReg;
  443. }
  444. BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
  445. // Create the CFG diamond to select between doing the conversion or using
  446. // the substitute value.
  447. BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
  448. BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
  449. BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
  450. BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
  451. BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
  452. .addReg(FalseReg)
  453. .addMBB(FalseMBB)
  454. .addReg(TrueReg)
  455. .addMBB(TrueMBB);
  456. return DoneMBB;
  457. }
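// Combine a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS) pseudo-instruction
// pair into a single CALL / RET_CALL / CALL_INDIRECT / RET_CALL_INDIRECT
// machine instruction, adding the type-index and table operands for indirect
// calls and the __funcref_call_table bookkeeping for funcref calls.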
  458. static MachineBasicBlock *
  459. LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
  460. const WebAssemblySubtarget *Subtarget,
  461. const TargetInstrInfo &TII) {
  462. MachineInstr &CallParams = *CallResults.getPrevNode();
  463. assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
  464. assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
  465. CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
  466. bool IsIndirect = CallParams.getOperand(0).isReg();
  467. bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
  468. bool IsFuncrefCall = false;
  469. if (IsIndirect) {
  470. Register Reg = CallParams.getOperand(0).getReg();
  471. const MachineFunction *MF = BB->getParent();
  472. const MachineRegisterInfo &MRI = MF->getRegInfo();
  473. const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
  474. IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
  475. assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
  476. }
  477. unsigned CallOp;
  478. if (IsIndirect && IsRetCall) {
  479. CallOp = WebAssembly::RET_CALL_INDIRECT;
  480. } else if (IsIndirect) {
  481. CallOp = WebAssembly::CALL_INDIRECT;
  482. } else if (IsRetCall) {
  483. CallOp = WebAssembly::RET_CALL;
  484. } else {
  485. CallOp = WebAssembly::CALL;
  486. }
  487. MachineFunction &MF = *BB->getParent();
  488. const MCInstrDesc &MCID = TII.get(CallOp);
  489. MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
  490. // See if we must truncate the function pointer.
  491. // CALL_INDIRECT takes an i32, but in wasm64 we represent function pointers
  492. // as 64-bit for uniformity with other pointer types.
  493. // See also: WebAssemblyFastISel::selectCall
  494. if (IsIndirect && MF.getSubtarget<WebAssemblySubtarget>().hasAddr64()) {
  495. Register Reg32 =
  496. MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
  497. auto &FnPtr = CallParams.getOperand(0);
  498. BuildMI(*BB, CallResults.getIterator(), DL,
  499. TII.get(WebAssembly::I32_WRAP_I64), Reg32)
  500. .addReg(FnPtr.getReg());
  501. FnPtr.setReg(Reg32);
  502. }
  503. // Move the function pointer to the end of the arguments for indirect calls
  504. if (IsIndirect) {
  505. auto FnPtr = CallParams.getOperand(0);
  506. CallParams.removeOperand(0);
  507. // For funcrefs, call_indirect is done through __funcref_call_table and the
  508. // funcref is always installed in slot 0 of the table. Therefore, instead of
  509. // appending the function pointer to the end of the params list, we append a
  510. // zero, which is the index into
  511. // __funcref_call_table.
  512. if (IsFuncrefCall) {
  513. Register RegZero =
  514. MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
  515. MachineInstrBuilder MIBC0 =
  516. BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
  517. BB->insert(CallResults.getIterator(), MIBC0);
  518. MachineInstrBuilder(MF, CallParams).addReg(RegZero);
  519. } else
  520. CallParams.addOperand(FnPtr);
  521. }
  522. for (auto Def : CallResults.defs())
  523. MIB.add(Def);
  524. if (IsIndirect) {
  525. // Placeholder for the type index.
  526. MIB.addImm(0);
  527. // The table into which this call_indirect indexes.
  528. MCSymbolWasm *Table = IsFuncrefCall
  529. ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
  530. MF.getContext(), Subtarget)
  531. : WebAssembly::getOrCreateFunctionTableSymbol(
  532. MF.getContext(), Subtarget);
  533. if (Subtarget->hasReferenceTypes()) {
  534. MIB.addSym(Table);
  535. } else {
  536. // For the MVP there is at most one table whose number is 0, but we can't
  537. // write a table symbol or issue relocations. Instead we just ensure the
  538. // table is live and write a zero.
  539. Table->setNoStrip();
  540. MIB.addImm(0);
  541. }
  542. }
  543. for (auto Use : CallParams.uses())
  544. MIB.add(Use);
  545. BB->insert(CallResults.getIterator(), MIB);
  546. CallParams.eraseFromParent();
  547. CallResults.eraseFromParent();
  548. // If this is a funcref call, to avoid hidden GC roots, we need to clear the
  549. // table slot with ref.null upon call_indirect return.
  550. //
  551. // This generates the following code, which comes right after a call_indirect
  552. // of a funcref:
  553. //
  554. // i32.const 0
  555. // ref.null func
  556. // table.set __funcref_call_table
  557. if (IsIndirect && IsFuncrefCall) {
  558. MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
  559. MF.getContext(), Subtarget);
  560. Register RegZero =
  561. MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
  562. MachineInstr *Const0 =
  563. BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
  564. BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
  565. Register RegFuncref =
  566. MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
  567. MachineInstr *RefNull =
  568. BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
  569. BB->insertAfter(Const0->getIterator(), RefNull);
  570. MachineInstr *TableSet =
  571. BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
  572. .addSym(Table)
  573. .addReg(RegZero)
  574. .addReg(RegFuncref);
  575. BB->insertAfter(RefNull->getIterator(), TableSet);
  576. }
  577. return BB;
  578. }
  579. MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
  580. MachineInstr &MI, MachineBasicBlock *BB) const {
  581. const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
  582. DebugLoc DL = MI.getDebugLoc();
  583. switch (MI.getOpcode()) {
  584. default:
  585. llvm_unreachable("Unexpected instr type to insert");
  586. case WebAssembly::FP_TO_SINT_I32_F32:
  587. return LowerFPToInt(MI, DL, BB, TII, false, false, false,
  588. WebAssembly::I32_TRUNC_S_F32);
  589. case WebAssembly::FP_TO_UINT_I32_F32:
  590. return LowerFPToInt(MI, DL, BB, TII, true, false, false,
  591. WebAssembly::I32_TRUNC_U_F32);
  592. case WebAssembly::FP_TO_SINT_I64_F32:
  593. return LowerFPToInt(MI, DL, BB, TII, false, true, false,
  594. WebAssembly::I64_TRUNC_S_F32);
  595. case WebAssembly::FP_TO_UINT_I64_F32:
  596. return LowerFPToInt(MI, DL, BB, TII, true, true, false,
  597. WebAssembly::I64_TRUNC_U_F32);
  598. case WebAssembly::FP_TO_SINT_I32_F64:
  599. return LowerFPToInt(MI, DL, BB, TII, false, false, true,
  600. WebAssembly::I32_TRUNC_S_F64);
  601. case WebAssembly::FP_TO_UINT_I32_F64:
  602. return LowerFPToInt(MI, DL, BB, TII, true, false, true,
  603. WebAssembly::I32_TRUNC_U_F64);
  604. case WebAssembly::FP_TO_SINT_I64_F64:
  605. return LowerFPToInt(MI, DL, BB, TII, false, true, true,
  606. WebAssembly::I64_TRUNC_S_F64);
  607. case WebAssembly::FP_TO_UINT_I64_F64:
  608. return LowerFPToInt(MI, DL, BB, TII, true, true, true,
  609. WebAssembly::I64_TRUNC_U_F64);
  610. case WebAssembly::CALL_RESULTS:
  611. case WebAssembly::RET_CALL_RESULTS:
  612. return LowerCallResults(MI, DL, BB, Subtarget, TII);
  613. }
  614. }
  615. const char *
  616. WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
  617. switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
  618. case WebAssemblyISD::FIRST_NUMBER:
  619. case WebAssemblyISD::FIRST_MEM_OPCODE:
  620. break;
  621. #define HANDLE_NODETYPE(NODE) \
  622. case WebAssemblyISD::NODE: \
  623. return "WebAssemblyISD::" #NODE;
  624. #define HANDLE_MEM_NODETYPE(NODE) HANDLE_NODETYPE(NODE)
  625. #include "WebAssemblyISD.def"
  626. #undef HANDLE_MEM_NODETYPE
  627. #undef HANDLE_NODETYPE
  628. }
  629. return nullptr;
  630. }
  631. std::pair<unsigned, const TargetRegisterClass *>
  632. WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
  633. const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  634. // First, see if this is a constraint that directly corresponds to a
  635. // WebAssembly register class.
  636. if (Constraint.size() == 1) {
  637. switch (Constraint[0]) {
  638. case 'r':
  639. assert(VT != MVT::iPTR && "Pointer MVT not expected here");
  640. if (Subtarget->hasSIMD128() && VT.isVector()) {
  641. if (VT.getSizeInBits() == 128)
  642. return std::make_pair(0U, &WebAssembly::V128RegClass);
  643. }
  644. if (VT.isInteger() && !VT.isVector()) {
  645. if (VT.getSizeInBits() <= 32)
  646. return std::make_pair(0U, &WebAssembly::I32RegClass);
  647. if (VT.getSizeInBits() <= 64)
  648. return std::make_pair(0U, &WebAssembly::I64RegClass);
  649. }
  650. if (VT.isFloatingPoint() && !VT.isVector()) {
  651. switch (VT.getSizeInBits()) {
  652. case 32:
  653. return std::make_pair(0U, &WebAssembly::F32RegClass);
  654. case 64:
  655. return std::make_pair(0U, &WebAssembly::F64RegClass);
  656. default:
  657. break;
  658. }
  659. }
  660. break;
  661. default:
  662. break;
  663. }
  664. }
  665. return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
  666. }
  667. bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
  668. // Assume ctz is a relatively cheap operation.
  669. return true;
  670. }
  671. bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
  672. // Assume clz is a relatively cheap operation.
  673. return true;
  674. }
  675. bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
  676. const AddrMode &AM,
  677. Type *Ty, unsigned AS,
  678. Instruction *I) const {
  679. // WebAssembly offsets are added as unsigned without wrapping. The
  680. // isLegalAddressingMode hook gives us no way to determine if wrapping could
  681. // be happening, so we approximate this by accepting only non-negative offsets.
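// For example, `base + 16` can be folded into a load/store offset immediate,
// while `base - 8` or `base + 4*index` cannot be expressed and are rejected
// below.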
  682. if (AM.BaseOffs < 0)
  683. return false;
  684. // WebAssembly has no scale register operands.
  685. if (AM.Scale != 0)
  686. return false;
  687. // Everything else is legal.
  688. return true;
  689. }
  690. bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
  691. EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
  692. MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
  693. // WebAssembly supports unaligned accesses, though it should be declared
  694. // with the p2align attribute on loads and stores which do so, and there
  695. // may be a performance impact. We tell LLVM they're "fast" because
  696. // for the kinds of things that LLVM uses this for (merging adjacent stores
  697. // of constants, etc.), WebAssembly implementations will either want the
  698. // unaligned access or they'll split anyway.
  699. if (Fast)
  700. *Fast = 1;
  701. return true;
  702. }
  703. bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
  704. AttributeList Attr) const {
  705. // The current thinking is that wasm engines will perform this optimization,
  706. // so we can save on code size.
  707. return true;
  708. }
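// Only keep load+extend pairs that match wasm's SIMD extending loads
// (i16x8.load8x8_s/u, i32x4.load16x4_s/u, i64x2.load32x2_s/u) as a single
// extending-load node.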
  709. bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
  710. EVT ExtT = ExtVal.getValueType();
  711. EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
  712. return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
  713. (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
  714. (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
  715. }
  716. bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
  717. const GlobalAddressSDNode *GA) const {
  718. // Wasm doesn't support function addresses with offsets
  719. const GlobalValue *GV = GA->getGlobal();
  720. return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
  721. }
  722. EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
  723. LLVMContext &C,
  724. EVT VT) const {
  725. if (VT.isVector())
  726. return VT.changeVectorElementTypeToInteger();
  727. // So far, all branch instructions in Wasm take an I32 condition.
  728. // The default TargetLowering::getSetCCResultType returns the pointer size,
  729. // which would be useful to reduce instruction counts when testing
  730. // against 64-bit pointers/values if at some point Wasm supports that.
  731. return EVT::getIntegerVT(C, 32);
  732. }
  733. bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
  734. const CallInst &I,
  735. MachineFunction &MF,
  736. unsigned Intrinsic) const {
  737. switch (Intrinsic) {
  738. case Intrinsic::wasm_memory_atomic_notify:
  739. Info.opc = ISD::INTRINSIC_W_CHAIN;
  740. Info.memVT = MVT::i32;
  741. Info.ptrVal = I.getArgOperand(0);
  742. Info.offset = 0;
  743. Info.align = Align(4);
  744. // The atomic.notify instruction does not really load the memory specified
  745. // by this argument, but a MachineMemOperand must be either a load or a
  746. // store, so we set this to a load.
  747. // FIXME Volatile isn't really correct, but currently all LLVM atomic
  748. // instructions are treated as volatiles in the backend, so we should be
  749. // consistent. The same applies for wasm_atomic_wait intrinsics too.
  750. Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
  751. return true;
  752. case Intrinsic::wasm_memory_atomic_wait32:
  753. Info.opc = ISD::INTRINSIC_W_CHAIN;
  754. Info.memVT = MVT::i32;
  755. Info.ptrVal = I.getArgOperand(0);
  756. Info.offset = 0;
  757. Info.align = Align(4);
  758. Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
  759. return true;
  760. case Intrinsic::wasm_memory_atomic_wait64:
  761. Info.opc = ISD::INTRINSIC_W_CHAIN;
  762. Info.memVT = MVT::i64;
  763. Info.ptrVal = I.getArgOperand(0);
  764. Info.offset = 0;
  765. Info.align = Align(8);
  766. Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
  767. return true;
  768. default:
  769. return false;
  770. }
  771. }
  772. void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
  773. const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
  774. const SelectionDAG &DAG, unsigned Depth) const {
  775. switch (Op.getOpcode()) {
  776. default:
  777. break;
  778. case ISD::INTRINSIC_WO_CHAIN: {
  779. unsigned IntNo = Op.getConstantOperandVal(0);
  780. switch (IntNo) {
  781. default:
  782. break;
  783. case Intrinsic::wasm_bitmask: {
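// bitmask produces one bit per input lane in an i32 result, so all bits above
// the lane count are known to be zero; e.g. for a v16i8 operand only the low
// 16 bits can ever be set.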
  784. unsigned BitWidth = Known.getBitWidth();
  785. EVT VT = Op.getOperand(1).getSimpleValueType();
  786. unsigned PossibleBits = VT.getVectorNumElements();
  787. APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
  788. Known.Zero |= ZeroMask;
  789. break;
  790. }
  791. }
  792. }
  793. }
  794. }
  795. TargetLoweringBase::LegalizeTypeAction
  796. WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
  797. if (VT.isFixedLengthVector()) {
  798. MVT EltVT = VT.getVectorElementType();
  799. // We have legal vector types with these lane types, so widening the
  800. // vector would let us use some of the lanes directly without having to
  801. // extend or truncate values.
  802. if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
  803. EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
  804. return TypeWidenVector;
  805. }
  806. return TargetLoweringBase::getPreferredVectorAction(VT);
  807. }
  808. bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
  809. SDValue Op, const TargetLoweringOpt &TLO) const {
  810. // The ISel process runs DAGCombiner after legalization; this step is called
  811. // the SelectionDAG optimization phase. This post-legalization combining
  812. // process runs DAGCombiner on each node, and if there was a change to be
  813. // made, re-runs legalization on the node and its users to make sure
  814. // everything is in a legalized state.
  815. //
  816. // The legalization calls lowering routines, and we do our custom lowering for
  817. // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
  818. // into zeros. But there is a set of routines in DAGCombiner that turns unused
  819. // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
  820. // turns unused vector elements into undefs. But this routine does not work
  821. // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
  822. // combination can result in an infinite loop, in which undefs are converted to
  823. // zeros in legalization and back to undefs in combining.
  824. //
  825. // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
  826. // running for build_vectors.
  827. if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
  828. return false;
  829. return true;
  830. }
  831. //===----------------------------------------------------------------------===//
  832. // WebAssembly Lowering private implementation.
  833. //===----------------------------------------------------------------------===//
  834. //===----------------------------------------------------------------------===//
  835. // Lowering Code
  836. //===----------------------------------------------------------------------===//
  837. static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  838. MachineFunction &MF = DAG.getMachineFunction();
  839. DAG.getContext()->diagnose(
  840. DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
  841. }
  842. // Test whether the given calling convention is supported.
  843. static bool callingConvSupported(CallingConv::ID CallConv) {
  844. // We currently support the language-independent target-independent
  845. // conventions. We don't yet have a way to annotate calls with properties like
  846. // "cold", and we don't have any call-clobbered registers, so these are mostly
  847. // all handled the same.
  848. return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
  849. CallConv == CallingConv::Cold ||
  850. CallConv == CallingConv::PreserveMost ||
  851. CallConv == CallingConv::PreserveAll ||
  852. CallConv == CallingConv::CXX_FAST_TLS ||
  853. CallConv == CallingConv::WASM_EmscriptenInvoke ||
  854. CallConv == CallingConv::Swift;
  855. }
  856. SDValue
  857. WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
  858. SmallVectorImpl<SDValue> &InVals) const {
  859. SelectionDAG &DAG = CLI.DAG;
  860. SDLoc DL = CLI.DL;
  861. SDValue Chain = CLI.Chain;
  862. SDValue Callee = CLI.Callee;
  863. MachineFunction &MF = DAG.getMachineFunction();
  864. auto Layout = MF.getDataLayout();
  865. CallingConv::ID CallConv = CLI.CallConv;
  866. if (!callingConvSupported(CallConv))
  867. fail(DL, DAG,
  868. "WebAssembly doesn't support language-specific or target-specific "
  869. "calling conventions yet");
  870. if (CLI.IsPatchPoint)
  871. fail(DL, DAG, "WebAssembly doesn't support patch point yet");
  872. if (CLI.IsTailCall) {
  873. auto NoTail = [&](const char *Msg) {
  874. if (CLI.CB && CLI.CB->isMustTailCall())
  875. fail(DL, DAG, Msg);
  876. CLI.IsTailCall = false;
  877. };
  878. if (!Subtarget->hasTailCall())
  879. NoTail("WebAssembly 'tail-call' feature not enabled");
  880. // Varargs calls cannot be tail calls because the buffer is on the stack
  881. if (CLI.IsVarArg)
  882. NoTail("WebAssembly does not support varargs tail calls");
  883. // Do not tail call unless caller and callee return types match
  884. const Function &F = MF.getFunction();
  885. const TargetMachine &TM = getTargetMachine();
  886. Type *RetTy = F.getReturnType();
  887. SmallVector<MVT, 4> CallerRetTys;
  888. SmallVector<MVT, 4> CalleeRetTys;
  889. computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
  890. computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
  891. bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
  892. std::equal(CallerRetTys.begin(), CallerRetTys.end(),
  893. CalleeRetTys.begin());
  894. if (!TypesMatch)
  895. NoTail("WebAssembly tail call requires caller and callee return types to "
  896. "match");
  897. // If pointers to local stack values are passed, we cannot tail call
  898. if (CLI.CB) {
  899. for (auto &Arg : CLI.CB->args()) {
  900. Value *Val = Arg.get();
  901. // Trace the value back through pointer operations
  902. while (true) {
  903. Value *Src = Val->stripPointerCastsAndAliases();
  904. if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
  905. Src = GEP->getPointerOperand();
  906. if (Val == Src)
  907. break;
  908. Val = Src;
  909. }
  910. if (isa<AllocaInst>(Val)) {
  911. NoTail(
  912. "WebAssembly does not support tail calling with stack arguments");
  913. break;
  914. }
  915. }
  916. }
  917. }
  918. SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  919. SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  920. SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  921. // The generic code may have added an sret argument. If we're lowering an
  922. // invoke function, the ABI requires that the function pointer be the first
  923. // argument, so we may have to swap the arguments.
  924. if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
  925. Outs[0].Flags.isSRet()) {
  926. std::swap(Outs[0], Outs[1]);
  927. std::swap(OutVals[0], OutVals[1]);
  928. }
  929. bool HasSwiftSelfArg = false;
  930. bool HasSwiftErrorArg = false;
  931. unsigned NumFixedArgs = 0;
  932. for (unsigned I = 0; I < Outs.size(); ++I) {
  933. const ISD::OutputArg &Out = Outs[I];
  934. SDValue &OutVal = OutVals[I];
  935. HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
  936. HasSwiftErrorArg |= Out.Flags.isSwiftError();
  937. if (Out.Flags.isNest())
  938. fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
  939. if (Out.Flags.isInAlloca())
  940. fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
  941. if (Out.Flags.isInConsecutiveRegs())
  942. fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
  943. if (Out.Flags.isInConsecutiveRegsLast())
  944. fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
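// Byval arguments are passed by value: copy them into a fresh stack object
// and pass a pointer to that copy in place of the original operand.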
  945. if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
  946. auto &MFI = MF.getFrameInfo();
  947. int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
  948. Out.Flags.getNonZeroByValAlign(),
  949. /*isSS=*/false);
  950. SDValue SizeNode =
  951. DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
  952. SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
  953. Chain = DAG.getMemcpy(
  954. Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getNonZeroByValAlign(),
  955. /*isVolatile*/ false, /*AlwaysInline=*/false,
  956. /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
  957. OutVal = FINode;
  958. }
  959. // Count the number of fixed args *after* legalization.
  960. NumFixedArgs += Out.IsFixed;
  961. }
  962. bool IsVarArg = CLI.IsVarArg;
  963. auto PtrVT = getPointerTy(Layout);
  964. // For swiftcc, emit additional swiftself and swifterror arguments if they
  965. // are not present. These additional arguments are also added to the callee
  966. // signature; they are necessary to match the callee and caller signatures
  967. // for indirect calls.
  968. if (CallConv == CallingConv::Swift) {
  969. if (!HasSwiftSelfArg) {
  970. NumFixedArgs++;
  971. ISD::OutputArg Arg;
  972. Arg.Flags.setSwiftSelf();
  973. CLI.Outs.push_back(Arg);
  974. SDValue ArgVal = DAG.getUNDEF(PtrVT);
  975. CLI.OutVals.push_back(ArgVal);
  976. }
  977. if (!HasSwiftErrorArg) {
  978. NumFixedArgs++;
  979. ISD::OutputArg Arg;
  980. Arg.Flags.setSwiftError();
  981. CLI.Outs.push_back(Arg);
  982. SDValue ArgVal = DAG.getUNDEF(PtrVT);
  983. CLI.OutVals.push_back(ArgVal);
  984. }
  985. }
  986. // Analyze operands of the call, assigning locations to each operand.
  987. SmallVector<CCValAssign, 16> ArgLocs;
  988. CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  989. if (IsVarArg) {
  990. // Outgoing non-fixed arguments are placed in a buffer. First
  991. // compute their offsets and the total amount of buffer space needed.
  992. for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
  993. const ISD::OutputArg &Out = Outs[I];
  994. SDValue &Arg = OutVals[I];
  995. EVT VT = Arg.getValueType();
  996. assert(VT != MVT::iPTR && "Legalized args should be concrete");
  997. Type *Ty = VT.getTypeForEVT(*DAG.getContext());
  998. Align Alignment =
  999. std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
  1000. unsigned Offset =
  1001. CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
  1002. CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
  1003. Offset, VT.getSimpleVT(),
  1004. CCValAssign::Full));
  1005. }
  1006. }
  1007. unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
  1008. SDValue FINode;
  1009. if (IsVarArg && NumBytes) {
  1010. // For non-fixed arguments, next emit stores to store the argument values
  1011. // to the stack buffer at the offsets computed above.
  1012. int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
  1013. Layout.getStackAlignment(),
  1014. /*isSS=*/false);
  1015. unsigned ValNo = 0;
  1016. SmallVector<SDValue, 8> Chains;
  1017. for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
  1018. assert(ArgLocs[ValNo].getValNo() == ValNo &&
  1019. "ArgLocs should remain in order and only hold varargs args");
  1020. unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
  1021. FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
  1022. SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
  1023. DAG.getConstant(Offset, DL, PtrVT));
  1024. Chains.push_back(
  1025. DAG.getStore(Chain, DL, Arg, Add,
  1026. MachinePointerInfo::getFixedStack(MF, FI, Offset)));
  1027. }
  1028. if (!Chains.empty())
  1029. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  1030. } else if (IsVarArg) {
  1031. FINode = DAG.getIntPtrConstant(0, DL);
  1032. }
  1033. if (Callee->getOpcode() == ISD::GlobalAddress) {
  1034. // If the callee is a GlobalAddress node (quite common, every direct call
  1035. // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
  1036. // doesn't at MO_GOT which is not needed for direct calls.
  1037. GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
  1038. Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
  1039. getPointerTy(DAG.getDataLayout()),
  1040. GA->getOffset());
  1041. Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
  1042. getPointerTy(DAG.getDataLayout()), Callee);
  1043. }
  1044. // Compute the operands for the CALLn node.
  1045. SmallVector<SDValue, 16> Ops;
  1046. Ops.push_back(Chain);
  1047. Ops.push_back(Callee);
  1048. // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  1049. // isn't reliable.
  1050. Ops.append(OutVals.begin(),
  1051. IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  1052. // Add a pointer to the vararg buffer.
  1053. if (IsVarArg)
  1054. Ops.push_back(FINode);
  1055. SmallVector<EVT, 8> InTys;
  1056. for (const auto &In : Ins) {
  1057. assert(!In.Flags.isByVal() && "byval is not valid for return values");
  1058. assert(!In.Flags.isNest() && "nest is not valid for return values");
  1059. if (In.Flags.isInAlloca())
  1060. fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
  1061. if (In.Flags.isInConsecutiveRegs())
  1062. fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
  1063. if (In.Flags.isInConsecutiveRegsLast())
  1064. fail(DL, DAG,
  1065. "WebAssembly hasn't implemented cons regs last return values");
  1066. // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
  1067. // registers.
  1068. InTys.push_back(In.VT);
  1069. }
  1070. // Lastly, if this is a call to a funcref we need to add an instruction
  1071. // table.set to the chain and transform the call.
  1072. if (CLI.CB &&
  1073. WebAssembly::isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
  1074. // In the absence of function references proposal where a funcref call is
  1075. // lowered to call_ref, using reference types we generate a table.set to set
  1076. // the funcref to a special table used solely for this purpose, followed by
  1077. // a call_indirect. Here we just generate the table set, and return the
  1078. // SDValue of the table.set so that LowerCall can finalize the lowering by
  1079. // generating the call_indirect.
  1080. SDValue Chain = Ops[0];
  1081. MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
  1082. MF.getContext(), Subtarget);
  1083. SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
  1084. SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
  1085. SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
  1086. SDValue TableSet = DAG.getMemIntrinsicNode(
  1087. WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
  1088. MVT::funcref,
  1089. // Machine Mem Operand args
  1090. MachinePointerInfo(
  1091. WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
  1092. CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
  1093. MachineMemOperand::MOStore);
  1094. Ops[0] = TableSet; // The new chain is the TableSet itself
  1095. }
  1096. if (CLI.IsTailCall) {
  1097. // ret_calls do not return values to the current frame
  1098. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  1099. return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  1100. }
  1101. InTys.push_back(MVT::Other);
  1102. SDVTList InTyList = DAG.getVTList(InTys);
  1103. SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
  1104. for (size_t I = 0; I < Ins.size(); ++I)
  1105. InVals.push_back(Res.getValue(I));
  1106. // Return the chain
  1107. return Res.getValue(Ins.size());
  1108. }
  1109. bool WebAssemblyTargetLowering::CanLowerReturn(
  1110. CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
  1111. const SmallVectorImpl<ISD::OutputArg> &Outs,
  1112. LLVMContext & /*Context*/) const {
  1113. // WebAssembly can only handle returning tuples with multivalue enabled
  1114. return Subtarget->hasMultivalue() || Outs.size() <= 1;
  1115. }
  1116. SDValue WebAssemblyTargetLowering::LowerReturn(
  1117. SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
  1118. const SmallVectorImpl<ISD::OutputArg> &Outs,
  1119. const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
  1120. SelectionDAG &DAG) const {
  1121. assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
  1122. "MVP WebAssembly can only return up to one value");
  1123. if (!callingConvSupported(CallConv))
  1124. fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
  1125. SmallVector<SDValue, 4> RetOps(1, Chain);
  1126. RetOps.append(OutVals.begin(), OutVals.end());
  1127. Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
  1128. // Record the number and types of the return values.
  1129. for (const ISD::OutputArg &Out : Outs) {
  1130. assert(!Out.Flags.isByVal() && "byval is not valid for return values");
  1131. assert(!Out.Flags.isNest() && "nest is not valid for return values");
  1132. assert(Out.IsFixed && "non-fixed return value is not valid");
  1133. if (Out.Flags.isInAlloca())
  1134. fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
  1135. if (Out.Flags.isInConsecutiveRegs())
  1136. fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
  1137. if (Out.Flags.isInConsecutiveRegsLast())
  1138. fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  1139. }
  1140. return Chain;
  1141. }
  1142. SDValue WebAssemblyTargetLowering::LowerFormalArguments(
  1143. SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
  1144. const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
  1145. SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  1146. if (!callingConvSupported(CallConv))
  1147. fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
  1148. MachineFunction &MF = DAG.getMachineFunction();
  1149. auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
  1150. // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  1151. // of the incoming values before they're represented by virtual registers.
  1152. MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
  1153. bool HasSwiftErrorArg = false;
  1154. bool HasSwiftSelfArg = false;
  1155. for (const ISD::InputArg &In : Ins) {
  1156. HasSwiftSelfArg |= In.Flags.isSwiftSelf();
  1157. HasSwiftErrorArg |= In.Flags.isSwiftError();
  1158. if (In.Flags.isInAlloca())
  1159. fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
  1160. if (In.Flags.isNest())
  1161. fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
  1162. if (In.Flags.isInConsecutiveRegs())
  1163. fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
  1164. if (In.Flags.isInConsecutiveRegsLast())
  1165. fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
  1166. // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
  1167. // registers.
  1168. InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
  1169. DAG.getTargetConstant(InVals.size(),
  1170. DL, MVT::i32))
  1171. : DAG.getUNDEF(In.VT));
  1172. // Record the number and types of arguments.
  1173. MFI->addParam(In.VT);
  1174. }
  1175. // For swiftcc, emit additional swiftself and swifterror arguments
  1176. // if there aren't. These additional arguments are also added for callee
  1177. // signature They are necessary to match callee and caller signature for
  1178. // indirect call.
  1179. auto PtrVT = getPointerTy(MF.getDataLayout());
  1180. if (CallConv == CallingConv::Swift) {
  1181. if (!HasSwiftSelfArg) {
  1182. MFI->addParam(PtrVT);
  1183. }
  1184. if (!HasSwiftErrorArg) {
  1185. MFI->addParam(PtrVT);
  1186. }
  1187. }
  1188. // Varargs are copied into a buffer allocated by the caller, and a pointer to
  1189. // the buffer is passed as an argument.
  1190. if (IsVarArg) {
  1191. MVT PtrVT = getPointerTy(MF.getDataLayout());
  1192. Register VarargVreg =
  1193. MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
  1194. MFI->setVarargBufferVreg(VarargVreg);
  1195. Chain = DAG.getCopyToReg(
  1196. Chain, DL, VarargVreg,
  1197. DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
  1198. DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
  1199. MFI->addParam(PtrVT);
  1200. }
  1201. // Record the number and types of arguments and results.
  1202. SmallVector<MVT, 4> Params;
  1203. SmallVector<MVT, 4> Results;
  1204. computeSignatureVTs(MF.getFunction().getFunctionType(), &MF.getFunction(),
  1205. MF.getFunction(), DAG.getTarget(), Params, Results);
  1206. for (MVT VT : Results)
  1207. MFI->addResult(VT);
  1208. // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  1209. // the param logic here with ComputeSignatureVTs
  1210. assert(MFI->getParams().size() == Params.size() &&
  1211. std::equal(MFI->getParams().begin(), MFI->getParams().end(),
  1212. Params.begin()));
  1213. return Chain;
  1214. }
  1215. void WebAssemblyTargetLowering::ReplaceNodeResults(
  1216. SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  1217. switch (N->getOpcode()) {
  1218. case ISD::SIGN_EXTEND_INREG:
  1219. // Do not add any results, signifying that N should not be custom lowered
  1220. // after all. This happens because simd128 turns on custom lowering for
  1221. // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
  1222. // illegal type.
  1223. break;
  1224. default:
  1225. llvm_unreachable(
  1226. "ReplaceNodeResults not implemented for this op for WebAssembly!");
  1227. }
  1228. }
  1229. //===----------------------------------------------------------------------===//
  1230. // Custom lowering hooks.
  1231. //===----------------------------------------------------------------------===//
  1232. SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
  1233. SelectionDAG &DAG) const {
  1234. SDLoc DL(Op);
  1235. switch (Op.getOpcode()) {
  1236. default:
  1237. llvm_unreachable("unimplemented operation lowering");
  1238. return SDValue();
  1239. case ISD::FrameIndex:
  1240. return LowerFrameIndex(Op, DAG);
  1241. case ISD::GlobalAddress:
  1242. return LowerGlobalAddress(Op, DAG);
  1243. case ISD::GlobalTLSAddress:
  1244. return LowerGlobalTLSAddress(Op, DAG);
  1245. case ISD::ExternalSymbol:
  1246. return LowerExternalSymbol(Op, DAG);
  1247. case ISD::JumpTable:
  1248. return LowerJumpTable(Op, DAG);
  1249. case ISD::BR_JT:
  1250. return LowerBR_JT(Op, DAG);
  1251. case ISD::VASTART:
  1252. return LowerVASTART(Op, DAG);
  1253. case ISD::BlockAddress:
  1254. case ISD::BRIND:
  1255. fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
  1256. return SDValue();
  1257. case ISD::RETURNADDR:
  1258. return LowerRETURNADDR(Op, DAG);
  1259. case ISD::FRAMEADDR:
  1260. return LowerFRAMEADDR(Op, DAG);
  1261. case ISD::CopyToReg:
  1262. return LowerCopyToReg(Op, DAG);
  1263. case ISD::EXTRACT_VECTOR_ELT:
  1264. case ISD::INSERT_VECTOR_ELT:
  1265. return LowerAccessVectorElement(Op, DAG);
  1266. case ISD::INTRINSIC_VOID:
  1267. case ISD::INTRINSIC_WO_CHAIN:
  1268. case ISD::INTRINSIC_W_CHAIN:
  1269. return LowerIntrinsic(Op, DAG);
  1270. case ISD::SIGN_EXTEND_INREG:
  1271. return LowerSIGN_EXTEND_INREG(Op, DAG);
  1272. case ISD::BUILD_VECTOR:
  1273. return LowerBUILD_VECTOR(Op, DAG);
  1274. case ISD::VECTOR_SHUFFLE:
  1275. return LowerVECTOR_SHUFFLE(Op, DAG);
  1276. case ISD::SETCC:
  1277. return LowerSETCC(Op, DAG);
  1278. case ISD::SHL:
  1279. case ISD::SRA:
  1280. case ISD::SRL:
  1281. return LowerShift(Op, DAG);
  1282. case ISD::FP_TO_SINT_SAT:
  1283. case ISD::FP_TO_UINT_SAT:
  1284. return LowerFP_TO_INT_SAT(Op, DAG);
  1285. case ISD::LOAD:
  1286. return LowerLoad(Op, DAG);
  1287. case ISD::STORE:
  1288. return LowerStore(Op, DAG);
  1289. case ISD::CTPOP:
  1290. case ISD::CTLZ:
  1291. case ISD::CTTZ:
  1292. return DAG.UnrollVectorOp(Op.getNode());
  1293. }
  1294. }
  1295. static bool IsWebAssemblyGlobal(SDValue Op) {
  1296. if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
  1297. return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
  1298. return false;
  1299. }
  1300. static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
  1301. SelectionDAG &DAG) {
  1302. const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
  1303. if (!FI)
  1304. return std::nullopt;
  1305. auto &MF = DAG.getMachineFunction();
  1306. return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
  1307. }
  1308. SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
  1309. SelectionDAG &DAG) const {
  1310. SDLoc DL(Op);
  1311. StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  1312. const SDValue &Value = SN->getValue();
  1313. const SDValue &Base = SN->getBasePtr();
  1314. const SDValue &Offset = SN->getOffset();
  1315. if (IsWebAssemblyGlobal(Base)) {
  1316. if (!Offset->isUndef())
  1317. report_fatal_error("unexpected offset when storing to webassembly global",
  1318. false);
  1319. SDVTList Tys = DAG.getVTList(MVT::Other);
  1320. SDValue Ops[] = {SN->getChain(), Value, Base};
  1321. return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
  1322. SN->getMemoryVT(), SN->getMemOperand());
  1323. }
  1324. if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
  1325. if (!Offset->isUndef())
  1326. report_fatal_error("unexpected offset when storing to webassembly local",
  1327. false);
  1328. SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
  1329. SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
  1330. SDValue Ops[] = {SN->getChain(), Idx, Value};
  1331. return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  1332. }
  1333. if (WebAssembly::isWasmVarAddressSpace(SN->getAddressSpace()))
  1334. report_fatal_error(
  1335. "Encountered an unlowerable store to the wasm_var address space",
  1336. false);
  1337. return Op;
  1338. }
  1339. SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
  1340. SelectionDAG &DAG) const {
  1341. SDLoc DL(Op);
  1342. LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  1343. const SDValue &Base = LN->getBasePtr();
  1344. const SDValue &Offset = LN->getOffset();
  1345. if (IsWebAssemblyGlobal(Base)) {
  1346. if (!Offset->isUndef())
  1347. report_fatal_error(
  1348. "unexpected offset when loading from webassembly global", false);
  1349. SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
  1350. SDValue Ops[] = {LN->getChain(), Base};
  1351. return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
  1352. LN->getMemoryVT(), LN->getMemOperand());
  1353. }
  1354. if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
  1355. if (!Offset->isUndef())
  1356. report_fatal_error(
  1357. "unexpected offset when loading from webassembly local", false);
  1358. SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
  1359. EVT LocalVT = LN->getValueType(0);
  1360. SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
  1361. {LN->getChain(), Idx});
  1362. SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
  1363. assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
  1364. return Result;
  1365. }
  1366. if (WebAssembly::isWasmVarAddressSpace(LN->getAddressSpace()))
  1367. report_fatal_error(
  1368. "Encountered an unlowerable load from the wasm_var address space",
  1369. false);
  1370. return Op;
  1371. }
  1372. SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
  1373. SelectionDAG &DAG) const {
  1374. SDValue Src = Op.getOperand(2);
  1375. if (isa<FrameIndexSDNode>(Src.getNode())) {
  1376. // CopyToReg nodes don't support FrameIndex operands. Other targets select
  1377. // the FI to some LEA-like instruction, but since we don't have that, we
  1378. // need to insert some kind of instruction that can take an FI operand and
  1379. // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
  1380. // local.copy between Op and its FI operand.
  1381. SDValue Chain = Op.getOperand(0);
  1382. SDLoc DL(Op);
  1383. Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
  1384. EVT VT = Src.getValueType();
  1385. SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
  1386. : WebAssembly::COPY_I64,
  1387. DL, VT, Src),
  1388. 0);
  1389. return Op.getNode()->getNumValues() == 1
  1390. ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
  1391. : DAG.getCopyToReg(Chain, DL, Reg, Copy,
  1392. Op.getNumOperands() == 4 ? Op.getOperand(3)
  1393. : SDValue());
  1394. }
  1395. return SDValue();
  1396. }
  1397. SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
  1398. SelectionDAG &DAG) const {
  1399. int FI = cast<FrameIndexSDNode>(Op)->getIndex();
  1400. return DAG.getTargetFrameIndex(FI, Op.getValueType());
  1401. }
  1402. SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
  1403. SelectionDAG &DAG) const {
  1404. SDLoc DL(Op);
  1405. if (!Subtarget->getTargetTriple().isOSEmscripten()) {
  1406. fail(DL, DAG,
  1407. "Non-Emscripten WebAssembly hasn't implemented "
  1408. "__builtin_return_address");
  1409. return SDValue();
  1410. }
  1411. if (verifyReturnAddressArgumentIsConstant(Op, DAG))
  1412. return SDValue();
  1413. unsigned Depth = Op.getConstantOperandVal(0);
  1414. MakeLibCallOptions CallOptions;
  1415. return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
  1416. {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
  1417. .first;
  1418. }
  1419. SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
  1420. SelectionDAG &DAG) const {
  1421. // Non-zero depths are not supported by WebAssembly currently. Use the
  1422. // legalizer's default expansion, which is to return 0 (what this function is
  1423. // documented to do).
  1424. if (Op.getConstantOperandVal(0) > 0)
  1425. return SDValue();
  1426. DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
  1427. EVT VT = Op.getValueType();
  1428. Register FP =
  1429. Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
  1430. return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
  1431. }
  1432. SDValue
  1433. WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
  1434. SelectionDAG &DAG) const {
  1435. SDLoc DL(Op);
  1436. const auto *GA = cast<GlobalAddressSDNode>(Op);
  1437. MachineFunction &MF = DAG.getMachineFunction();
  1438. if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
  1439. report_fatal_error("cannot use thread-local storage without bulk memory",
  1440. false);
  1441. const GlobalValue *GV = GA->getGlobal();
  1442. // Currently only Emscripten supports dynamic linking with threads. Therefore,
  1443. // on other targets, if we have thread-local storage, only the local-exec
  1444. // model is possible.
  1445. auto model = Subtarget->getTargetTriple().isOSEmscripten()
  1446. ? GV->getThreadLocalMode()
  1447. : GlobalValue::LocalExecTLSModel;
  1448. // Unsupported TLS modes
  1449. assert(model != GlobalValue::NotThreadLocal);
  1450. assert(model != GlobalValue::InitialExecTLSModel);
  1451. if (model == GlobalValue::LocalExecTLSModel ||
  1452. model == GlobalValue::LocalDynamicTLSModel ||
  1453. (model == GlobalValue::GeneralDynamicTLSModel &&
  1454. getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))) {
  1455. // For DSO-local TLS variables we use offset from __tls_base
  1456. MVT PtrVT = getPointerTy(DAG.getDataLayout());
  1457. auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
  1458. : WebAssembly::GLOBAL_GET_I32;
  1459. const char *BaseName = MF.createExternalSymbolName("__tls_base");
  1460. SDValue BaseAddr(
  1461. DAG.getMachineNode(GlobalGet, DL, PtrVT,
  1462. DAG.getTargetExternalSymbol(BaseName, PtrVT)),
  1463. 0);
  1464. SDValue TLSOffset = DAG.getTargetGlobalAddress(
  1465. GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
  1466. SDValue SymOffset =
  1467. DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
  1468. return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
  1469. }
  1470. assert(model == GlobalValue::GeneralDynamicTLSModel);
  1471. EVT VT = Op.getValueType();
  1472. return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
  1473. DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
  1474. GA->getOffset(),
  1475. WebAssemblyII::MO_GOT_TLS));
  1476. }
  1477. SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
  1478. SelectionDAG &DAG) const {
  1479. SDLoc DL(Op);
  1480. const auto *GA = cast<GlobalAddressSDNode>(Op);
  1481. EVT VT = Op.getValueType();
  1482. assert(GA->getTargetFlags() == 0 &&
  1483. "Unexpected target flags on generic GlobalAddressSDNode");
  1484. if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
  1485. fail(DL, DAG, "Invalid address space for WebAssembly target");
  1486. unsigned OperandFlags = 0;
  1487. if (isPositionIndependent()) {
  1488. const GlobalValue *GV = GA->getGlobal();
  1489. if (getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)) {
  1490. MachineFunction &MF = DAG.getMachineFunction();
  1491. MVT PtrVT = getPointerTy(MF.getDataLayout());
  1492. const char *BaseName;
  1493. if (GV->getValueType()->isFunctionTy()) {
  1494. BaseName = MF.createExternalSymbolName("__table_base");
  1495. OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
  1496. } else {
  1497. BaseName = MF.createExternalSymbolName("__memory_base");
  1498. OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
  1499. }
  1500. SDValue BaseAddr =
  1501. DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
  1502. DAG.getTargetExternalSymbol(BaseName, PtrVT));
  1503. SDValue SymAddr = DAG.getNode(
  1504. WebAssemblyISD::WrapperREL, DL, VT,
  1505. DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
  1506. OperandFlags));
  1507. return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
  1508. }
  1509. OperandFlags = WebAssemblyII::MO_GOT;
  1510. }
  1511. return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
  1512. DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
  1513. GA->getOffset(), OperandFlags));
  1514. }
  1515. SDValue
  1516. WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
  1517. SelectionDAG &DAG) const {
  1518. SDLoc DL(Op);
  1519. const auto *ES = cast<ExternalSymbolSDNode>(Op);
  1520. EVT VT = Op.getValueType();
  1521. assert(ES->getTargetFlags() == 0 &&
  1522. "Unexpected target flags on generic ExternalSymbolSDNode");
  1523. return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
  1524. DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
  1525. }
  1526. SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
  1527. SelectionDAG &DAG) const {
  1528. // There's no need for a Wrapper node because we always incorporate a jump
  1529. // table operand into a BR_TABLE instruction, rather than ever
  1530. // materializing it in a register.
  1531. const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  1532. return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
  1533. JT->getTargetFlags());
  1534. }
  1535. SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
  1536. SelectionDAG &DAG) const {
  1537. SDLoc DL(Op);
  1538. SDValue Chain = Op.getOperand(0);
  1539. const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
  1540. SDValue Index = Op.getOperand(2);
  1541. assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
  1542. SmallVector<SDValue, 8> Ops;
  1543. Ops.push_back(Chain);
  1544. Ops.push_back(Index);
  1545. MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
  1546. const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
  1547. // Add an operand for each case.
  1548. for (auto *MBB : MBBs)
  1549. Ops.push_back(DAG.getBasicBlock(MBB));
  1550. // Add the first MBB as a dummy default target for now. This will be replaced
  1551. // with the proper default target (and the preceding range check eliminated)
  1552. // if possible by WebAssemblyFixBrTableDefaults.
  1553. Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
  1554. return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
  1555. }
  1556. SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
  1557. SelectionDAG &DAG) const {
  1558. SDLoc DL(Op);
  1559. EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
  1560. auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
  1561. const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  1562. SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
  1563. MFI->getVarargBufferVreg(), PtrVT);
  1564. return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
  1565. MachinePointerInfo(SV));
  1566. }
  1567. SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
  1568. SelectionDAG &DAG) const {
  1569. MachineFunction &MF = DAG.getMachineFunction();
  1570. unsigned IntNo;
  1571. switch (Op.getOpcode()) {
  1572. case ISD::INTRINSIC_VOID:
  1573. case ISD::INTRINSIC_W_CHAIN:
  1574. IntNo = Op.getConstantOperandVal(1);
  1575. break;
  1576. case ISD::INTRINSIC_WO_CHAIN:
  1577. IntNo = Op.getConstantOperandVal(0);
  1578. break;
  1579. default:
  1580. llvm_unreachable("Invalid intrinsic");
  1581. }
  1582. SDLoc DL(Op);
  1583. switch (IntNo) {
  1584. default:
  1585. return SDValue(); // Don't custom lower most intrinsics.
  1586. case Intrinsic::wasm_lsda: {
  1587. auto PtrVT = getPointerTy(MF.getDataLayout());
  1588. const char *SymName = MF.createExternalSymbolName(
  1589. "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
  1590. if (isPositionIndependent()) {
  1591. SDValue Node = DAG.getTargetExternalSymbol(
  1592. SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
  1593. const char *BaseName = MF.createExternalSymbolName("__memory_base");
  1594. SDValue BaseAddr =
  1595. DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
  1596. DAG.getTargetExternalSymbol(BaseName, PtrVT));
  1597. SDValue SymAddr =
  1598. DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
  1599. return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
  1600. }
  1601. SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
  1602. return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
  1603. }
  1604. case Intrinsic::wasm_shuffle: {
  1605. // Drop in-chain and replace undefs, but otherwise pass through unchanged
  1606. SDValue Ops[18];
  1607. size_t OpIdx = 0;
  1608. Ops[OpIdx++] = Op.getOperand(1);
  1609. Ops[OpIdx++] = Op.getOperand(2);
  1610. while (OpIdx < 18) {
  1611. const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
  1612. if (MaskIdx.isUndef() ||
  1613. cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
  1614. Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
  1615. } else {
  1616. Ops[OpIdx++] = MaskIdx;
  1617. }
  1618. }
  1619. return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  1620. }
  1621. }
  1622. }
  1623. SDValue
  1624. WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
  1625. SelectionDAG &DAG) const {
  1626. SDLoc DL(Op);
  1627. // If sign extension operations are disabled, allow sext_inreg only if operand
  1628. // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
  1629. // extension operations, but allowing sext_inreg in this context lets us have
  1630. // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
  1631. // everywhere would be simpler in this file, but would necessitate large and
  1632. // brittle patterns to undo the expansion and select extract_lane_s
  1633. // instructions.
  1634. assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
  1635. if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  1636. return SDValue();
  1637. const SDValue &Extract = Op.getOperand(0);
  1638. MVT VecT = Extract.getOperand(0).getSimpleValueType();
  1639. if (VecT.getVectorElementType().getSizeInBits() > 32)
  1640. return SDValue();
  1641. MVT ExtractedLaneT =
  1642. cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
  1643. MVT ExtractedVecT =
  1644. MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
  1645. if (ExtractedVecT == VecT)
  1646. return Op;
  1647. // Bitcast vector to appropriate type to ensure ISel pattern coverage
  1648. const SDNode *Index = Extract.getOperand(1).getNode();
  1649. if (!isa<ConstantSDNode>(Index))
  1650. return SDValue();
  1651. unsigned IndexVal = cast<ConstantSDNode>(Index)->getZExtValue();
  1652. unsigned Scale =
  1653. ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
  1654. assert(Scale > 1);
  1655. SDValue NewIndex =
  1656. DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
  1657. SDValue NewExtract = DAG.getNode(
  1658. ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
  1659. DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
  1660. return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
  1661. Op.getOperand(1));
  1662. }
  1663. static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
  1664. SDLoc DL(Op);
  1665. if (Op.getValueType() != MVT::v2f64)
  1666. return SDValue();
  1667. auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
  1668. unsigned &Index) -> bool {
  1669. switch (Op.getOpcode()) {
  1670. case ISD::SINT_TO_FP:
  1671. Opcode = WebAssemblyISD::CONVERT_LOW_S;
  1672. break;
  1673. case ISD::UINT_TO_FP:
  1674. Opcode = WebAssemblyISD::CONVERT_LOW_U;
  1675. break;
  1676. case ISD::FP_EXTEND:
  1677. Opcode = WebAssemblyISD::PROMOTE_LOW;
  1678. break;
  1679. default:
  1680. return false;
  1681. }
  1682. auto ExtractVector = Op.getOperand(0);
  1683. if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  1684. return false;
  1685. if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
  1686. return false;
  1687. SrcVec = ExtractVector.getOperand(0);
  1688. Index = ExtractVector.getConstantOperandVal(1);
  1689. return true;
  1690. };
  1691. unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
  1692. SDValue LHSSrcVec, RHSSrcVec;
  1693. if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
  1694. !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
  1695. return SDValue();
  1696. if (LHSOpcode != RHSOpcode)
  1697. return SDValue();
  1698. MVT ExpectedSrcVT;
  1699. switch (LHSOpcode) {
  1700. case WebAssemblyISD::CONVERT_LOW_S:
  1701. case WebAssemblyISD::CONVERT_LOW_U:
  1702. ExpectedSrcVT = MVT::v4i32;
  1703. break;
  1704. case WebAssemblyISD::PROMOTE_LOW:
  1705. ExpectedSrcVT = MVT::v4f32;
  1706. break;
  1707. }
  1708. if (LHSSrcVec.getValueType() != ExpectedSrcVT)
  1709. return SDValue();
  1710. auto Src = LHSSrcVec;
  1711. if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
  1712. // Shuffle the source vector so that the converted lanes are the low lanes.
  1713. Src = DAG.getVectorShuffle(
  1714. ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
  1715. {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
  1716. }
  1717. return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
  1718. }
  1719. SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
  1720. SelectionDAG &DAG) const {
  1721. if (auto ConvertLow = LowerConvertLow(Op, DAG))
  1722. return ConvertLow;
  1723. SDLoc DL(Op);
  1724. const EVT VecT = Op.getValueType();
  1725. const EVT LaneT = Op.getOperand(0).getValueType();
  1726. const size_t Lanes = Op.getNumOperands();
  1727. bool CanSwizzle = VecT == MVT::v16i8;
  1728. // BUILD_VECTORs are lowered to the instruction that initializes the highest
  1729. // possible number of lanes at once followed by a sequence of replace_lane
  1730. // instructions to individually initialize any remaining lanes.
  1731. // TODO: Tune this. For example, lanewise swizzling is very expensive, so
  1732. // swizzled lanes should be given greater weight.
  1733. // TODO: Investigate looping rather than always extracting/replacing specific
  1734. // lanes to fill gaps.
  1735. auto IsConstant = [](const SDValue &V) {
  1736. return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
  1737. };
  1738. // Returns the source vector and index vector pair if they exist. Checks for:
  1739. // (extract_vector_elt
  1740. // $src,
  1741. // (sign_extend_inreg (extract_vector_elt $indices, $i))
  1742. // )
  1743. auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
  1744. auto Bail = std::make_pair(SDValue(), SDValue());
  1745. if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  1746. return Bail;
  1747. const SDValue &SwizzleSrc = Lane->getOperand(0);
  1748. const SDValue &IndexExt = Lane->getOperand(1);
  1749. if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
  1750. return Bail;
  1751. const SDValue &Index = IndexExt->getOperand(0);
  1752. if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  1753. return Bail;
  1754. const SDValue &SwizzleIndices = Index->getOperand(0);
  1755. if (SwizzleSrc.getValueType() != MVT::v16i8 ||
  1756. SwizzleIndices.getValueType() != MVT::v16i8 ||
  1757. Index->getOperand(1)->getOpcode() != ISD::Constant ||
  1758. Index->getConstantOperandVal(1) != I)
  1759. return Bail;
  1760. return std::make_pair(SwizzleSrc, SwizzleIndices);
  1761. };
  1762. // If the lane is extracted from another vector at a constant index, return
  1763. // that vector. The source vector must not have more lanes than the dest
  1764. // because the shufflevector indices are in terms of the destination lanes and
  1765. // would not be able to address the smaller individual source lanes.
  1766. auto GetShuffleSrc = [&](const SDValue &Lane) {
  1767. if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
  1768. return SDValue();
  1769. if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
  1770. return SDValue();
  1771. if (Lane->getOperand(0).getValueType().getVectorNumElements() >
  1772. VecT.getVectorNumElements())
  1773. return SDValue();
  1774. return Lane->getOperand(0);
  1775. };
  1776. using ValueEntry = std::pair<SDValue, size_t>;
  1777. SmallVector<ValueEntry, 16> SplatValueCounts;
  1778. using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
  1779. SmallVector<SwizzleEntry, 16> SwizzleCounts;
  1780. using ShuffleEntry = std::pair<SDValue, size_t>;
  1781. SmallVector<ShuffleEntry, 16> ShuffleCounts;
  1782. auto AddCount = [](auto &Counts, const auto &Val) {
  1783. auto CountIt =
  1784. llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
  1785. if (CountIt == Counts.end()) {
  1786. Counts.emplace_back(Val, 1);
  1787. } else {
  1788. CountIt->second++;
  1789. }
  1790. };
  1791. auto GetMostCommon = [](auto &Counts) {
  1792. auto CommonIt =
  1793. std::max_element(Counts.begin(), Counts.end(), llvm::less_second());
  1794. assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
  1795. return *CommonIt;
  1796. };
  1797. size_t NumConstantLanes = 0;
  1798. // Count eligible lanes for each type of vector creation op
  1799. for (size_t I = 0; I < Lanes; ++I) {
  1800. const SDValue &Lane = Op->getOperand(I);
  1801. if (Lane.isUndef())
  1802. continue;
  1803. AddCount(SplatValueCounts, Lane);
  1804. if (IsConstant(Lane))
  1805. NumConstantLanes++;
  1806. if (auto ShuffleSrc = GetShuffleSrc(Lane))
  1807. AddCount(ShuffleCounts, ShuffleSrc);
  1808. if (CanSwizzle) {
  1809. auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
  1810. if (SwizzleSrcs.first)
  1811. AddCount(SwizzleCounts, SwizzleSrcs);
  1812. }
  1813. }
  1814. SDValue SplatValue;
  1815. size_t NumSplatLanes;
  1816. std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
  1817. SDValue SwizzleSrc;
  1818. SDValue SwizzleIndices;
  1819. size_t NumSwizzleLanes = 0;
  1820. if (SwizzleCounts.size())
  1821. std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
  1822. NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
  1823. // Shuffles can draw from up to two vectors, so find the two most common
  1824. // sources.
  1825. SDValue ShuffleSrc1, ShuffleSrc2;
  1826. size_t NumShuffleLanes = 0;
  1827. if (ShuffleCounts.size()) {
  1828. std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
  1829. llvm::erase_if(ShuffleCounts,
  1830. [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
  1831. }
  1832. if (ShuffleCounts.size()) {
  1833. size_t AdditionalShuffleLanes;
  1834. std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
  1835. GetMostCommon(ShuffleCounts);
  1836. NumShuffleLanes += AdditionalShuffleLanes;
  1837. }
  1838. // Predicate returning true if the lane is properly initialized by the
  1839. // original instruction
  1840. std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
  1841. SDValue Result;
  1842. // Prefer swizzles over shuffles over vector consts over splats
  1843. if (NumSwizzleLanes >= NumShuffleLanes &&
  1844. NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
  1845. Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
  1846. SwizzleIndices);
  1847. auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
  1848. IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
  1849. return Swizzled == GetSwizzleSrcs(I, Lane);
  1850. };
  1851. } else if (NumShuffleLanes >= NumConstantLanes &&
  1852. NumShuffleLanes >= NumSplatLanes) {
  1853. size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
  1854. size_t DestLaneCount = VecT.getVectorNumElements();
  1855. size_t Scale1 = 1;
  1856. size_t Scale2 = 1;
  1857. SDValue Src1 = ShuffleSrc1;
  1858. SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
  1859. if (Src1.getValueType() != VecT) {
  1860. size_t LaneSize =
  1861. Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
  1862. assert(LaneSize > DestLaneSize);
  1863. Scale1 = LaneSize / DestLaneSize;
  1864. Src1 = DAG.getBitcast(VecT, Src1);
  1865. }
  1866. if (Src2.getValueType() != VecT) {
  1867. size_t LaneSize =
  1868. Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
  1869. assert(LaneSize > DestLaneSize);
  1870. Scale2 = LaneSize / DestLaneSize;
  1871. Src2 = DAG.getBitcast(VecT, Src2);
  1872. }
  1873. int Mask[16];
  1874. assert(DestLaneCount <= 16);
  1875. for (size_t I = 0; I < DestLaneCount; ++I) {
  1876. const SDValue &Lane = Op->getOperand(I);
  1877. SDValue Src = GetShuffleSrc(Lane);
  1878. if (Src == ShuffleSrc1) {
  1879. Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
  1880. } else if (Src && Src == ShuffleSrc2) {
  1881. Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
  1882. } else {
  1883. Mask[I] = -1;
  1884. }
  1885. }
  1886. ArrayRef<int> MaskRef(Mask, DestLaneCount);
  1887. Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
  1888. IsLaneConstructed = [&](size_t, const SDValue &Lane) {
  1889. auto Src = GetShuffleSrc(Lane);
  1890. return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
  1891. };
  1892. } else if (NumConstantLanes >= NumSplatLanes) {
  1893. SmallVector<SDValue, 16> ConstLanes;
  1894. for (const SDValue &Lane : Op->op_values()) {
  1895. if (IsConstant(Lane)) {
  1896. // Values may need to be fixed so that they will sign extend to be
  1897. // within the expected range during ISel. Check whether the value is in
  1898. // bounds based on the lane bit width and if it is out of bounds, lop
  1899. // off the extra bits and subtract 2^n to reflect giving the high bit
  1900. // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
  1901. // cannot possibly be out of range.
  1902. auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
  1903. int64_t Val = Const ? Const->getSExtValue() : 0;
  1904. uint64_t LaneBits = 128 / Lanes;
  1905. assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
  1906. "Unexpected out of bounds negative value");
  1907. if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
  1908. auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits);
  1909. ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
  1910. } else {
  1911. ConstLanes.push_back(Lane);
  1912. }
  1913. } else if (LaneT.isFloatingPoint()) {
  1914. ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
  1915. } else {
  1916. ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
  1917. }
  1918. }
  1919. Result = DAG.getBuildVector(VecT, DL, ConstLanes);
  1920. IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
  1921. return IsConstant(Lane);
  1922. };
  1923. } else {
  1924. // Use a splat (which might be selected as a load splat)
  1925. Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
  1926. IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
  1927. return Lane == SplatValue;
  1928. };
  1929. }
  1930. assert(Result);
  1931. assert(IsLaneConstructed);
  1932. // Add replace_lane instructions for any unhandled values
  1933. for (size_t I = 0; I < Lanes; ++I) {
  1934. const SDValue &Lane = Op->getOperand(I);
  1935. if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
  1936. Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
  1937. DAG.getConstant(I, DL, MVT::i32));
  1938. }
  1939. return Result;
  1940. }
  1941. SDValue
  1942. WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
  1943. SelectionDAG &DAG) const {
  1944. SDLoc DL(Op);
  1945. ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
  1946. MVT VecType = Op.getOperand(0).getSimpleValueType();
  1947. assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
  1948. size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
  1949. // Space for two vector args and sixteen mask indices
  1950. SDValue Ops[18];
  1951. size_t OpIdx = 0;
  1952. Ops[OpIdx++] = Op.getOperand(0);
  1953. Ops[OpIdx++] = Op.getOperand(1);
  1954. // Expand mask indices to byte indices and materialize them as operands
  1955. for (int M : Mask) {
  1956. for (size_t J = 0; J < LaneBytes; ++J) {
  1957. // Lower undefs (represented by -1 in mask) to {0..J}, which use a
  1958. // whole lane of vector input, to allow further reduction at VM. E.g.
  1959. // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
  1960. uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
  1961. Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
  1962. }
  1963. }
  1964. return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
  1965. }
  1966. SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
  1967. SelectionDAG &DAG) const {
  1968. SDLoc DL(Op);
  1969. // The legalizer does not know how to expand the unsupported comparison modes
  1970. // of i64x2 vectors, so we manually unroll them here.
  1971. assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
  1972. SmallVector<SDValue, 2> LHS, RHS;
  1973. DAG.ExtractVectorElements(Op->getOperand(0), LHS);
  1974. DAG.ExtractVectorElements(Op->getOperand(1), RHS);
  1975. const SDValue &CC = Op->getOperand(2);
  1976. auto MakeLane = [&](unsigned I) {
  1977. return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
  1978. DAG.getConstant(uint64_t(-1), DL, MVT::i64),
  1979. DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
  1980. };
  1981. return DAG.getBuildVector(Op->getValueType(0), DL,
  1982. {MakeLane(0), MakeLane(1)});
  1983. }
  1984. SDValue
  1985. WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
  1986. SelectionDAG &DAG) const {
  1987. // Allow constant lane indices, expand variable lane indices
  1988. SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
  1989. if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) {
  1990. // Ensure the index type is i32 to match the tablegen patterns
  1991. uint64_t Idx = cast<ConstantSDNode>(IdxNode)->getZExtValue();
  1992. SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
  1993. Ops[Op.getNumOperands() - 1] =
  1994. DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
  1995. return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
  1996. }
  1997. // Perform default expansion
  1998. return SDValue();
  1999. }
  2000. static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
  2001. EVT LaneT = Op.getSimpleValueType().getVectorElementType();
  2002. // 32-bit and 64-bit unrolled shifts will have proper semantics
  2003. if (LaneT.bitsGE(MVT::i32))
  2004. return DAG.UnrollVectorOp(Op.getNode());
  2005. // Otherwise mask the shift value to get proper semantics from 32-bit shift
  2006. SDLoc DL(Op);
  2007. size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
  2008. SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
  2009. unsigned ShiftOpcode = Op.getOpcode();
  2010. SmallVector<SDValue, 16> ShiftedElements;
  2011. DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
  2012. SmallVector<SDValue, 16> ShiftElements;
  2013. DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
  2014. SmallVector<SDValue, 16> UnrolledOps;
  2015. for (size_t i = 0; i < NumLanes; ++i) {
  2016. SDValue MaskedShiftValue =
  2017. DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
  2018. SDValue ShiftedValue = ShiftedElements[i];
  2019. if (ShiftOpcode == ISD::SRA)
  2020. ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
  2021. ShiftedValue, DAG.getValueType(LaneT));
  2022. UnrolledOps.push_back(
  2023. DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
  2024. }
  2025. return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
  2026. }
  2027. SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
  2028. SelectionDAG &DAG) const {
  2029. SDLoc DL(Op);
  2030. // Only manually lower vector shifts
  2031. assert(Op.getSimpleValueType().isVector());
  2032. auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
  2033. if (!ShiftVal)
  2034. return unrollVectorShift(Op, DAG);
  2035. // Use anyext because none of the high bits can affect the shift
  2036. ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
  2037. unsigned Opcode;
  2038. switch (Op.getOpcode()) {
  2039. case ISD::SHL:
  2040. Opcode = WebAssemblyISD::VEC_SHL;
  2041. break;
  2042. case ISD::SRA:
  2043. Opcode = WebAssemblyISD::VEC_SHR_S;
  2044. break;
  2045. case ISD::SRL:
  2046. Opcode = WebAssemblyISD::VEC_SHR_U;
  2047. break;
  2048. default:
  2049. llvm_unreachable("unexpected opcode");
  2050. }
  2051. return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
  2052. }
  2053. SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
  2054. SelectionDAG &DAG) const {
  2055. SDLoc DL(Op);
  2056. EVT ResT = Op.getValueType();
  2057. EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
  2058. if ((ResT == MVT::i32 || ResT == MVT::i64) &&
  2059. (SatVT == MVT::i32 || SatVT == MVT::i64))
  2060. return Op;
  2061. if (ResT == MVT::v4i32 && SatVT == MVT::i32)
  2062. return Op;
  2063. return SDValue();
  2064. }
  2065. //===----------------------------------------------------------------------===//
  2066. // Custom DAG combine hooks
  2067. //===----------------------------------------------------------------------===//
  2068. static SDValue
  2069. performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  2070. auto &DAG = DCI.DAG;
  2071. auto Shuffle = cast<ShuffleVectorSDNode>(N);
  2072. // Hoist vector bitcasts that don't change the number of lanes out of unary
  2073. // shuffles, where they are less likely to get in the way of other combines.
  2074. // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
  2075. // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
  2076. SDValue Bitcast = N->getOperand(0);
  2077. if (Bitcast.getOpcode() != ISD::BITCAST)
  2078. return SDValue();
  2079. if (!N->getOperand(1).isUndef())
  2080. return SDValue();
  2081. SDValue CastOp = Bitcast.getOperand(0);
  2082. MVT SrcType = CastOp.getSimpleValueType();
  2083. MVT DstType = Bitcast.getSimpleValueType();
  2084. if (!SrcType.is128BitVector() ||
  2085. SrcType.getVectorNumElements() != DstType.getVectorNumElements())
  2086. return SDValue();
  2087. SDValue NewShuffle = DAG.getVectorShuffle(
  2088. SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
  2089. return DAG.getBitcast(DstType, NewShuffle);
  2090. }
  2091. /// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
  2092. /// split up into scalar instructions during legalization, and the vector
  2093. /// extending instructions are selected in performVectorExtendCombine below.
  2094. static SDValue
  2095. performVectorExtendToFPCombine(SDNode *N,
  2096. TargetLowering::DAGCombinerInfo &DCI) {
  2097. auto &DAG = DCI.DAG;
  2098. assert(N->getOpcode() == ISD::UINT_TO_FP ||
  2099. N->getOpcode() == ISD::SINT_TO_FP);
  2100. EVT InVT = N->getOperand(0)->getValueType(0);
  2101. EVT ResVT = N->getValueType(0);
  2102. MVT ExtVT;
  2103. if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
  2104. ExtVT = MVT::v4i32;
  2105. else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
  2106. ExtVT = MVT::v2i32;
  2107. else
  2108. return SDValue();
  2109. unsigned Op =
  2110. N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
  2111. SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
  2112. return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
  2113. }
  2114. static SDValue
  2115. performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  2116. auto &DAG = DCI.DAG;
  2117. assert(N->getOpcode() == ISD::SIGN_EXTEND ||
  2118. N->getOpcode() == ISD::ZERO_EXTEND);
  2119. // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
  2120. // possible before the extract_subvector can be expanded.
  2121. auto Extract = N->getOperand(0);
  2122. if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
  2123. return SDValue();
  2124. auto Source = Extract.getOperand(0);
  2125. auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
  2126. if (IndexNode == nullptr)
  2127. return SDValue();
  2128. auto Index = IndexNode->getZExtValue();
  2129. // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
  2130. // extracted subvector is the low or high half of its source.
  2131. EVT ResVT = N->getValueType(0);
  2132. if (ResVT == MVT::v8i16) {
  2133. if (Extract.getValueType() != MVT::v8i8 ||
  2134. Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
  2135. return SDValue();
  2136. } else if (ResVT == MVT::v4i32) {
  2137. if (Extract.getValueType() != MVT::v4i16 ||
  2138. Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
  2139. return SDValue();
  2140. } else if (ResVT == MVT::v2i64) {
  2141. if (Extract.getValueType() != MVT::v2i32 ||
  2142. Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
  2143. return SDValue();
  2144. } else {
  2145. return SDValue();
  2146. }
  2147. bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
  2148. bool IsLow = Index == 0;
  2149. unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
  2150. : WebAssemblyISD::EXTEND_HIGH_S)
  2151. : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
  2152. : WebAssemblyISD::EXTEND_HIGH_U);
  2153. return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  2154. }
  2155. static SDValue
  2156. performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
  2157. auto &DAG = DCI.DAG;
  2158. auto GetWasmConversionOp = [](unsigned Op) {
  2159. switch (Op) {
  2160. case ISD::FP_TO_SINT_SAT:
  2161. return WebAssemblyISD::TRUNC_SAT_ZERO_S;
  2162. case ISD::FP_TO_UINT_SAT:
  2163. return WebAssemblyISD::TRUNC_SAT_ZERO_U;
  2164. case ISD::FP_ROUND:
  2165. return WebAssemblyISD::DEMOTE_ZERO;
  2166. }
  2167. llvm_unreachable("unexpected op");
  2168. };
  auto IsZeroSplat = [](SDValue SplatVal) {
    auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
    APInt SplatValue, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    return Splat &&
           Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                  HasAnyUndefs) &&
           SplatValue == 0;
  };

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine this:
    //
    //   (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
    //
    // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
    //
    // Or this:
    //
    //   (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
    //
    // into (f32x4.demote_zero_f64x2 $x).
    EVT ResVT;
    EVT ExpectedConversionType;
    auto Conversion = N->getOperand(0);
    auto ConversionOp = Conversion.getOpcode();
    switch (ConversionOp) {
    case ISD::FP_TO_SINT_SAT:
    case ISD::FP_TO_UINT_SAT:
      ResVT = MVT::v4i32;
      ExpectedConversionType = MVT::v2i32;
      break;
    case ISD::FP_ROUND:
      ResVT = MVT::v4f32;
      ExpectedConversionType = MVT::v2f32;
      break;
    default:
      return SDValue();
    }

    if (N->getValueType(0) != ResVT)
      return SDValue();

    if (Conversion.getValueType() != ExpectedConversionType)
      return SDValue();

    auto Source = Conversion.getOperand(0);
    if (Source.getValueType() != MVT::v2f64)
      return SDValue();

    if (!IsZeroSplat(N->getOperand(1)) ||
        N->getOperand(1).getValueType() != ExpectedConversionType)
      return SDValue();

    unsigned Op = GetWasmConversionOp(ConversionOp);
    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
  }

  // Combine this:
  //
  //   (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
  //
  // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
  //
  // Or this:
  //
  //   (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
  //
  // into (f32x4.demote_zero_f64x2 $x).
  EVT ResVT;
  auto ConversionOp = N->getOpcode();
  switch (ConversionOp) {
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    ResVT = MVT::v4i32;
    break;
  case ISD::FP_ROUND:
    ResVT = MVT::v4f32;
    break;
  default:
    llvm_unreachable("unexpected op");
  }

  if (N->getValueType(0) != ResVT)
    return SDValue();

  auto Concat = N->getOperand(0);
  if (Concat.getValueType() != MVT::v4f64)
    return SDValue();

  auto Source = Concat.getOperand(0);
  if (Source.getValueType() != MVT::v2f64)
    return SDValue();

  if (!IsZeroSplat(Concat.getOperand(1)) ||
      Concat.getOperand(1).getValueType() != MVT::v2f64)
    return SDValue();

  unsigned Op = GetWasmConversionOp(ConversionOp);
  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
}
// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &DL, unsigned VectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / VectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR.
  unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the VectorWidth-bit chunk
  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, DL,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
}
// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
// is the expected destination value type after recursion. In is the initial
// input. Note that the input should have enough leading zero bits to prevent
// NARROW_U from saturating results.
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
                                        SelectionDAG &DAG) {
  EVT SrcVT = In.getValueType();

  // No truncation required, we might get here due to recursive calls.
  if (SrcVT == DstVT)
    return In;

  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();

  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");

  LLVMContext &Ctx = *DAG.getContext();
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Narrow to the largest type possible:
  // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }
  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // Split lower/upper subvectors.
  SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
  SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);

  // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // Recursively narrow lower/upper subvectors, concat result and narrow again.
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
  Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);

  return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
}
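
// Combine a vector TRUNCATE to v16i8 or v8i16 into a tree of NARROW_U nodes,
// masking the input lanes down to the output width first so that NARROW_U
// cannot saturate.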
static SDValue performTruncateCombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  auto &DAG = DCI.DAG;

  SDValue In = N->getOperand(0);
  EVT InVT = In.getValueType();
  if (!InVT.isSimple())
    return SDValue();

  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  // Currently only cover truncate to v16i8 or v8i16.
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
    return SDValue();

  SDLoc DL(N);
  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
                                    OutVT.getScalarSizeInBits());
  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
  return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}
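
// Dispatch nodes to the WebAssembly-specific DAG combines implemented above.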
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  default:
    return SDValue();
  case ISD::VECTOR_SHUFFLE:
    return performVECTOR_SHUFFLECombine(N, DCI);
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    return performVectorExtendCombine(N, DCI);
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
    return performVectorExtendToFPCombine(N, DCI);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_ROUND:
  case ISD::CONCAT_VECTORS:
    return performVectorTruncZeroCombine(N, DCI);
  case ISD::TRUNCATE:
    return performTruncateCombine(N, DCI);
  }
}